-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathR_CodeTemplate.R
More file actions
296 lines (265 loc) · 12.4 KB
/
R_CodeTemplate.R
File metadata and controls
296 lines (265 loc) · 12.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
###############################################################################
# Script Title: [Your Script Title]
# Purpose: [A concise description of what this script does, e.g., Analyze dataset X to generate report Y]
# Author: [Your Name/Team]
# Date Created: [YYYY-MM-DD]
# Date Last Modified: [YYYY-MM-DD] # <-- Keep this updated!
# Version: [e.g., 1.0.0] # <-- Optional: Add versioning
#
# Usage: Rscript your_script_name.R [options]
# Example: Rscript your_script_name.R --input data/raw/input.csv --output results/processed_data.tsv --param_value 50 --seed 42
#
# Input Data:
# - --input: Path to the primary input file (e.g., CSV, TSV). [Specify format expected]
# - [Optional: Describe other inputs if necessary]
#
# Output Data:
# - --output: Path to the primary output file (e.g., TSV, RDS). [Specify format created]
# - [Optional: Describe other outputs if necessary]
#
# Dependencies:
# - R version: [e.g., 4.3.0 or higher]
# - Packages: [List required packages, e.g., optparse, dplyr, readr, ggplot2]
# - Environment: [Optional: Mention if specific environment variables or external tools are needed]
###############################################################################
#------------------------------------------------------------------------------
# Environment Setup: Options, Variables
#------------------------------------------------------------------------------
# Prevent scientific notation in output files.
# `scipen` is the penalty R applies when choosing between fixed and exponential
# notation; a large positive value biases number formatting towards fixed.
# This is a session-wide option, so it affects everything below this line.
options(scipen = 999)
# Set timezone (optional, but good for consistency if handling dates/times)
# Sys.setenv(TZ='UTC')
#------------------------------------------------------------------------------
# Library Management: Install and load required packages
#------------------------------------------------------------------------------
message("Loading required packages...")
# Packages this script depends on - edit the vector to match your script.
required_packages <- c(
  "optparse", # Command-line argument handling
  "readr"     # Readers/writers for rectangular data (CSV/TSV)
  # "dplyr",   # Common for data manipulation
  # "ggplot2", # Common for plotting
  # "tidyr"    # Common for data tidying
)
# One pass per package: install it from CRAN when it is not available, then
# attach it. A package that still cannot be attached aborts the script.
for (pkg in required_packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    message("Installing package: ", pkg)
    install.packages(pkg, repos = "https://cloud.r-project.org")
  }
  # require() is used for its logical return value, which is checked right
  # here so that a failed load never goes unnoticed.
  if (!require(pkg, character.only = TRUE, quietly = TRUE)) {
    stop("Failed to load package: ", pkg, call. = FALSE)
  }
  message("- ", pkg, " loaded.")
}
message("Package loading complete.")
#------------------------------------------------------------------------------
# Parameter Initialization: Set up command-line options using optparse
#------------------------------------------------------------------------------
# Command-line interface. Each entry declares one optparse option; the parsed
# values are read further down as opt$input, opt$output, opt$param_value,
# opt$seed and opt$verbose.
option_list <- list(
  # Input path. The NULL default lets the validation step detect omission.
  make_option(
    opt_str = c("-i", "--input"),
    type = "character",
    default = NULL,
    metavar = "FILE",
    help = "REQUIRED: Path to the primary input data file."
  ),
  # Output path, with a default so the flag may be left out.
  make_option(
    opt_str = c("-o", "--output"),
    type = "character",
    default = "output.tsv",
    metavar = "FILE",
    help = "Path to the primary output file [default= %default]."
  ),
  # Example analysis parameter - prefer a specific name over a generic one.
  make_option(
    opt_str = c("-p", "--param_value"),
    type = "numeric",
    default = 10,
    metavar = "NUMBER",
    help = "Example numeric parameter affecting analysis [default= %default]."
  ),
  # Optional seed; stays NULL unless the user supplies one.
  make_option(
    opt_str = c("-s", "--seed"),
    type = "integer",
    default = NULL,
    metavar = "INTEGER",
    help = "Random seed for reproducibility (optional)."
  ),
  # Verbose messaging is on by default ...
  make_option(
    opt_str = c("-v", "--verbose"),
    action = "store_true",
    default = TRUE,
    help = "Print detailed execution messages [default= %default]."
  ),
  # ... and --quiet switches it off by storing FALSE into the same `verbose`
  # destination.
  make_option(
    opt_str = c("-q", "--quiet"),
    action = "store_false",
    dest = "verbose",
    help = "Suppress detailed execution messages."
  )
  # Add other options specific to your script here
)
# Build the parser and parse the script's trailing command-line arguments.
opt_parser <- OptionParser(
  usage = "Usage: Rscript %prog --input <INPUT_FILE> --output <OUTPUT_FILE> [options]",
  option_list = option_list,
  description = "Description: [A concise description of what this script does, matching the header purpose.]"
)
opt <- parse_args(opt_parser, args = commandArgs(trailingOnly = TRUE))
#------------------------------------------------------------------------------
# Parameter Validation & Processing
#------------------------------------------------------------------------------
message("Validating parameters...")
# --input is mandatory: print the usage text before aborting so the user can
# see which arguments are expected.
if (is.null(opt$input)) {
  message("ERROR: Input file path ('--input') must be provided.")
  print_help(opt_parser)
  stop("Missing required argument.", call. = FALSE)
}
# The input path must exist on disk before any work starts.
if (!file.exists(opt$input)) {
  stop("Input file not found: ", opt$input, call. = FALSE)
}
# --param_value must be exactly one positive number.
# NA needs its own test: is.numeric(NA_real_) is TRUE and `NA <= 0` is NA, so
# without the is.na() guard an NA value (which may result from command-line
# text that cannot be converted to a number) would make this `if` fail with
# "missing value where TRUE/FALSE needed" instead of the message below.
# The length test keeps the scalar `||` chain valid.
if (!is.numeric(opt$param_value) ||
      length(opt$param_value) != 1L ||
      is.na(opt$param_value) ||
      opt$param_value <= 0) {
  stop("--param_value must be a positive number.", call. = FALSE)
}
# Assign options to more readable variables used by the rest of the script
input_file <- opt$input
output_file <- opt$output
param_value <- opt$param_value
seed_value <- opt$seed
verbose_output <- opt$verbose
# --- Handle Random Seed ---
# When a seed was supplied, validate it and seed the RNG; otherwise only tell
# the user that no seed is in effect.
if (is.null(seed_value)) {
  # If the script involves randomness, it's good practice to always set a seed.
  # You might want to generate one if not provided, e.g., based on time,
  # or simply warn the user if randomness is critical.
  message("NOTE: No random seed provided via --seed.")
  # set.seed(as.integer(Sys.time())) # Example: set seed based on time if none provided
} else {
  # An NA integer passes is.integer(), so it is rejected explicitly here
  # rather than being handed to set.seed(). The length check ensures a single
  # value is used.
  if (!is.integer(seed_value) ||
        length(seed_value) != 1L ||
        is.na(seed_value)) {
    stop("--seed must be an integer.", call. = FALSE)
  }
  set.seed(seed_value)
  message("Random seed set to: ", seed_value)
}
#------------------------------------------------------------------------------
# Verbose Messaging & Timestamp Functionality
#------------------------------------------------------------------------------
# vmsg(): timestamped status message, emitted only in verbose mode.
#   ...  pieces of the message, forwarded to message() after the timestamp.
# Reads the script-level flag `verbose_output`; when it is FALSE nothing is
# emitted. Returns NULL invisibly in both cases.
vmsg <- function(...) {
  if (!verbose_output) {
    return(invisible(NULL))
  }
  message(format(Sys.time(), "[%Y-%m-%d %H:%M:%S] "), ...)
}
# msg(): timestamped status message that is always emitted.
#   ...  pieces of the message, forwarded to message() after the timestamp.
# The timestamp prefix has the form "[YYYY-MM-DD HH:MM:SS] ".
msg <- function(...) {
  message(format(Sys.time(), "[%Y-%m-%d %H:%M:%S] "), ...)
}
#------------------------------------------------------------------------------
# Output Directory Preparation (Optional)
#------------------------------------------------------------------------------
# Make sure the directory that will hold the output file exists.
output_dir <- dirname(output_file)
if (!dir.exists(output_dir)) {
  vmsg("Output directory does not exist: ", output_dir)
  vmsg("Creating output directory...")
  # dir.create() reports failure by returning FALSE together with a warning
  # rather than by raising an error, so an error-only handler would never see
  # it. Capture the text of a warning or error here and decide below.
  dir_created <- tryCatch(
    dir.create(output_dir, recursive = TRUE),
    warning = function(w) conditionMessage(w),
    error = function(e) conditionMessage(e)
  )
  # Treat the step as failed only when creation did not report success AND the
  # directory is still absent (it may have been created by someone else in the
  # meantime, which is fine).
  if (!isTRUE(dir_created) && !dir.exists(output_dir)) {
    failure_reason <- if (is.character(dir_created)) {
      dir_created
    } else {
      "unknown reason"
    }
    stop(
      "Failed to create output directory '", output_dir, "': ", failure_reason,
      call. = FALSE
    )
  }
  vmsg("Output directory created successfully.")
}
#------------------------------------------------------------------------------
# Print Parameters (if verbose)
#------------------------------------------------------------------------------
# Echo the effective settings (emitted only when verbose mode is on).
vmsg("---------------- Script Parameters ----------------")
vmsg("Input file : ", input_file)
vmsg("Output file : ", output_file)
vmsg("Parameter Value : ", param_value)
# The seed is optional, so show a placeholder when none was given.
vmsg(
  "Seed : ",
  if (is.null(seed_value)) "Not Set" else seed_value
)
vmsg("Verbose Output : ", verbose_output)
vmsg("---------------------------------------------------")
###############################################################################
# MAIN EXECUTION LOGIC
###############################################################################
#------------------------------------------------------------------------------
# Define Functions (Optional but Recommended for Complex Logic)
#------------------------------------------------------------------------------
# Example function structure
# process_data <- function(df, parameter) {
# vmsg("Entering process_data function...")
# # ... processing steps ...
# modified_df <- df * parameter # Example operation
# vmsg("Exiting process_data function.")
# return(modified_df)
# }
#------------------------------------------------------------------------------
# Data Loading
#------------------------------------------------------------------------------
msg("Loading input data from: ", input_file)
# Read the input table into `data`. Any error raised while reading or while
# reporting its shape is converted into a fatal error that names the file.
# Swap the reader for the expected input format (read_tsv, readRDS, ...).
tryCatch(
  expr = {
    data <- readr::read_csv(file = input_file, show_col_types = FALSE)
    msg("Data loaded successfully.")
    vmsg("Number of rows: ", nrow(data), ", Number of columns: ", ncol(data))
    vmsg("Column names: ", paste(colnames(data), collapse = ", "))
  },
  error = function(e) {
    stop(
      paste0(
        "Fatal Error: Could not load input data from '", input_file,
        "'. Details: ", e[["message"]]
      ),
      call. = FALSE
    )
  }
)
#------------------------------------------------------------------------------
# Data Processing / Analysis
#------------------------------------------------------------------------------
# Start of the main analysis steps
msg("Starting core analysis...")
# Placeholder step: scale the whole table by the parameter.
# For anything non-trivial, move this into a function, e.g.
# results <- process_data(data, param_value).
vmsg("Performing operation with parameter: ", param_value)
# NOTE: the multiplication assumes every column is numeric - add type checks
# or conversions before it if that does not hold for your data.
results <- data * param_value
vmsg("Performing further steps...")
# Placeholder combination: original columns first, then the scaled copies,
# whose names get a "_modified" suffix. Check that a plain column bind is what
# your analysis needs (matching dimensions, or use a join instead).
final_results <- stats::setNames(
  cbind(data, results),
  c(names(data), paste0(names(results), "_modified"))
)
msg("Core analysis finished.")
#------------------------------------------------------------------------------
# Output Generation
#------------------------------------------------------------------------------
msg("Saving results to: ", output_file)
# Write the combined table as TSV, with missing values written as empty
# strings. Swap the writer for the desired format (write_csv, saveRDS, ...).
# A failed write is fatal: it is re-raised with the target path in the message.
tryCatch(
  expr = {
    readr::write_tsv(final_results, output_file, na = "")
    msg("Results saved successfully.")
  },
  error = function(e) {
    stop(
      paste0(
        "Fatal Error: Could not write output to '", output_file,
        "'. Details: ", e[["message"]]
      ),
      call. = FALSE
    )
  }
)
# Optional: Generate plots or other output files here
#------------------------------------------------------------------------------
# Results Summary (Optional, if verbose)
#------------------------------------------------------------------------------
# Short preview of what was written; skipped entirely unless verbose mode is
# on. Replace with summary(), str() or a custom report as needed.
if (verbose_output) {
  vmsg("----------------- Results Summary -----------------")
  vmsg("First few rows of the output:")
  print(head(final_results))
  vmsg(
    "Output dimensions: ",
    paste(nrow(final_results), ncol(final_results), sep = " x ")
  )
  vmsg("---------------------------------------------------")
}
###############################################################################
# Finalization
###############################################################################
#------------------------------------------------------------------------------
# Session Information
#------------------------------------------------------------------------------
# Record the environment the script ran in (R version, platform, attached
# packages) so a run can be reproduced later.
msg("---------------- Session Information ----------------")
# Print explicitly: a bare sessionInfo() call only produces output through
# top-level auto-printing, which does not happen when the script is run via
# source() or from inside a function.
print(sessionInfo())
msg("---------------------------------------------------")
# End of script message
msg("Script execution completed successfully.")
# q(status=0) # Optional: Explicitly exit with success status