-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path_helpers.R
More file actions
145 lines (118 loc) · 4.3 KB
/
_helpers.R
File metadata and controls
145 lines (118 loc) · 4.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
### ------------------------------------
### Several helper Functions
### ------------------------------------
# Function to Convert Jupyter Notebooks to R-Files
convert_ipynb_to_r <- function(input, output = xfun::with_ext(input, "R"), keep_rmd = FALSE, ...) {
# Check if necessary packages are installed
if (!require("rmarkdown")) return("Missing necessary package: 'rmarkdown'")
if (!require("knitr")) return("Missing necessary package: 'knitr'")
# Check if file extension is matches Jupyter notebook.
if (tolower(xfun::file_ext(input)) != "ipynb") {
return("Error: Invalid file format")
}
# Conversion process: .ipynb to .Rmd
rmarkdown::convert_ipynb(input)
# Conversion process: .Rmd to .R
knitr::purl(xfun::with_ext(input, "Rmd"), output = output)
# Keep or remove intermediary .Rmd
if (keep_rmd == FALSE) {
file.remove(xfun::with_ext(input, "Rmd"))
}
}
### ------------------------------------
# Function to generate 1:n lags for a specific variable
get_lags <- function(df, var, lags) {
map_lag <- lags %>% purrr::map(~purrr::partial(lag, n = .x))
return(df %>% dplyr::mutate(dplyr::across(.cols = {{var}}, .fns = map_lag, .names = "{.col}_lag{lags}")))
}
### ------------------------------------
# Output Combination Function for foreach (Application: Finance)
comb <- function(...) {
mapply("rbind", ..., SIMPLIFY = FALSE)
}
### ------------------------------------
# Transformation function based on Matlab code from McCracken (Application: Inflation)
transform_tcode <- function(data) {
tcode <- data[1]
data <- data[2:length(data)]
# 1. No transformation
if (tcode == 1) {
output <- data
# 2. First differences
} else if (tcode == 2) {
output <- data - dplyr::lag(data, 1)
# 3. Second differences
} else if (tcode == 3) {
output <- (data - dplyr::lag(data, 1)) - (dplyr::lag(data, 1) - dplyr::lag(data, 2))
# 4. Natural Log
} else if (tcode == 4) {
output <- log(data)
# 5. First differences of natural log
} else if (tcode == 5) {
output <- log(data) - dplyr::lag(log(data), 1)
# 6. Second differences of natural log
} else if (tcode == 6) {
output <- (log(data) - dplyr::lag(log(data), 1)) - (dplyr::lag(log(data), 1) - dplyr::lag(log(data), 2))
# 7. First differences of percent change
} else {
output <- (data / dplyr::lag(data, 1) - 1) - (dplyr::lag(data, 1) / dplyr::lag(data, 2) - 1)
}
return(c(tcode, output))
}
### ------------------------------------
# Transformation Response Variable (Application: Inflation)
transform_ytcode <- function(data) {
# Differences of natural log
output <- log(data) - dplyr::lag(log(data), 1)
return(output)
}
### ------------------------------------
# Adjust for outliers using fraction of IQR (Application: Inflation)
adjout_oos <- function(obs, init, thrs, iwin) {
# Check NA-values
check_na(obs)
# Remove NA
na_ctr <- sum(is.na(obs))
y <- na.omit(obs)
# Define Variables
init_seq <- seq_len(init)
roll_seq <- seq(init + 1, length(y))
# Initialize vectors
mod_vec <- rep(NA, length(y))
iqr_vec <- rep(NA, length(y))
med_vec <- rep(NA, length(y))
# Training Sample
iqr_vec[init_seq] <- IQR(y[init_seq])
med_vec[init_seq] <- median(y[init_seq])
# Calculate Expanding Median and IQR for outlier detection
iqr_vec[roll_seq] <- sapply(roll_seq, function(i) IQR(y[1:i]))
med_vec[roll_seq] <- sapply(roll_seq, function(i) median(y[1:i]))
# Calculate Rolling Median for Imputation
for (i in seq_len(length(y))) {
j1 <- max(1, (i - iwin + 1))
j2 <- i
mod_vec[i] <- median(y[j1:j2])
}
# Absolute Deviation from Median
y_dev <- abs(y - med_vec)
# Above / Below Threshold
iabove <- y_dev > (thrs * iqr_vec)
ibelow <- y_dev <= (thrs * iqr_vec)
# Create Output
output <- (ibelow * y) + (iabove * mod_vec)
# Return
return(c(rep(NA, na_ctr), output))
}
### ------------------------------------
# Check whether a vector contains non-consecutive na-values from the start
check_na <- function(vec) {
# Find position of NA values
na_positions <- which(is.na(vec))
# Proceed if NA values in vector
if (length(na_positions) > 0) {
# Check if NA values are not consecutive from the start
if (any(diff(na_positions) != 1) || min(na_positions) != 1) {
warning("Vector contains non-consecutive NA-values.")
}
}
}