|
| 1 | +(ns puppetlabs.puppetdb.cli.pdb-dataset |
| 2 | + "Pg_restore and timeshift entries utility |
| 3 | + This command-line tool restores an empty database from a backup file (pg_dump generated file), then updates all the |
| 4 | + timestamps inside the database. |
| 5 | + It does this by calculating the period between the newest timestamp inside the file and the provided date. |
| 6 | + Then, every timestamp is shifted with that period. |
| 7 | + It accepts two parameters: |
| 8 | + - [Mandatory] -d / --dumpfile |
| 9 | + Path to the dumpfile that will be used to restore the database. |
| 10 | + - [Optional] -t / --timeshift-to
| 11 | + Timestamp to which all timestamps from the dumpfile will be shifted after the restore. |
| 12 | + If it's not provided, the system's current timestamp will be used. |
| 13 | + !!! All timestamps are converted to UTC (zero offset), e.g. a timestamp like 2015-03-26T10:58:51+10:00
| 14 | + will become 2015-03-26T00:58:51Z !!!
| 15 | + !!! If the time difference between the latest entry in the dumpfile and the time provided to timeshift-to is less |
| 16 | + than 24 hours this tool will fail !!!" |
| 17 | + |
| 18 | + (:require |
| 19 | + [clojure.java.shell :as shell] |
| 20 | + [puppetlabs.puppetdb.cli.util :refer [exit run-cli-cmd]] |
| 21 | + [puppetlabs.kitchensink.core :as kitchensink] |
| 22 | + [puppetlabs.puppetdb.utils :as utils :refer [println-err]] |
| 23 | + [puppetlabs.puppetdb.jdbc :as jdbc] |
| 24 | + [puppetlabs.puppetdb.scf.partitioning :as partitioning] |
| 25 | + [puppetlabs.puppetdb.time :refer [now to-timestamp]]) |
| 26 | + (:import (java.lang Math))) |
| 27 | + |
| 28 | +;; Argument parsing |
| 29 | + |
(defn parse-timeshift-to
  "Converts `time-string` with `to-timestamp` and returns the result.
  Signals a CLI error when the conversion yields a falsey value."
  [time-string]
  (let [ts (to-timestamp time-string)]
    (when-not ts
      (utils/throw-sink-cli-error "Error: time shift date must be in UTC format!"))
    ts))
| 37 | + |
(defn validate-options
  "Builds the normalized option map from the parsed CLI `options`.
  :timeshift-to is the parsed user-supplied value, or `(now)` when the
  option was not given; :dumpfile is passed through unchanged."
  [options]
  (let [{:keys [timeshift-to dumpfile]} options]
    {:timeshift-to (if timeshift-to
                     (parse-timeshift-to timeshift-to)
                     (now))
     :dumpfile dumpfile}))
| 45 | + |
(defn validate-cli!
  "Parses the raw command-line `args` (with --dumpfile required) and returns
  the result of `validate-options` on the parsed options. Parsing runs
  inside `utils/try-process-cli`."
  [args]
  (let [option-specs [["-t" "--timeshift-to DATE" "Date in UTC format"]
                      ["-d" "--dumpfile DUMPFILE" "Dumpfile"]]
        mandatory [:dumpfile]
        parse-and-validate (fn []
                             (validate-options
                              (first (kitchensink/cli! args option-specs mandatory))))]
    (utils/try-process-cli parse-and-validate)))
| 57 | + |
(defn collect-pdbbox-config
  "Reads $PDBBOX/conf.d/pdb.ini and returns `args` with the file's :database
  section stored under :config. Signals a CLI error when the PDBBOX
  environment variable is unset or empty."
  [args]
  (let [pdbbox-path (System/getenv "PDBBOX")]
    ;; `empty?` is true for both nil and "".
    (when (empty? pdbbox-path)
      (utils/throw-sink-cli-error "Error: PDBBOX env variable not set!"))
    (->> (str pdbbox-path "/conf.d/pdb.ini")
         kitchensink/ini-to-map
         :database
         (assoc args :config))))
| 66 | + |
| 67 | +;; Time manipulation |
| 68 | + |
;; Unit-conversion constants used by the time helpers in this namespace.
(def miliseconds-in-minute (* 60 1000))
(def minutes-in-day (* 24 60))
(def miliseconds-in-day (* minutes-in-day miliseconds-in-minute))
| 72 | + |
(defn make-minutes-time-diff
  "Returns the whole number of minutes (truncated toward zero by `quot`)
  from `substract-time` to `max-time`. Both arguments must respond to
  `.getTime` with epoch milliseconds."
  [max-time substract-time]
  (-> (.getTime max-time)
      (- (.getTime substract-time))
      (quot miliseconds-in-minute)))
| 78 | + |
(defn to-days
  "Converts `timestamp` (anything responding to `.getTime`) to a day count
  since the epoch. The value is rounded to the NEAREST day, not floored."
  [timestamp]
  (-> (.getTime timestamp)
      (/ miliseconds-in-day)
      float
      Math/round))
| 82 | + |
(defn from-days-to-timestamp
  "Converts `instant`, a number of days since the epoch, to a timestamp by
  scaling it to milliseconds and handing it to `to-timestamp`."
  [instant]
  (let [epoch-millis (* instant miliseconds-in-day)]
    (to-timestamp epoch-millis)))
| 86 | + |
(defn days-from-inst-vec
  "Returns the distinct `to-days` values of the `column-name` entry of each
  row in `timestamp-vec`."
  [timestamp-vec column-name]
  (->> timestamp-vec
       (mapv (fn [row] (to-days (column-name row))))
       distinct))
| 90 | + |
| 91 | +;; Table updates |
| 92 | + |
(defn create-copy-table
  "Creates an empty `<table>_copy` table with the same definition as `table`
  (LIKE ... INCLUDING ALL)."
  [table]
  (let [copy-name (str table "_copy")
        sql (str "CREATE TABLE " copy-name " (LIKE " table " INCLUDING ALL)")]
    (jdbc/do-commands sql)))
| 96 | + |
(defn copy-table
  "Copies every row of `table` into `<table>_copy`."
  [table]
  (let [sql (str "INSERT INTO " table "_copy
 SELECT * FROM " table)]
    (jdbc/do-commands sql)))
| 101 | + |
(defn create-partitions
  "Creates the partitions that the time-shifted rows of the reports and
  resource_events tables will land in.

  `time-diff-reports` and `time-diff-resource-events` are the shifts, in
  minutes, that will be applied to reports.producer_timestamp and
  resource_events.timestamp respectively.

  For every existing row the shifted instant is computed and reduced to its
  day (floor of epoch days), so each distinct target day is requested
  exactly once. Each table is handled independently: an empty table simply
  yields no partitions and does not prevent partitions from being created
  for the other table."
  [time-diff-reports time-diff-resource-events]
  (let [shifted-days (fn [rows column time-diff]
                       ;; The shift is applied inside the mapping fn so that
                       ;; an empty table never touches `time-diff`.
                       (into #{}
                             (map (fn [row]
                                    (Math/floorDiv
                                     (long (+ (.getTime (column row))
                                              (* time-diff miliseconds-in-minute)))
                                     (long miliseconds-in-day))))
                             rows))
        report-rows (jdbc/query-to-vec "SELECT producer_timestamp FROM reports")
        event-rows (jdbc/query-to-vec "SELECT timestamp FROM resource_events")]
    ;; Two separate loops: a single doseq with two bindings would iterate
    ;; the cartesian product of both day sets.
    (doseq [day (shifted-days report-rows :producer_timestamp time-diff-reports)]
      (partitioning/create-reports-partition (from-days-to-timestamp day)))
    (doseq [day (shifted-days event-rows :timestamp time-diff-resource-events)]
      (partitioning/create-resource-events-partition (from-days-to-timestamp day)))))
| 120 | + |
(defn database-empty?
  "Returns true when information_schema.tables has no table named
  'schema_migrations', which this tool treats as an empty database."
  []
  (-> "SELECT 1 FROM information_schema.tables WHERE table_name = 'schema_migrations'"
      jdbc/query
      empty?))
| 125 | + |
(defn restore-database
  "Runs pg_restore on the dumpfile at (:dumpfile args) into the puppetdb
  database and returns `args` unchanged.

  A non-zero pg_restore exit status is reported on stderr together with
  pg_restore's own error output, but is not treated as fatal. The restore is
  considered failed, and a CLI error is signalled, only when the database is
  still empty afterwards."
  [args]
  (let [dumpfile_path (:dumpfile args)]
    (println-err "Restoring database from backup")
    (let [{:keys [exit err]} (shell/sh "pg_restore" "--role=postgres" "-U" "puppetdb"
                                       "--no-owner" "--no-acl" "-d" "puppetdb"
                                       dumpfile_path)]
      ;; Surface the exit status and stderr instead of discarding them.
      (when-not (zero? exit)
        (println-err (str "pg_restore exited with status " exit ": " err))))
    (when (database-empty?)
      (utils/throw-sink-cli-error "Error: Restore failed!"))
    args))
| 134 | + |
(defn ensure-database-empty
  "Signals a CLI error unless `database-empty?` reports an empty database.
  The single argument is ignored."
  [_]
  (when-not (database-empty?)
    (utils/throw-sink-cli-error "Error: puppetdb database already exists and it isn't empty!")))
| 139 | + |
(defn update-simple-tables
  "Shifts the producer_timestamp and timestamp columns of `table` by
  `time-diff` minutes with a single UPDATE."
  [table time-diff]
  (let [sql (str "UPDATE " table
                 " SET producer_timestamp = producer_timestamp + (" time-diff " * INTERVAL
 '1 minute'), timestamp = timestamp + (" time-diff " * INTERVAL '1 minute')")]
    (jdbc/do-commands sql)))
| 144 | + |
(defn add-reports-trigger
  "Installs the plpgsql function reports_insert1_trigger() and a BEFORE
  INSERT row trigger of the same name on reports. The function inserts each
  new row into the table named reports_<YYYYMMDD>Z, derived from the row's
  producer_timestamp in UTC, and returns NULL."
  []
  (let [trigger-sql "create function reports_insert1_trigger() returns trigger
 language plpgsql
 as
 $$
 DECLARE
 tablename varchar;
 BEGIN
 SELECT FORMAT('reports_%sZ',
 TO_CHAR(NEW.\"producer_timestamp\" AT TIME ZONE 'UTC', 'YYYYMMDD')) INTO tablename;
 EXECUTE 'INSERT INTO ' || tablename || ' SELECT ($1).*'
 USING NEW;
 RETURN NULL;
 END;
 $$;
 alter function reports_insert1_trigger() owner to puppetdb;
 CREATE TRIGGER reports_insert1_trigger
 BEFORE INSERT ON reports
 FOR EACH ROW EXECUTE PROCEDURE reports_insert1_trigger()"]
    (jdbc/do-prepared trigger-sql)))
| 165 | + |
(defn update-reports
  "Shifts producer_timestamp, start_time, end_time and receive_time of every
  report by `time-diff` minutes.

  The rows are copied to reports_copy and shifted there, then reports is
  emptied and refilled from the copy while the temporary
  reports_insert1_trigger is installed. Finally the trigger function and the
  copy table are dropped."
  [time-diff]
  (let [shift (str "(" time-diff " * INTERVAL '1 minute')")]
    (create-copy-table "reports")
    (copy-table "reports")
    (jdbc/do-commands (str "UPDATE reports_copy
 SET producer_timestamp = producer_timestamp + " shift ",
 start_time = start_time + " shift ",
 end_time = end_time + " shift ",
 receive_time = receive_time + " shift))
    (jdbc/do-commands "DELETE FROM reports")
    (add-reports-trigger)
    (jdbc/do-commands "INSERT INTO reports SELECT * FROM reports_copy")
    (jdbc/do-commands "DROP FUNCTION reports_insert1_trigger() CASCADE")
    (jdbc/do-commands "DROP TABLE IF EXISTS reports_copy")))
| 180 | + |
(defn update-resource-events
  "Shifts the timestamp column of every resource event by `time-diff`
  minutes. The rows are copied to resource_events_copy and shifted there,
  then resource_events is emptied and refilled from the copy, and the copy
  table is dropped."
  [time-diff]
  (let [shift (str "(" time-diff " * INTERVAL '1 minute')")]
    (create-copy-table "resource_events")
    (copy-table "resource_events")
    (jdbc/do-commands (str "UPDATE resource_events_copy SET timestamp = timestamp + " shift))
    (jdbc/do-commands "DELETE FROM resource_events")
    (jdbc/do-commands "INSERT INTO resource_events SELECT * FROM resource_events_copy")
    (jdbc/do-commands "DROP TABLE IF EXISTS resource_events_copy")))
| 189 | + |
(defn update-tables
  "Shifts the timestamps of catalogs, factsets, reports and resource_events
  so that the newest entry lands on (:timeshift-to args).

  catalogs, factsets and reports are shifted by the distance, in minutes,
  between the newest reports.producer_timestamp and the target time.
  resource_events is shifted by the distance between its own newest
  timestamp and the target time.

  Signals a CLI error when reports has no rows, because no shift can be
  derived. When resource_events has no rows, its shift is skipped."
  [args]
  (let [time-to-shift-to (to-timestamp (:timeshift-to args))
        ;; max() over an empty table yields a single row holding NULL.
        max-time (:max (first (jdbc/query "SELECT max(producer_timestamp) FROM reports")))
        max-time-re (:max (first (jdbc/query "SELECT max(timestamp) FROM resource_events")))]
    (when-not max-time
      (utils/throw-sink-cli-error "Error: reports table is empty, cannot compute the time shift!"))
    (let [time-diff (make-minutes-time-diff time-to-shift-to max-time)
          ;; With no resource events there is nothing to shift; 0 keeps the
          ;; partition step well-defined.
          time-diff-re (if max-time-re
                         (make-minutes-time-diff time-to-shift-to max-time-re)
                         0)]
      (println-err "Updating data timestamps")
      (update-simple-tables "catalogs" time-diff)
      (update-simple-tables "factsets" time-diff)
      (create-partitions time-diff time-diff-re)
      (update-reports time-diff)
      (when max-time-re
        (update-resource-events time-diff-re)))))
| 203 | + |
(defn vacuum-db
  "Runs `vacuumdb -f` on the puppetdb database as the postgres user and
  returns the result of `shell/sh`. The single argument is ignored."
  [_]
  (println-err "Running vacuum full on puppetdb database")
  (let [command ["vacuumdb" "-f" "puppetdb" "-U" "postgres"]]
    (apply shell/sh command)))
| 208 | + |
(defn connect-to-db
  "Builds a connection pool from (:config args) plus this tool's fixed
  connection settings, binds it as jdbc/*db*, and calls each function in
  `methods-array` with `args`, in order. Returns a vector of their results."
  [args methods-array]
  (let [fixed-settings {:user "puppetdb"
                        :subprotocol "postgresql"
                        :pool-name "PDBDataSetPool"
                        :connection-timeout 3000
                        :rewrite-batched-inserts "true"}
        config (merge (:config args) fixed-settings)
        ;; NOTE(review): the pool is never closed in this function.
        pool (jdbc/make-connection-pool config)]
    (binding [jdbc/*db* {:datasource pool}]
      (mapv (fn [step] (step args)) methods-array))))
| 219 | + |
(defn -main
  "Command-line entry point. Validates the arguments, loads the database
  configuration, then runs, in order: the empty-database check, the restore,
  the timestamp shift and the vacuum. The command function returns 0, and
  the result of `run-cli-cmd` is passed to `exit`."
  [& args]
  (let [pipeline [ensure-database-empty
                  restore-database
                  update-tables
                  vacuum-db]
        command (fn []
                  (connect-to-db (collect-pdbbox-config (validate-cli! args))
                                 pipeline)
                  0)]
    (exit (run-cli-cmd command))))
0 commit comments