Skip to content

Commit 560fdf9

Browse files
committed
Merge 7.x into main
2 parents 599b111 + 18fa7cc commit 560fdf9

File tree

6 files changed

+181
-15
lines changed

6 files changed

+181
-15
lines changed

documentation/release_notes_7.markdown

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,35 @@ canonical: "/puppetdb/latest/release_notes.html"
1515

1616
# PuppetDB: Release notes
1717

18+
## PuppetDB 7.20.1
19+
20+
Released TBD.
21+
22+
### Bug fixes
23+
24+
* Fixed an issue with report garbage collection where a partition would become
25+
partially detached and block future garbage collection progress. Garbage
26+
collection will now finalize the partition detach operation and remove the
27+
table. ([GitHub #4013](https://github.com/puppetlabs/puppetdb/issues/4013))
28+
* Fixed an issue with report garbage collection where a partition would be
29+
detached, but the table was never deleted. Garbage collection will now
30+
identify and clean up these tables.
31+
([GitHub #4013](https://github.com/puppetlabs/puppetdb/issues/4013))
32+
33+
## PuppetDB 7.20.0
34+
35+
Released October 22 2024
36+
37+
### Improvements
38+
39+
* Released support and packages for Debian 12 (bookworm)
40+
41+
### Bug fixes
42+
43+
* Added a database constraint to prevent duplicate catalogs. If your database
44+
contains any duplicate catalogs, only the most recent catalog for each
45+
certname will be kept.
46+
1847
## PuppetDB 7.19.1
1948

2049
Released July 23 2024

documentation/release_notes_8.markdown

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,21 @@ canonical: "/puppetdb/latest/release_notes.html"
1414

1515
## PuppetDB 8.8.1
1616

17+
Released TBD.
18+
19+
### Bug fixes
20+
21+
* Fixed an issue with report garbage collection where a partition would become
22+
partially detached and block future garbage collection progress. Garbage
23+
collection will now finalize the partition detach operation and remove the
24+
table. ([GitHub #4013](https://github.com/puppetlabs/puppetdb/issues/4013))
25+
* Fixed an issue with report garbage collection where a partition would be
26+
detached, but the table was never deleted. Garbage collection will now
27+
identify and clean up these tables.
28+
([GitHub #4013](https://github.com/puppetlabs/puppetdb/issues/4013))
29+
30+
## PuppetDB 8.8.0
31+
1732
Released October 22 2024
1833

1934
### Improvements

src/puppetlabs/puppetdb/jdbc.clj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
:invalid-regular-expression "2201B"
4141
:lock-not-available "55P03"
4242
:program-limit-exceeded "54000"
43+
:not-in-prerequisite-state "55000"
4344
:query-canceled "57014"
4445
:serialization-failure "40001"
4546
;; check constraint violation

src/puppetlabs/puppetdb/scf/partitioning.clj

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
(ns puppetlabs.puppetdb.scf.partitioning
22
"Handles all work related to database table partitioning"
33
(:require
4+
[clojure.string :refer [lower-case]]
45
[puppetlabs.i18n.core :refer [trs]]
56
[puppetlabs.puppetdb.jdbc :as jdbc]
67
[schema.core :as s])
@@ -33,7 +34,7 @@
3334
(defn date-suffix
3435
[date]
3536
(let [formatter (.withZone DateTimeFormatter/BASIC_ISO_DATE (ZoneId/of "UTC"))]
36-
(.format date formatter)))
37+
(lower-case (.format date formatter))))
3738

3839
(defn to-zoned-date-time
3940
[date]

src/puppetlabs/puppetdb/scf/storage.clj

Lines changed: 60 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1625,6 +1625,39 @@
16251625
(let [{current-db-version :version} (sutils/db-metadata)]
16261626
(not (neg? (compare current-db-version pg14-db)))))
16271627

1628+
(defn finalize-pending-detach
1629+
"Finalize a previously failed detach operation. A partitioned table can
1630+
only have one partition pending detachment at any time."
1631+
[parent]
1632+
(let [pending (->> ["SELECT inhrelid::regclass AS child
1633+
FROM pg_catalog.pg_inherits
1634+
WHERE inhparent = ?::regclass AND inhdetachpending = true"
1635+
parent]
1636+
jdbc/query-to-vec
1637+
first ; at most one partition can be pending per parent, so the first row is enough
1638+
:child)]
1639+
(when pending ; nothing to finalize (and nil is returned) when no detach is pending
1640+
(log/info (trs "Finalizing detach for partition {0}" pending))
1641+
(jdbc/do-commands (format "ALTER TABLE %s DETACH PARTITION %s FINALIZE" parent pending))
1642+
(str pending)))) ; return the finalized partition's name as a string so callers can compare it
1643+
1644+
(defn find-stranded-partitions
1645+
"Identify tables that match the child format of a partitioned table (like reports_historical)
1646+
that are not present in the pg_inherits table. These partitions have been detached, but failed
1647+
to be deleted.
1648+
1649+
Tables that are not partitioned will also not be in the pg_inherits table, so you MUST
1650+
write a child-format that does not match any non-partitioned tables.
1651+
1652+
Returns a list of strings. Each string is a stranded partition that should be removed."
1653+
[child-format]
1654+
(->> [(str "SELECT tablename"
1655+
" FROM pg_tables WHERE tablename ~ ?" ; child-format is bound here and matched as a regular expression
1656+
" AND tablename NOT IN (SELECT inhrelid::regclass::text FROM pg_catalog.pg_inherits)") ; exclude tables still listed as children in pg_inherits
1657+
child-format]
1658+
jdbc/query-to-vec
1659+
(map (comp str :tablename)))) ; one plain table-name string per result row
1660+
16281661
(defn prune-daily-partitions
16291662
"Either detaches or drops obsolete day-oriented partitions
16301663
older than the date. Deletes or detaches only the oldest such candidate if
@@ -1650,14 +1683,27 @@
16501683
candidates (->> (partitioning/get-partition-names parent-table)
16511684
(filter expired?)
16521685
sort)
1653-
drop-one (fn [table]
1686+
detach (fn detach [parent child]
1687+
(jdbc/do-commands-outside-txn
1688+
(format "alter table %s detach partition %s concurrently" parent child)))
1689+
drop-one (fn drop-one [table]
16541690
(update-lock-status status-key inc)
16551691
(try!
16561692
(if just-detach?
1657-
(jdbc/do-commands-outside-txn
1658-
(format "alter table %s detach partition %s concurrently" parent-table table))
1693+
(let [ex (try
1694+
(detach parent-table table)
1695+
(catch SQLException ex
1696+
(if (= (jdbc/sql-state :not-in-prerequisite-state) (.getSQLState ex))
1697+
ex
1698+
(throw ex))))]
1699+
(when (instance? SQLException ex)
1700+
(let [finalized-table (finalize-pending-detach parent-table)]
1701+
(when-not (= finalized-table table)
1702+
;; Retry, unless the finalized partition detach was
1703+
;; for the same table
1704+
(detach parent-table table)))))
16591705
(jdbc/do-commands
1660-
(format "drop table if exists %s cascade" table)))
1706+
(format "drop table if exists %s cascade" table)))
16611707
(finally
16621708
(update-lock-status status-key dec))))
16631709
drop #(if incremental?
@@ -1697,7 +1743,7 @@
16971743
"Drops the given set of tables. Will throw an SQLException termination if the
16981744
operation takes much longer than PDB_GC_DAILY_PARTITION_DROP_LOCK_TIMEOUT_MS."
16991745
[old-partition-tables update-lock-status status-key]
1700-
(let [drop #(doseq [table old-partition-tables]
1746+
(let [drop #(doseq [table (distinct old-partition-tables)]
17011747
(try
17021748
(update-lock-status status-key inc)
17031749
(jdbc/do-commands
@@ -1720,10 +1766,11 @@
17201766
update-lock-status :write-locking-resource-events))
17211767
;; PG14+
17221768
(let [detached-tables
1723-
(detach-daily-partitions "resource_events_historical" date incremental?
1724-
update-lock-status :write-locking-resource-events)]
1769+
(detach-daily-partitions "resource_events_historical" date incremental?
1770+
update-lock-status :write-locking-resource-events)
1771+
stranded-tables (find-stranded-partitions "^resource_events_\\d\\d\\d\\d\\d\\d\\d\\dz$")]
17251772
(jdbc/with-db-transaction []
1726-
(drop-partition-tables! detached-tables
1773+
(drop-partition-tables! (concat detached-tables stranded-tables)
17271774
update-lock-status :write-locking-resource-events)))))
17281775

17291776
(defn delete-reports-older-than-in-pg-11!
@@ -1767,10 +1814,12 @@
17671814
(detach-daily-partitions "resource_events_historical" effective-resource-events-ttl
17681815
incremental? update-lock-status
17691816
:write-locking-resource-events)
1817+
stranded-events-tables (find-stranded-partitions "^resource_events_\\d\\d\\d\\d\\d\\d\\d\\dz$")
17701818
detached-report-tables
17711819
(detach-daily-partitions "reports_historical" report-ttl
17721820
incremental? update-lock-status
1773-
:write-locking-reports)]
1821+
:write-locking-reports)
1822+
stranded-reports-tables (find-stranded-partitions "^reports_\\d\\d\\d\\d\\d\\d\\d\\dz$")]
17741823
;; Now we can delete the partitions with less intrusive locking.
17751824
(jdbc/with-db-transaction []
17761825
;; Nothing should acquire locks on the detached tables, but to be safe, acquire
@@ -1780,9 +1829,9 @@
17801829
;; force a resource-events GC. prior to partitioning, this would have happened
17811830
;; via a cascade when the report was deleted, but now we just drop whole tables
17821831
;; of resource events.
1783-
(drop-partition-tables! detached-resource-event-tables
1832+
(drop-partition-tables! (concat detached-resource-event-tables stranded-events-tables)
17841833
update-lock-status :write-locking-resource-events)
1785-
(drop-partition-tables! detached-report-tables
1834+
(drop-partition-tables! (concat detached-report-tables stranded-reports-tables)
17861835
update-lock-status :write-locking-reports))))))
17871836

17881837
;; A db version that is "allowed" but not supported is deprecated

test/puppetlabs/puppetdb/cli/services_test.clj

Lines changed: 74 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
[puppetlabs.puppetdb.scf.partitioning
99
:refer [create-resource-events-partition
1010
create-reports-partition
11-
get-temporal-partitions]]
11+
get-temporal-partitions]
12+
:as part]
1213
[puppetlabs.trapperkeeper.testutils.logging
1314
:refer [with-log-output
1415
logs-matching
@@ -592,7 +593,7 @@
592593
(with-test-db
593594
(let [config (-> (create-temp-config)
594595
(assoc :database *db* :read-database *read-db*)
595-
(assoc-in [:database :gc-interval] "0.01"))
596+
(assoc-in [:database :gc-interval] "60"))
596597
store-report #(sync-command-post (svc-utils/pdb-cmd-url)
597598
example-certname
598599
"store report"
@@ -620,7 +621,77 @@
620621
:resource-events-ttl (time/parse-period "1d")
621622
:db-lock-status db-lock-status})
622623
(is (= 1 (count (jdbc/query ["SELECT * FROM reports_latest"]))))
623-
(is (empty? (jdbc/query ["SELECT * FROM reports_historical"])))))))))
624+
(is (empty? (jdbc/query ["SELECT * FROM reports_historical"])))
625+
(jdbc/do-commands "DELETE FROM reports"))
626+
627+
;; These tests are not applicable unless our Postgres version is new enough
628+
;; to support the concurrent partition detach feature.
629+
(when (scf-store/detach-partitions-concurrently?)
630+
(testing "a partition stuck in the pending state is finalized and removed"
631+
(let [old-ts (-> 2 time/days time/ago)
632+
partition-table (format "reports_%s"
633+
(part/date-suffix (part/to-zoned-date-time (time/to-timestamp old-ts))))
634+
lock-acquired (promise)
635+
partition-pending-detach (promise)]
636+
(store-report (time/to-string old-ts))
637+
(store-report (to-string (now)))
638+
639+
(future
640+
;; Create a query that will block the ACCESS EXCLUSIVE lock needed
641+
;; by the second transaction of the concurrent detach below
642+
(jdbc/with-transacted-connection *read-db*
643+
(jdbc/with-db-transaction []
644+
(jdbc/query [(format "select * from %s" partition-table)])
645+
(deliver lock-acquired partition-table)
646+
647+
;; wait for partition detach to fail
648+
@partition-pending-detach)))
649+
650+
;; Wait until we are sure that the detach partition operation will be blocked
651+
@lock-acquired
652+
653+
(try
654+
(jdbc/do-commands-outside-txn
655+
"SET statement_timeout = 100"
656+
(format "ALTER TABLE reports_historical DETACH PARTITION %s CONCURRENTLY" partition-table))
657+
(catch java.sql.SQLException _)
658+
(finally
659+
(deliver partition-pending-detach partition-table)
660+
(jdbc/do-commands-outside-txn "SET statement_timeout = 0")))
661+
662+
(is (= [{:inhdetachpending true}]
663+
(jdbc/query ["select inhdetachpending from pg_catalog.pg_inherits where inhparent = 'reports_historical'::regclass and inhrelid = ?::regclass" partition-table])))
664+
665+
(svcs/sweep-reports! *db* {:incremental? false
666+
:report-ttl (time/parse-period "1d")
667+
:resource-events-ttl (time/parse-period "1d")
668+
:db-lock-status db-lock-status})
669+
670+
(is (empty?
671+
(jdbc/query ["SELECT tablename FROM pg_tables WHERE tablename = ?" partition-table])))
672+
673+
(jdbc/do-commands "DELETE FROM reports")))
674+
675+
(testing "a detached partition that was not removed is cleaned up by gc"
676+
(let [old-ts (-> 2 time/days time/ago)
677+
partition-table (format "reports_%s"
678+
(part/date-suffix (part/to-zoned-date-time (time/to-timestamp old-ts))))]
679+
(store-report (time/to-string old-ts))
680+
(store-report (to-string (now)))
681+
682+
;; Strand the partition before calling GC
683+
(jdbc/do-commands-outside-txn
684+
(format "ALTER TABLE reports_historical DETACH PARTITION %s CONCURRENTLY" partition-table))
685+
686+
(svcs/sweep-reports! *db* {:incremental? false
687+
:report-ttl (time/parse-period "1d")
688+
:resource-events-ttl (time/parse-period "1d")
689+
:db-lock-status db-lock-status})
690+
691+
(is (empty?
692+
(jdbc/query ["SELECT tablename FROM pg_tables WHERE tablename = ?" partition-table])))
693+
694+
(jdbc/do-commands "DELETE FROM reports")))))))))
624695

625696
(deftest reports-analysis
626697
;; For now, just test for the initial invocation

0 commit comments

Comments
 (0)