|
1673 | 1673 | (let [{current-db-version :version} (sutils/db-metadata)] |
1674 | 1674 | (not (neg? (compare current-db-version pg14-db))))) |
1675 | 1675 |
|
(defn finalize-pending-detach
  "Completes a detach of a partition from `parent` that was started
  earlier but never finished. At most one partition of a partitioned
  table can be pending detachment at a time, so only the first row
  reported by pg_inherits is considered.

  Returns the name of the finalized partition as a string, or nil when
  no detach is pending for `parent`."
  [parent]
  (let [pending-sql "SELECT inhrelid::regclass AS child
                     FROM pg_catalog.pg_inherits
                     WHERE inhparent = ?::regclass AND inhdetachpending = true"
        rows (jdbc/query-to-vec [pending-sql parent])]
    (when-let [child (:child (first rows))]
      (log/info (trs "Finalizing detach for partition {0}" child))
      (jdbc/do-commands
       (format "ALTER TABLE %s DETACH PARTITION %s FINALIZE" parent child))
      ;; Return a plain string so callers can compare it with table names.
      (str child))))
| 1691 | + |
(defn find-stranded-partitions
  "Finds tables whose names match `child-format` (a regular expression
  applied with the PostgreSQL `~` operator) but that have no entry as a
  child in pg_inherits. For a partitioned table such as
  reports_historical, these are partitions that were detached but whose
  removal never completed.

  Ordinary, non-partitioned tables are also absent from pg_inherits, so
  `child-format` MUST be written so that it cannot match any
  non-partitioned table.

  Returns a sequence of strings, each naming a stranded partition that
  should be removed."
  [child-format]
  (let [stranded-sql (str "SELECT tablename"
                          " FROM pg_tables WHERE tablename ~ ?"
                          " AND tablename NOT IN (SELECT inhrelid::regclass::text FROM pg_catalog.pg_inherits)")
        rows (jdbc/query-to-vec [stranded-sql child-format])]
    (map #(str (:tablename %)) rows)))
| 1708 | + |
1676 | 1709 | (defn prune-daily-partitions |
1677 | 1710 | "Either detaches or drops obsolete day-oriented partitions |
1678 | 1711 | older than the date. Deletes or detaches only the oldest such candidate if |
|
1698 | 1731 | candidates (->> (partitioning/get-partition-names table-prefix) |
1699 | 1732 | (filter expired?) |
1700 | 1733 | sort) |
1701 | | - drop-one (fn [table] |
| 1734 | + detach (fn detach [parent child] |
| 1735 | + (jdbc/do-commands-outside-txn |
| 1736 | + (format "alter table %s detach partition %s concurrently" parent child))) |
| 1737 | + drop-one (fn drop-one [table] |
1702 | 1738 | (update-lock-status status-key inc) |
1703 | 1739 | (try! |
1704 | 1740 | (if just-detach? |
1705 | | - (jdbc/do-commands-outside-txn |
1706 | | - (format "alter table %s detach partition %s concurrently" table-prefix table)) |
| 1741 | + (let [ex (try |
| 1742 | + (detach table-prefix table) |
| 1743 | + (catch SQLException ex |
| 1744 | + (if (= (jdbc/sql-state :not-in-prerequisite-state) (.getSQLState ex)) |
| 1745 | + ex |
| 1746 | + (throw ex))))] |
| 1747 | + (when (instance? SQLException ex) |
| 1748 | + (let [finalized-table (finalize-pending-detach table-prefix)] |
| 1749 | + (when-not (= finalized-table table) |
| 1750 | + ;; Retry, unless the finalized partition detach was |
| 1751 | + ;; for the same table |
| 1752 | + (detach table-prefix table))))) |
1707 | 1753 | (jdbc/do-commands |
1708 | | - (format "drop table if exists %s cascade" table))) |
| 1754 | + (format "drop table if exists %s cascade" table))) |
1709 | 1755 | (finally |
1710 | 1756 | (update-lock-status status-key dec)))) |
1711 | 1757 | drop #(if incremental? |
|
1745 | 1791 | "Drops the given set of tables. Will throw an SQLException termination if the |
1746 | 1792 | operation takes much longer than PDB_GC_DAILY_PARTITION_DROP_LOCK_TIMEOUT_MS." |
1747 | 1793 | [old-partition-tables update-lock-status status-key] |
1748 | | - (let [drop #(doseq [table old-partition-tables] |
| 1794 | + (let [drop #(doseq [table (distinct old-partition-tables)] |
1749 | 1795 | (try |
1750 | 1796 | (update-lock-status status-key inc) |
1751 | 1797 | (jdbc/do-commands |
|
1769 | 1815 | ;; PG14+ |
1770 | 1816 | (let [detached-tables |
1771 | 1817 | (detach-daily-partitions "resource_events" date incremental? |
1772 | | - update-lock-status :write-locking-resource-events)] |
| 1818 | + update-lock-status :write-locking-resource-events) |
| 1819 | + stranded-tables (find-stranded-partitions "^resource_events_\\d\\d\\d\\d\\d\\d\\d\\dz$")] |
1773 | 1820 | (jdbc/with-db-transaction [] |
1774 | | - (drop-partition-tables! detached-tables |
| 1821 | + (drop-partition-tables! (concat detached-tables stranded-tables) |
1775 | 1822 | update-lock-status :write-locking-resource-events))))) |
1776 | 1823 |
|
1777 | 1824 | (defn cleanup-dropped-report-certnames |
|
1824 | 1871 | ;; PG14+ |
1825 | 1872 | ;; Detach partition concurrently must take place outside of a transaction. |
1826 | 1873 | (let [detached-resource-event-tables |
1827 | | - (detach-daily-partitions "resource_events" effective-resource-events-ttl |
1828 | | - incremental? update-lock-status |
1829 | | - :write-locking-resource-events) |
| 1874 | + (detach-daily-partitions "resource_events" effective-resource-events-ttl |
| 1875 | + incremental? update-lock-status |
| 1876 | + :write-locking-resource-events) |
| 1877 | + stranded-events-tables (find-stranded-partitions "^resource_events_\\d\\d\\d\\d\\d\\d\\d\\dz$") |
1830 | 1878 | detached-report-tables |
1831 | | - (detach-daily-partitions "reports" report-ttl |
1832 | | - incremental? update-lock-status |
1833 | | - :write-locking-reports)] |
| 1879 | + (detach-daily-partitions "reports" report-ttl |
| 1880 | + incremental? update-lock-status |
| 1881 | + :write-locking-reports) |
| 1882 | + stranded-reports-tables (find-stranded-partitions "^reports_\\d\\d\\d\\d\\d\\d\\d\\dz$")] |
1834 | 1883 | ;; Now we can delete the partitions with less intrusive locking. |
1835 | 1884 | (jdbc/with-db-transaction [] |
1836 | 1885 | ;; Nothing should acquire locks on the detached tables, but to be safe, acquire |
|
1840 | 1889 | ;; force a resource-events GC. prior to partitioning, this would have happened |
1841 | 1890 | ;; via a cascade when the report was deleted, but now we just drop whole tables |
1842 | 1891 | ;; of resource events. |
1843 | | - (drop-partition-tables! detached-resource-event-tables |
| 1892 | + (drop-partition-tables! (concat detached-resource-event-tables stranded-events-tables) |
1844 | 1893 | update-lock-status :write-locking-resource-events) |
1845 | | - (drop-partition-tables! detached-report-tables |
| 1894 | + (drop-partition-tables! (concat detached-report-tables stranded-reports-tables) |
1846 | 1895 | update-lock-status :write-locking-reports) |
1847 | 1896 | ;; since we cannot cascade back to the certnames table anymore, go clean up |
1848 | 1897 | ;; the latest_report_id column after a GC. |
|
0 commit comments