From 37b64553547fdbee20786b68eb28ba01a2498ffd Mon Sep 17 00:00:00 2001
From: waiting <1661926154@qq.com>
Date: Wed, 12 Mar 2025 20:52:13 +0800
Subject: [PATCH 01/12] feat(pgsql): add pgsql util playbooks

- check_pg_ready
- is_patroni_paused
- start postgres
- start and stop postgres gracefully
- patroni pause|resume gracefully
---
 roles/pgsql/tasks/util/check_pg_ready.yml     | 28 ++++++++++
 .../pgsql/tasks/util/grace_patroni_pause.yml  | 26 +++++++++
 .../pgsql/tasks/util/grace_patroni_resume.yml | 26 +++++++++
 roles/pgsql/tasks/util/grace_start_pg.yml     | 56 +++++++++++++++++++
 roles/pgsql/tasks/util/grace_stop_pg.yml      | 26 +++++++++
 roles/pgsql/tasks/util/is_patroni_paused.yml  | 21 +++++++
 roles/pgsql/tasks/util/pg_backup.yml          | 49 ++++++++++++++++
 roles/pgsql/tasks/util/start_pg.yml           | 26 +++++++++
 8 files changed, 258 insertions(+)
 create mode 100644 roles/pgsql/tasks/util/check_pg_ready.yml
 create mode 100644 roles/pgsql/tasks/util/grace_patroni_pause.yml
 create mode 100644 roles/pgsql/tasks/util/grace_patroni_resume.yml
 create mode 100644 roles/pgsql/tasks/util/grace_start_pg.yml
 create mode 100644 roles/pgsql/tasks/util/grace_stop_pg.yml
 create mode 100644 roles/pgsql/tasks/util/is_patroni_paused.yml
 create mode 100644 roles/pgsql/tasks/util/pg_backup.yml
 create mode 100644 roles/pgsql/tasks/util/start_pg.yml

diff --git a/roles/pgsql/tasks/util/check_pg_ready.yml b/roles/pgsql/tasks/util/check_pg_ready.yml
new file mode 100644
index 000000000..3365d6702
--- /dev/null
+++ b/roles/pgsql/tasks/util/check_pg_ready.yml
@@ -0,0 +1,28 @@
+#!/usr/bin/ansible-playbook
+---
+
+- name: check postgres ready
+  tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
+  vars:
+    dbsu: "{{ pg_dbsu|default('postgres') }}"
+  block:
+    - name: wait for postgres
+      # best-effort port probe: its errors are ignored, pg_isready below is the actual check
+      wait_for: host={{ inventory_hostname }} port={{ pg_port }} state=started timeout=60
+      ignore_errors: true
+
+    # registered as pg_ready_result directly, so rc/stdout/stderr are set even when every retry fails
+    - name: check postgres ready
+      become_user: "{{ dbsu }}"
+      shell: |
+        {{ pg_bin_dir }}/pg_isready -t 5 -p {{ pg_port }}
+      register: pg_ready_result
+      retries: 6
+      until: pg_ready_result.rc == 0
+      delay: 5
+      changed_when: false
+
+    - name: Set fact result (previous register name of the pg_isready check)
+      set_fact:
+        result: "{{ pg_ready_result }}"
+...
diff --git a/roles/pgsql/tasks/util/grace_patroni_pause.yml b/roles/pgsql/tasks/util/grace_patroni_pause.yml
new file mode 100644
index 000000000..ac5e9bd94
--- /dev/null
+++ b/roles/pgsql/tasks/util/grace_patroni_pause.yml
@@ -0,0 +1,26 @@
+#!/usr/bin/ansible-playbook
+---
+#--------------------------------------------------------------#
+# patroni pause pg_cls gracefully (skipped when already paused)
+#--------------------------------------------------------------#
+
+- name: patroni pause gracefully
+  tags: grace_patroni_pause
+  become_user: "{{ dbsu }}"
+  vars:
+    dbsu: "{{ pg_dbsu|default('postgres') }}"
+  any_errors_fatal: true
+  block:
+    - name: check is paused {{ pg_cluster }}
+      import_tasks: is_patroni_paused.yml
+
+    - name: patroni pause {{ pg_cluster }}
+      when: not is_patroni_paused  # empty (or None) means 'Maintenance mode: on' was not found
+      command: /usr/bin/patronictl -c /pg/bin/patroni.yml pause
+      register: patroni_pause_result
+      until: patroni_pause_result.rc == 0 and patroni_pause_result.stdout.find('Success') != -1
+      retries: 5
+      delay: 3
+      run_once: true
+
+...
diff --git a/roles/pgsql/tasks/util/grace_patroni_resume.yml b/roles/pgsql/tasks/util/grace_patroni_resume.yml
new file mode 100644
index 000000000..ded8b9287
--- /dev/null
+++ b/roles/pgsql/tasks/util/grace_patroni_resume.yml
@@ -0,0 +1,26 @@
+#!/usr/bin/ansible-playbook
+---
+# patroni resume pg_cls gracefully: runs `patronictl resume` only when is_patroni_paused is non-empty
+
+- name: patroni resume pg_cls gracefully
+  tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
+  any_errors_fatal: true
+  become_user: "{{ dbsu }}"
+  vars:
+    dbsu: "{{ pg_dbsu|default('postgres') }}"
+  block:
+    - name: check is paused {{ pg_cluster }}
+      import_tasks: is_patroni_paused.yml
+
+    - name: patroni resume {{ pg_cluster }}
+      command: /usr/bin/patronictl -c /pg/bin/patroni.yml resume
+      register: patroni_resume_result
+      when: is_patroni_paused|length > 0
+      run_once: true
+      retries: 5
+      delay: 3
+      until:
+        - patroni_resume_result.rc == 0
+        - patroni_resume_result.stdout.find('Success') != -1
+
+...
diff --git a/roles/pgsql/tasks/util/grace_start_pg.yml b/roles/pgsql/tasks/util/grace_start_pg.yml
new file mode 100644
index 000000000..d1758b1cc
--- /dev/null
+++ b/roles/pgsql/tasks/util/grace_start_pg.yml
@@ -0,0 +1,56 @@
+#!/usr/bin/ansible-playbook
+---
+#--------------------------------------------------------------#
+# start postgres cluster gracefully [grace_start_pg]
+# start_pg.yml is imported twice: first limited to pg_role == 'primary', then to every other role
+
+- name: start postgres cluster gracefully
+  vars:
+    dbsu: "{{ pg_dbsu|default('postgres') }}"
+  tags: grace_start_pg
+  block:
+    - import_tasks: start_pg.yml
+      when: pg_role == 'primary'
+
+    - import_tasks: start_pg.yml
+      when: pg_role != 'primary'
+
+    - name: sleep 5 seconds before patroni resume if needed
+      command: sleep 5
+      changed_when: false
+      when: patroni_mode != 'remove'
+
+    - import_tasks: check_pg_ready.yml
+
+    - name: run patronictl list
+      command: /usr/bin/patronictl -c /pg/bin/patroni.yml list -f tsv
+      register: patronictl_list
+      changed_when: false
+      become_user: "{{ dbsu }}"
+      when: patroni_mode != 'remove'
+
+    - name: check patroni status all ready (no stopped status) and set fact
+      set_fact:
+        patroni_all_ready: "{{ patronictl_list.stdout_lines | select('search', 'stopped') | list | length == 0 }}"
+      changed_when: false
+      when: patroni_mode != 'remove'
+
+    - name: sleep extra 15 seconds before patroni resume if needed
+      command: sleep 15
+      changed_when: false
+      when:
+        - patroni_mode != 'remove'
+        - patroni_all_ready == false
+
+  rescue:
+    - name: check postgres ready failed for {{ pg_cluster }}
+      when: pg_ready_result is defined and pg_ready_result.rc != 0
+      debug:
+        msg: |
+          rc: {{ pg_ready_result.rc }}
+          STDOUT: {{ pg_ready_result.stdout }}
+          STDERR: {{ pg_ready_result.stderr }}
+
+    - name: Exit Playbook due to error
+      meta: end_play
+...
diff --git a/roles/pgsql/tasks/util/grace_stop_pg.yml b/roles/pgsql/tasks/util/grace_stop_pg.yml
new file mode 100644
index 000000000..dbbb79a48
--- /dev/null
+++ b/roles/pgsql/tasks/util/grace_stop_pg.yml
@@ -0,0 +1,26 @@
+#!/usr/bin/ansible-playbook
+---
+#--------------------------------------------------------------#
+# stop postgres cluster gracefully [grace_stop_pg]: non-primary members first, then the primary
+#--------------------------------------------------------------#
+
+- name: stop postgres cluster gracefully
+  tags: grace_stop_pg
+  become_user: "{{ dbsu }}"
+  vars:
+    dbsu: "{{ pg_dbsu|default('postgres') }}"
+  any_errors_fatal: true
+  block:
+    - name: stop postgres replica of {{ pg_cluster }}
+      when: pg_role != 'primary'
+      shell: |
+        {{ pg_bin_dir }}/pg_ctl -D {{ pg_data }} stop
+        sleep 3
+
+    - name: stop postgres primary of {{ pg_cluster }}
+      when: pg_role == 'primary'
+      shell: |
+        {{ pg_bin_dir }}/pg_ctl -D {{ pg_data }} stop
+        sync; sync;
+
+...
diff --git a/roles/pgsql/tasks/util/is_patroni_paused.yml b/roles/pgsql/tasks/util/is_patroni_paused.yml
new file mode 100644
index 000000000..139ce80e2
--- /dev/null
+++ b/roles/pgsql/tasks/util/is_patroni_paused.yml
@@ -0,0 +1,21 @@
+#!/usr/bin/ansible-playbook
+---
+# sets is_patroni_paused: 'Maintenance mode: on' when patronictl reports it, '' otherwise (never None)
+- name: check patroni maintenance mode
+  tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
+  become_user: "{{ pg_dbsu|default('postgres') }}"
+  block:
+    - name: run patroni list
+      args: { executable: /bin/bash }
+      shell: |
+        /usr/bin/patronictl -c /pg/bin/patroni.yml list 2>/dev/null | tail -n 3
+      register: patroni_status_cmd
+      changed_when: false
+      ignore_errors: true
+
+    - name: set variable is_patroni_paused by patroni maintenance mode
+      set_fact:
+        is_patroni_paused: "{{ patroni_status_cmd.stdout | default('') | regex_search('Maintenance mode: on') | default('', true) }}"
+      changed_when: false
+
+...
diff --git a/roles/pgsql/tasks/util/pg_backup.yml b/roles/pgsql/tasks/util/pg_backup.yml
new file mode 100644
index 000000000..cd595d221
--- /dev/null
+++ b/roles/pgsql/tasks/util/pg_backup.yml
@@ -0,0 +1,49 @@
+#!/usr/bin/ansible-playbook
+---
+# full backup via /pg/bin/pg-backup on the member whose runtime role is primary (when pgbackrest_enabled)
+- name: set variable
+  tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
+  when: pg_role_runtime is undefined
+  block:
+    - name: run pg-role
+      command: /pg/bin/pg-role
+      register: pg_role_cmd
+      changed_when: false
+
+    # default(..., true): stdout is defined even when empty, so fall back to pg_role on empty output too
+    - name: set variable pg_role_runtime
+      set_fact:
+        pg_role_runtime: "{{ pg_role_cmd.stdout | default(pg_role, true) | trim }}"
+
+
+- name: pg_backup
+  tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
+  when: pgbackrest_enabled|bool
+  vars:
+    dbsu: "{{ pg_dbsu|default('postgres') }}"
+    pg_role: "{{ pg_role_runtime }}"
+  block:
+    - name: full backup cls {{ pg_cluster }}
+      become_user: "{{ dbsu }}"
+      when: pg_role == 'primary'
+      command: /pg/bin/pg-backup full
+      register: back_ret_cmd
+
+    - name: show backup result for {{ pg_cluster }}
+      when: pg_role == 'primary'
+      debug:
+        msg: |
+          STDOUT {{ back_ret_cmd.stdout_lines }},
+          STDERR {{ back_ret_cmd.stderr_lines }}
+
+  rescue:
+    - name: pg-backup failed for {{ pg_cluster }}
+      debug:
+        msg: |
+          STDOUT: {{ back_ret_cmd.stdout }},
+          STDERR: {{ back_ret_cmd.stderr }}
+      when: back_ret_cmd is defined
+
+    - name: Exit Playbook due to backup failure
+      meta: end_play
+...
diff --git a/roles/pgsql/tasks/util/start_pg.yml b/roles/pgsql/tasks/util/start_pg.yml
new file mode 100644
index 000000000..b9734f319
--- /dev/null
+++ b/roles/pgsql/tasks/util/start_pg.yml
@@ -0,0 +1,26 @@
+#!/usr/bin/ansible-playbook
+---
+#--------------------------------------------------------------#
+# start postgres with pg_ctl unless a non-empty postmaster.pid is present
+#--------------------------------------------------------------#
+
+- name: start postgres
+  vars:
+    dbsu: "{{ pg_dbsu|default('postgres') }}"
+  block:
+    - name: check if postmaster.pid exists and is not empty
+      register: postmaster_pid_stat
+      changed_when: false
+      stat:
+        path: "{{ pg_data }}/postmaster.pid"
+
+    - name: start postgres member of {{ pg_cluster }}
+      when: not postmaster_pid_stat.stat.exists or postmaster_pid_stat.stat.size == 0
+      become_user: "{{ dbsu }}"
+      args: { executable: /bin/bash }
+      shell: |
+        {{ pg_bin_dir }}/pg_ctl -D {{ pg_data }} start
+        sync; sync;
+        sleep 3
+
+...
From 69c79f2f6f7069c98f3b5783d38479c4f211a9d8 Mon Sep 17 00:00:00 2001
From: waiting <1661926154@qq.com>
Date: Fri, 14 Mar 2025 16:33:36 +0800
Subject: [PATCH 02/12] feat(pgsql): add pgsql util playbooks for hugepage

- patroni_restart
- grace_patroni_restart
- pg_read_hugepage
- pg_write_hugepage
---
 files/postgres/pg-member                      |  32 +++++
 files/postgres/pg-primary-host                |  14 ++
 files/postgres/pg-primary-member              |  14 ++
 roles/pgsql/tasks/grace_patroni_restart.yml   |  92 +++++++++++++
 .../tasks/util/patroni_primary_runtime.yml    |  34 +++++
 .../tasks/util/patroni_restart_at_primary.yml |  48 +++++++
 .../tasks/util/patroni_restart_at_replica.yml |  32 +++++
 .../pgsql/tasks/util/patroni_restart_cls.yml  |  23 ++++
 roles/pgsql/tasks/util/pg_read_hugepage.yml   | 121 ++++++++++++++++++
 roles/pgsql/tasks/util/pg_write_hugepage.yml  |  47 +++++++
 10 files changed, 457 insertions(+)
 create mode 100644 files/postgres/pg-member
 create mode 100644 files/postgres/pg-primary-host
 create mode 100644 files/postgres/pg-primary-member
 create mode 100644
roles/pgsql/tasks/grace_patroni_restart.yml
 create mode 100644 roles/pgsql/tasks/util/patroni_primary_runtime.yml
 create mode 100644 roles/pgsql/tasks/util/patroni_restart_at_primary.yml
 create mode 100644 roles/pgsql/tasks/util/patroni_restart_at_replica.yml
 create mode 100644 roles/pgsql/tasks/util/patroni_restart_cls.yml
 create mode 100644 roles/pgsql/tasks/util/pg_read_hugepage.yml
 create mode 100644 roles/pgsql/tasks/util/pg_write_hugepage.yml

diff --git a/files/postgres/pg-member b/files/postgres/pg-member
new file mode 100644
index 000000000..f55dc8851
--- /dev/null
+++ b/files/postgres/pg-member
@@ -0,0 +1,32 @@
+#!/bin/bash
+set -uo pipefail
+#==============================================================#
+# File : pg-member
+# Desc : print the local patroni member name (REST API first, patronictl as fallback)
+# Path : /pg/bin/pg-member
+# Depend : patroni, curl, jq, ss
+# License : AGPLv3 @ https://pigsty.io/docs/about/license
+# Author : waiting
+#==============================================================#
+
+# method 1: get patroni member name from patroni REST API ('// empty' keeps a missing name from becoming "null")
+api_ip=$(ss -tlnpH | grep -E ':8008\b' | awk '{print $4}' | cut -d':' -f1 | grep -vE '^0\.0\.0\.0$|^::$' | head -n1)
+[ -z "$api_ip" ] && api_ip="127.0.0.1"
+
+name=$(curl -s "http://${api_ip}:8008/patroni" 2>/dev/null | jq -r '.patroni.name // empty')
+
+# method 2: get patroni member name from patronictl (match local IPs against the Host column)
+if [ -z "$name" ]; then
+  ips=($(hostname -I | tr ' ' '\n' | grep -v '^$'))
+  if [ ${#ips[@]} -eq 0 ]; then
+    # echo "Cannot get local IP address" >&2
+    exit 0
+  fi
+  ip_json=$(printf '"%s",' "${ips[@]}" | sed 's/,$//')
+  name=$(/usr/bin/patronictl -c /pg/bin/patroni.yml list -f json | jq -r --argjson ips "[$ip_json]" '.[] | select(.Host as $h | $ips | index($h)) | .Member')
+fi
+
+if [[ -n "$name" ]]; then
+  echo "$name"
+fi
+
diff --git a/files/postgres/pg-primary-host b/files/postgres/pg-primary-host
new file mode 100644
index 000000000..5476d6ae6
--- /dev/null
+++ b/files/postgres/pg-primary-host
@@ -0,0 +1,14 @@
+#!/bin/bash
+set -uo pipefail
+#==============================================================#
+# File : pg-primary-host
+# Desc : print the Host of the patroni Leader member (from patronictl list)
+# Path : /pg/bin/pg-primary-host
+# Depend : patroni, jq
+# License : AGPLv3 @ https://pigsty.io/docs/about/license
+# Author : waiting
+#==============================================================#
+
+name=$(/usr/bin/patronictl -c /pg/bin/patroni.yml list -f json 2>/dev/null | jq -r '.[] | select(.Role == "Leader") | .Host' )
+echo $name
+
diff --git a/files/postgres/pg-primary-member b/files/postgres/pg-primary-member
new file mode 100644
index 000000000..5697a0015
--- /dev/null
+++ b/files/postgres/pg-primary-member
@@ -0,0 +1,14 @@
+#!/bin/bash
+set -uo pipefail
+#==============================================================#
+# File : pg-primary-member
+# Desc : print the Member name of the patroni Leader (from patronictl list)
+# Path : /pg/bin/pg-primary-member
+# Depend : patroni, jq
+# License : AGPLv3 @ https://pigsty.io/docs/about/license
+# Author : waiting
+#==============================================================#
+
+name=$(/usr/bin/patronictl -c /pg/bin/patroni.yml list -f json 2>/dev/null | jq -r '.[] | select(.Role == "Leader") | .Member' )
+echo $name
+
diff --git a/roles/pgsql/tasks/grace_patroni_restart.yml b/roles/pgsql/tasks/grace_patroni_restart.yml
new file mode 100644
index 000000000..3e03d8d55
--- /dev/null
+++ b/roles/pgsql/tasks/grace_patroni_restart.yml
@@ -0,0 +1,92 @@
+#!/usr/bin/ansible-playbook
+---
+#--------------------------------------------------------------#
+# patroni restart pg_cls gracefully [pt_restart]
+# avoiding switching pg primary/standby
+# usage:
+# - ./pgsql.yml -l -t pt_restart -e "pt_restart=true"
+# - ./pgsql.yml -l ',&' -t pt_restart -e "pt_restart=true"
+#--------------------------------------------------------------#
+# inner steps:
+# 1. patroni pause
+# 2. patroni restart pg_cls
+# 3. patroni resume
+#--------------------------------------------------------------#
+# tested scenarios:
+# - ./pgsql.yml -l -t pt_restart -e "pt_restart=true"
+# all nodes have been restarted
+# - ./pgsql.yml -l ',&' -t pt_restart -e "pt_restart=true"
+# nodes in ansible_play_hosts_all have been restarted, other nodes in the same pg_cls ignored
+#--------------------------------------------------------------#
+
+- name: set variable
+  tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
+  when: pg_role_runtime is undefined or pg_primary_host_runtime is undefined
+  block:
+    - name: run pg-role
+      command: /pg/bin/pg-role
+      register: pg_role_cmd
+      changed_when: false
+
+    # default(..., true): stdout is defined even when empty, so fall back to pg_role on empty output too
+    - name: set variable pg_role_runtime
+      set_fact:
+        pg_role_runtime: "{{ pg_role_cmd.stdout | default(pg_role, true) | trim }}"
+
+    - name: set pg_primary_host_runtime
+      tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
+      import_tasks: util/patroni_primary_runtime.yml
+
+    - name: print variables
+      debug:
+        msg: |
+          pg_role_runtime: {{ pg_role_runtime|default('') }},
+          pg_primary_host_runtime: {{ pg_primary_host_runtime|default('') }},
+          pg_primary_member_runtime: {{ pg_primary_member_runtime|default('') }}
+
+
+- name: patroni restart pg_cls gracefully
+  tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
+  when: patroni_mode != 'remove'
+  become_user: "{{ dbsu }}"
+  vars:
+    pg_role: "{{ pg_role_runtime }}"
+    dbsu: "{{ pg_dbsu|default('postgres') }}"
+  block:
+    - name: patroni pause gracefully
+      include_tasks: util/grace_patroni_pause.yml
+      run_once: true
+
+    - import_tasks: util/patroni_restart_cls.yml
+
+    - name: check pg ready
+      include_tasks: util/check_pg_ready.yml
+      run_once: true
+
+    - name: patroni resume pg_cls gracefully
+      import_tasks: util/grace_patroni_resume.yml
+      run_once: true
+
+    - name: print message
+      debug:
+        msg: finally, check if all postgres is ready
+
+    # finally, check if all postgres is ready
+    - import_tasks: util/check_pg_ready.yml
+
+    - name: re-set variable pt_restart to false
+      set_fact:
+        pt_restart: false
+
+  rescue:
+    - name: check postgres ready failed for {{ pg_cluster }}
+      debug:
+        msg: |
+          rc: {{ pg_ready_result.rc }}
+          STDOUT: {{ pg_ready_result.stdout }}
+          STDERR: {{ pg_ready_result.stderr }}
+      when: pg_ready_result is defined and pg_ready_result.rc != 0
+
+    - name: Exit Playbook due to error
+      meta: end_play
+
+...
diff --git a/roles/pgsql/tasks/util/patroni_primary_runtime.yml b/roles/pgsql/tasks/util/patroni_primary_runtime.yml
new file mode 100644
index 000000000..1864e6291
--- /dev/null
+++ b/roles/pgsql/tasks/util/patroni_primary_runtime.yml
@@ -0,0 +1,34 @@
+#!/usr/bin/ansible-playbook
+---
+# sets pg_primary_member_runtime / pg_primary_host_runtime from the /pg/bin helpers ('' when they print nothing)
+- name: retrieve runtime patroni primary member
+  tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
+  become_user: "{{ dbsu }}"
+  vars:
+    dbsu: "{{ pg_dbsu|default('postgres') }}"
+  block:
+    - name: retrieve runtime patroni primary member of {{ pg_cluster }}
+      command: /pg/bin/pg-primary-member
+      register: pg_pri_member_name_cmd
+      changed_when: false
+
+    - name: set variable pg_primary_member_runtime
+      set_fact:
+        pg_primary_member_runtime: "{{ pg_pri_member_name_cmd.stdout | default('') | trim }}"
+
+
+- name: retrieve runtime patroni primary host
+  tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
+  become_user: "{{ dbsu }}"
+  vars:
+    dbsu: "{{ pg_dbsu|default('postgres') }}"
+  block:
+    - name: retrieve runtime patroni primary host of {{ pg_cluster }}
+      command: /pg/bin/pg-primary-host
+      register: pg_pri_member_name_cmd
+      changed_when: false
+
+    - name: set variable pg_primary_host_runtime
+      set_fact:
+        pg_primary_host_runtime: "{{ pg_pri_member_name_cmd.stdout | default('') | trim }}"
+...
diff --git a/roles/pgsql/tasks/util/patroni_restart_at_primary.yml b/roles/pgsql/tasks/util/patroni_restart_at_primary.yml
new file mode 100644
index 000000000..ea9b6a0f9
--- /dev/null
+++ b/roles/pgsql/tasks/util/patroni_restart_at_primary.yml
@@ -0,0 +1,48 @@
+#!/usr/bin/ansible-playbook
+---
+
+- name: patroni restart primary pg_cluster at primary
+  tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
+  when: patroni_mode != 'remove' and pg_role == 'primary'
+  block:
+    # result may be success, but contains 'Failed' message,
+    # due to node has multiple ip addresses and got "Failed: ... status code=403, (Access is denied)" for replica
+    - name: patroni restart primary {{ pg_cluster }} at primary
+      become_user: "{{ dbsu }}"
+      vars:
+        dbsu: "{{ pg_dbsu|default('postgres') }}"
+      args: { executable: /bin/bash }
+      shell: |
+        /usr/bin/patronictl -c /pg/bin/patroni.yml restart --force {{ pg_cluster }}
+      register: patroni_restart_cmd
+
+    - name: set pt_restart_full_succeed
+      when: patroni_restart_cmd is defined and (patroni_restart_cmd.rc == 0 and patroni_restart_cmd.stdout.find('Failed') == -1)
+      set_fact:
+        pt_restart_full_succeed: true
+
+    # NOTE(review): a non-zero rc presumably already fails the restart task above, so this may never print - confirm
+    - name: print result if failed at primary
+      debug:
+        msg: |
+          rc: {{ patroni_restart_cmd.rc }},
+          stdout: "{{ patroni_restart_cmd.stdout_lines }}",
+          stderr: "{{ patroni_restart_cmd.stderr_lines }}"
+      when: patroni_restart_cmd is defined and patroni_restart_cmd.rc is defined and patroni_restart_cmd.rc != 0
+      changed_when: false
+
+    - name: print result if succeed but contains 'Failed' at primary
+      debug:
+        var: patroni_restart_cmd.stdout_lines
+      when: patroni_restart_cmd is defined and (patroni_restart_cmd.rc == 0 and patroni_restart_cmd.stdout.find('Failed') != -1)
+      changed_when: false
+
+    - name: sleep after patroni restart
+      when: patroni_restart_cmd is defined and (patroni_restart_cmd.rc == 0 and patroni_restart_cmd.stdout.find('Failed') != -1)
+      args: { executable: /bin/bash }
+      shell: |
+        sync; sync;
+        sleep 1
+      changed_when: false
+
+...
diff --git a/roles/pgsql/tasks/util/patroni_restart_at_replica.yml b/roles/pgsql/tasks/util/patroni_restart_at_replica.yml
new file mode 100644
index 000000000..687c099d9
--- /dev/null
+++ b/roles/pgsql/tasks/util/patroni_restart_at_replica.yml
@@ -0,0 +1,32 @@
+#!/usr/bin/ansible-playbook
+---
+# restart only the local patroni member (name from /pg/bin/pg-member) on hosts whose pg_role is not primary
+- name: patroni restart {{ pg_cluster }} at replica individually
+  tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
+  become_user: "{{ pg_dbsu|default('postgres') }}"
+  when: pg_role != 'primary'
+  block:
+    - name: retrieve current patroni member at {{ pg_role }} of {{ pg_cluster }}
+      command: /pg/bin/pg-member
+      register: pg_member_name_cmd
+      changed_when: false
+
+    - name: set variable curr_pg_member_name
+      set_fact:
+        curr_pg_member_name: "{{ pg_member_name_cmd.stdout | default('') | trim }}"
+      changed_when: false
+
+    - name: restart patroni member replica of {{ pg_cluster }}
+      when: curr_pg_member_name is defined and curr_pg_member_name|length>0
+      command: /usr/bin/patronictl -c /pg/bin/patroni.yml restart --force {{ pg_cluster }} {{ curr_pg_member_name }}
+      register: patroni_restart_cmd
+
+    - name: print result if failed at replica
+      when: patroni_restart_cmd is defined and patroni_restart_cmd.rc is defined and patroni_restart_cmd.rc != 0
+      debug:
+        msg: |
+          rc: {{ patroni_restart_cmd.rc }},
+          stdout: {{ patroni_restart_cmd.stdout }},
+          stderr: {{ patroni_restart_cmd.stderr }}
+
+...
diff --git a/roles/pgsql/tasks/util/patroni_restart_cls.yml b/roles/pgsql/tasks/util/patroni_restart_cls.yml
new file mode 100644
index 000000000..a9ad5ea4e
--- /dev/null
+++ b/roles/pgsql/tasks/util/patroni_restart_cls.yml
@@ -0,0 +1,23 @@
+#!/usr/bin/ansible-playbook
+---
+
+- name: patroni restart cls {{ pg_cluster }}
+  when: patroni_mode != 'remove'
+  vars:
+    dbsu: "{{ pg_dbsu|default('postgres') }}"
+  become_user: "{{ dbsu }}"
+  block:
+    - name: restart {{ pg_cluster }} at primary
+      include_tasks: patroni_restart_at_primary.yml
+      when: pg_role == 'primary'
+
+    # true unless the host named by pg_primary_host_runtime has pt_restart_full_succeed set
+    - name: set variable need_restart_at_replica at all nodes
+      set_fact:
+        need_restart_at_replica: "{{ not (hostvars[pg_primary_host_runtime].pt_restart_full_succeed | default(false) | string | trim | bool) }}"
+
+    - name: patroni restart at replica of {{ pg_cluster }} if needed {{ need_restart_at_replica }}
+      when: pg_role != 'primary' and need_restart_at_replica == true
+      include_tasks: patroni_restart_at_replica.yml
+
+...
diff --git a/roles/pgsql/tasks/util/pg_read_hugepage.yml b/roles/pgsql/tasks/util/pg_read_hugepage.yml
new file mode 100644
index 000000000..76b872a32
--- /dev/null
+++ b/roles/pgsql/tasks/util/pg_read_hugepage.yml
@@ -0,0 +1,121 @@
+---
+#--------------------------------------------------------------#
+# read and calculate hugepage settings (sets new_nr_hugepages and curr_nr_hugepages)
+#--------------------------------------------------------------#
+
+- name: Set default value of hugepage_count, hugepage_ratio
+  set_fact:
+    hugepage_count: "{{ node_hugepage_count|default(0) }}"
+    hugepage_ratio: "{{ node_hugepage_ratio|default(0) }}"
+    pg_hugepage_value: 0
+  changed_when: false
+
+- name: calculate hugepage from configs hugepage_count and hugepage_ratio
+  when: hugepage_count|int > 0 or hugepage_ratio|float > 0
+  block:
+    - name: calculate hugepage from configs hugepage_count and hugepage_ratio
+      # 2097152 = 2MB page size; a count is capped at 90% of node memory, a ratio >= 0.90 yields 0
+      set_fact:
+        pg_hugepage_value: |-
+          {% if hugepage_count is defined and hugepage_count|int > 0 %}
+          {% if hugepage_count|float >= node_mem_bytes|float / 2097152.0 * 0.90 %}
+          {{ (node_mem_bytes|int / 2097152.0 * 0.90 )|round(0, 'ceil')|int }}
+          {% else %}
+          {{ hugepage_count }}
+          {% endif %}
+          {% else %}
+          {% if hugepage_ratio|float > 0 and hugepage_ratio|float < 0.90 %}
+          {{ (node_mem_bytes|int / 2097152.0 * hugepage_ratio|float )|round(0, 'ceil')|int }}
+          {% else %}0
+          {% endif %}
+          {% endif %}
+
+    - name: print pg_hugepage_value
+      debug:
+        var: pg_hugepage_value
+      changed_when: false
+
+- name: read pg hugepage settings (PG 15+)
+  become_user: "{{ pg_dbsu|default('postgres') }}"
+  when: pg_version|int >= 15 and (hugepage_count|int == -1 or pg_hugepage_value|int > 0) # check pg_hugepage_value
+  vars:
+    dbsu: "{{ pg_dbsu|default('postgres') }}"
+    pg_bin_prefix: "{{ pg_bin_dir|default('/usr/pgsql/bin') }}"
+    localhost: "{{ pg_localhost|default('/var/run/postgresql') }}"
+    port: "{{ pg_port|default(5432) }}"
+  block:
+    - name: read pg huge_pages (PG 15+)
+      args: { executable: /bin/bash }
+      ignore_errors: true
+      shell: |
+        {{ pg_bin_prefix }}/psql -h {{ localhost }} -p {{ port }} -qwAXtc 'show huge_pages'
+      register: huge_pages_cmd
+
+    - name: Set variable pg_hugepage_enabled
+      set_fact:
+        pg_hugepage_enabled: "{{ huge_pages_cmd.stdout | default('off') | lower in ['try', 'on'] }}"
+      # pg_hugepage_enabled: |-
+      #   {% if huge_pages_cmd.stdout is defined %}{{ huge_pages_cmd.stdout|lower in ['try', 'on'] }}{% else %}false|bool{% endif %}
+
+    - name: read pg shared_memory_size_in_huge_pages
+      args: { executable: /bin/bash }
+      when: pg_hugepage_enabled|bool
+      ignore_errors: true
+      shell: |
+        {{ pg_bin_prefix }}/psql -h {{ localhost }} -p {{ port }} -qwAXtc 'show shared_memory_size_in_huge_pages'
+      register: shared_memory_size_in_huge_pages_cmd
+
+    - name: Set variable pg_shared_memory_size_in_huge_pages if hugepage enabled (try or on)
+      set_fact:
+        pg_shared_memory_size_in_huge_pages: |-
+          {% if pg_hugepage_enabled|bool and shared_memory_size_in_huge_pages_cmd.stdout is defined %}
+          {{ shared_memory_size_in_huge_pages_cmd.stdout|int }}
+          {% else %}0{% endif %}
+
+    - name: print pg hugepage settings
+      when: pg_hugepage_enabled is defined
+      debug:
+        msg: |
+          pg_hugepage_enabled: {{ pg_hugepage_enabled }},
+          pg_shared_memory_size_in_huge_pages: {{ pg_shared_memory_size_in_huge_pages }}
+      changed_when: false
+
+
+- name: calculate real hugepage during node_hugepage_count -1
+  when: hugepage_count|int == -1 and pg_shared_memory_size_in_huge_pages|default(0)|int > 0
+  set_fact:
+    new_nr_hugepages: "{{ pg_shared_memory_size_in_huge_pages|default(0) }}"
+
+
+- name: calculate real hugepage from pg_hugepage_value and pg_shared_memory_size_in_huge_pages
+  when: hugepage_count|int != -1
+  vars:
+    v_conf: "{{ pg_hugepage_value|default(0) }}"
+    v_pg: "{{ pg_shared_memory_size_in_huge_pages|default(0) }}"
+  set_fact:
+    new_nr_hugepages: |-
+      {% if v_pg|int > v_conf|int %}
+      {{ v_pg }}
+      {% else %}
+      {{ v_conf }}
+      {% endif %}
+
+
+- name: read current nr_hugepages by sysctl
+  args: { executable: /bin/bash }
+  become: true
+  shell: |
+    sysctl -n vm.nr_hugepages
+  register: curr_nr_hugepages_cmd
+  ignore_errors: true
+
+
+- name: Set variable curr_nr_hugepages
+  when: curr_nr_hugepages_cmd.stdout is defined
+  set_fact:
+    curr_nr_hugepages: |-
+      {% if curr_nr_hugepages_cmd.stdout is defined %}
+      {{ curr_nr_hugepages_cmd.stdout|int }}
+      {% else %}0{% endif %}
+
+...
\ No newline at end of file
diff --git a/roles/pgsql/tasks/util/pg_write_hugepage.yml b/roles/pgsql/tasks/util/pg_write_hugepage.yml
new file mode 100644
index 000000000..a973aef43
--- /dev/null
+++ b/roles/pgsql/tasks/util/pg_write_hugepage.yml
@@ -0,0 +1,47 @@
+---
+#--------------------------------------------------------------#
+# Write hugepage sysctl parameter if needed
+#--------------------------------------------------------------#
+
+- name: write hugepage sysctl parameter if needed
+  become: true
+  when: new_nr_hugepages is defined
+  block:
+    - name: write hugepage sysctl parameter
+      vars:
+        txt: |
+          vm.nr_hugepages = {{ new_nr_hugepages | int }}
+      copy:
+        dest: /etc/sysctl.d/hugepage.conf
+        # write txt first line to /etc/sysctl.d/hugepage.conf
+        content: |
+          {{ txt.split('\n')[0] }}
+
+    # the inline shell comment below says: flush to disk and drop system caches (expect a database performance hit)
+    - name: drop page cache and apply hugepage sysctl, prepare for database performance impact!
+      args: { executable: /bin/bash }
+      shell: |
+        sync; echo 3 > /proc/sys/vm/drop_caches # 刷盘,释放系统缓存(请做好数据库性能受到冲击的准备)
+        sysctl -p /etc/sysctl.d/hugepage.conf
+      ignore_errors: true
+
+    - name: print new_nr_hugepages after write
+      args: { executable: /bin/bash }
+      shell: |
+        cat /proc/meminfo | grep HugePages_
+        sysctl -a | grep vm.nr_hugepages
+      ignore_errors: true
+      changed_when: false
+      register: hugepage_after_write_cmd
+
+    - name: print hugepage_after_write_cmd
+      when: hugepage_after_write_cmd is defined
+      debug:
+        msg: |
+          rc: {{ hugepage_after_write_cmd.rc }}
+          STDOUT: {{ hugepage_after_write_cmd.stdout }}
+          STDERR: {{ hugepage_after_write_cmd.stderr }}
+      ignore_errors: true
+      changed_when: false
+
+...
From a17c4505e3124989e059a3e850662a68f9116a5a Mon Sep 17 00:00:00 2001
From: waiting <1661926154@qq.com>
Date: Mon, 17 Mar 2025 17:26:33 +0800
Subject: [PATCH 03/12] feat(pgsql): param `node_hugepage_count` accepts value -1 to set hugepage automatically (only PG 15+)

nr_hugepages calculated by `shared_memory_size_in_huge_pages`
if `node_hugepage_count` or `node_hugepage_ratio` changed, it will full backup the pg_cls and restart it gracefully

support usage:
- `pig install`
- `bin/pgsql-add `
- `bin/pgsql-add ` only new nodes changed and restarted
- `pgsql.yml -l -t pg_hugepage`

calculation rules:
1. `shared_memory_size_in_huge_pages` (from PG15+): if `node_hugepage_count` is -1 and huge_pages (from PG) != off
2. if `node_hugepage_count` > 0
  - `shared_memory_size_in_huge_pages`: if `shared_memory_size_in_huge_pages` > `node_hugepage_count`
  - `node_hugepage_count`
3. disable hugepage if `node_hugepage_count` is 0
4.
if `node_hugepage_ratio` > 0
  - `shared_memory_size_in_huge_pages`: if `shared_memory_size_in_huge_pages` > pages calculated from `node_hugepage_ratio`
  - page value calculated from `node_hugepage_ratio`

All above rules and usages are tested successfully on Rocky Linux 9
---
 roles/node/defaults/main.yml      |  3 +-
 roles/pgsql/tasks/main.yml        | 15 ++++++++
 roles/pgsql/tasks/pg_hugepage.yml | 62 +++++++++++++++++++++++++++++++
 3 files changed, 79 insertions(+), 1 deletion(-)
 create mode 100644 roles/pgsql/tasks/pg_hugepage.yml

diff --git a/roles/node/defaults/main.yml b/roles/node/defaults/main.yml
index 0545f6b63..07af703c7 100644
--- a/roles/node/defaults/main.yml
+++ b/roles/node/defaults/main.yml
@@ -38,7 +38,8 @@ node_disable_swap: false # disable node swap, use with caution
 node_static_network: true # preserve dns resolver settings after reboot
 node_disk_prefetch: false # setup disk prefetch on HDD to increase performance
 node_kernel_modules: [ softdog, br_netfilter, ip_vs, ip_vs_rr, ip_vs_wrr, ip_vs_sh ]
-node_hugepage_count: 0 # number of 2MB hugepage, take precedence over ratio
+node_hugepage_count: 0 # number of 2MB hugepage, take precedence over ratio,
+ # -1: use shared_memory_size_in_huge_pages calculated by pg (PG 15+ available)
 node_hugepage_ratio: 0 # node mem hugepage ratio, 0 disable it by default
 node_overcommit_ratio: 0 # node mem overcommit ratio, 0 disable it by default
 node_tune: oltp # node tuned profile: none,oltp,olap,crit,tiny
diff --git a/roles/pgsql/tasks/main.yml b/roles/pgsql/tasks/main.yml
index 1e1dd267c..5b06af554 100644
--- a/roles/pgsql/tasks/main.yml
+++ b/roles/pgsql/tasks/main.yml
@@ -55,6 +55,21 @@
   tags: [ patroni, pg_launch ]
   when: patroni_enabled|bool
 
+#--------------------------------------------------------------#
+# pg hugepages [pg_hugepage]
+#--------------------------------------------------------------#
+- import_tasks: pg_hugepage.yml
+  when: patroni_enabled|bool
+  tags: [ patroni, pg_launch, pg_hugepage ]
+
+#--------------------------------------------------------------#
+# patroni restart pg_cls gracefully [pt_restart]
+#--------------------------------------------------------------#
+- name: patroni restart pg_cls gracefully
+  include_tasks: grace_patroni_restart.yml
+  when: patroni_enabled|bool and (pt_restart is defined and pt_restart|bool)
+  tags: [ patroni, pg_launch, pt_restart, pg_hugepage ]
+
 #--------------------------------------------------------------#
 # Users [pg_user]
 #--------------------------------------------------------------#
diff --git a/roles/pgsql/tasks/pg_hugepage.yml b/roles/pgsql/tasks/pg_hugepage.yml
new file mode 100644
index 000000000..7df214305
--- /dev/null
+++ b/roles/pgsql/tasks/pg_hugepage.yml
@@ -0,0 +1,62 @@
+---
+#--------------------------------------------------------------#
+# Enable hugepage for pg and restart pg cluster [pg_hugepage]
+# if `node_hugepage_count` or `node_hugepage_ratio` changed, it will full backup the pg_cls and restart it gracefully
+# support usage:
+# - `pig install`
+# - `bin/pgsql-add `
+# - `pgsql.yml -l -t pg_hugepage`
+#--------------------------------------------------------------#
+# calculation rules:
+# 1. `shared_memory_size_in_huge_pages` (from PG15+): if `node_hugepage_count` is -1 and huge_pages (from PG) != off
+# 2. if `node_hugepage_count` > 0
+#   - `shared_memory_size_in_huge_pages`: if `shared_memory_size_in_huge_pages` > `node_hugepage_count`
+#   - `node_hugepage_count`
+# 3. disable hugepage if `node_hugepage_count` is 0
+# 4. if `node_hugepage_ratio` > 0
+#   - `shared_memory_size_in_huge_pages`: if `shared_memory_size_in_huge_pages` > pages calculated from `node_hugepage_ratio`
+#   - page value calculated from `node_hugepage_ratio`
+# All above rules and usages are tested successfully on Rocky Linux 9
+#--------------------------------------------------------------#
+
+- name: Set default value
+  set_fact:
+    curr_nr_hugepages: 0
+    new_nr_hugepages: 0
+  changed_when: false
+
+
+- name: enable hugepage for pg
+  tags: pg_hugepage
+  # flow: read/calculate values, flag pt_restart when they differ, back up on the primary, write sysctl
+  block:
+    - import_tasks: util/pg_read_hugepage.yml
+
+    - name: calculate hugepage need update
+      set_fact:
+        hugepages_need_update: "{{ new_nr_hugepages|int != curr_nr_hugepages|int }}"
+      changed_when: false
+
+    - name: set pt_restart according to hugepages_need_update
+      set_fact:
+        pt_restart: "{{ hugepages_need_update|bool }}"
+      changed_when: false
+
+    - name: print nr_hugepages values
+      when: new_nr_hugepages is defined and curr_nr_hugepages is defined
+      debug:
+        msg: |
+          curr_nr_hugepages: {{ curr_nr_hugepages|trim }}, new_nr_hugepages: {{ new_nr_hugepages|trim }},
+          hugepages_need_update: {{ hugepages_need_update|bool }},
+          pt_restart: {{ pt_restart|bool }}
+      changed_when: false
+
+    - name: full backup pg before hugepage update if has primary
+      include_tasks: util/pg_backup.yml
+      when: hugepages_need_update|bool and pg_role == 'primary'
+
+    - name: update hugepage
+      include_tasks: util/pg_write_hugepage.yml
+      when: hugepages_need_update|bool
+
+...
From ef4bada9b1e4cb6a98d65fa4878e9bc6d1ad0266 Mon Sep 17 00:00:00 2001 From: waiting <1661926154@qq.com> Date: Mon, 17 Mar 2025 19:04:14 +0800 Subject: [PATCH 04/12] chore(pgsql): update comments --- files/postgres/pg-primary-host | 6 +++--- files/postgres/pg-primary-member | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/files/postgres/pg-primary-host b/files/postgres/pg-primary-host index 5476d6ae6..f3459f9c9 100644 --- a/files/postgres/pg-primary-host +++ b/files/postgres/pg-primary-host @@ -1,9 +1,9 @@ #!/bin/bash set -uo pipefail #==============================================================# -# File : pg-member -# Desc : retrieve patroni member name from patroni REST API -# Path : /pg/bin/pg-member +# File : pg-primary-host +# Desc : retrieve patroni primary member host from patroni REST API +# Path : /pg/bin/pg-primary-host # Depend : patroni # License : AGPLv3 @ https://pigsty.io/docs/about/license # Author : waiting diff --git a/files/postgres/pg-primary-member b/files/postgres/pg-primary-member index 5697a0015..35bd159ab 100644 --- a/files/postgres/pg-primary-member +++ b/files/postgres/pg-primary-member @@ -1,9 +1,9 @@ #!/bin/bash set -uo pipefail #==============================================================# -# File : pg-member -# Desc : retrieve patroni member name from patroni REST API -# Path : /pg/bin/pg-member +# File : pg-primary-member +# Desc : retrieve patroni primary member name from patroni REST API +# Path : /pg/bin/pg-primary-member # Depend : patroni # License : AGPLv3 @ https://pigsty.io/docs/about/license # Author : waiting From fefd5a88e77cf5ae765559226e2d2090db87a99e Mon Sep 17 00:00:00 2001 From: waiting <1661926154@qq.com> Date: Mon, 17 Mar 2025 22:27:45 +0800 Subject: [PATCH 05/12] chore(pgsql): update comments of pg_hugepage.yml --- roles/pgsql/tasks/grace_patroni_restart.yml | 5 +++-- roles/pgsql/tasks/pg_hugepage.yml | 10 +++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git 
a/roles/pgsql/tasks/grace_patroni_restart.yml b/roles/pgsql/tasks/grace_patroni_restart.yml index 3e03d8d55..771cecc9b 100644 --- a/roles/pgsql/tasks/grace_patroni_restart.yml +++ b/roles/pgsql/tasks/grace_patroni_restart.yml @@ -1,7 +1,7 @@ #!/usr/bin/ansible-playbook --- #--------------------------------------------------------------# -# patroni restart pg_cls gracefully [pt_restart] +# patroni restart pg_cls gracefully [pt_restart] # avoiding switching pg primary/standby # usage: # - ./pgsql.yml -l -t pt_restart -e "pt_restart=true" @@ -16,7 +16,8 @@ # - ./pgsql.yml -l -t pt_restart -e "pt_restart=true" # all nodes have been restarted # - ./pgsql.yml -l ',&' -t pt_restart -e "pt_restart=true" -# nodes in ansible_play_hosts_all have been restarted, other nodes in the same pg_cls ignored +# nodes in ansible_play_hosts_all have been restarted, +# other nodes in the same pg_cls ignored #--------------------------------------------------------------# - name: set variable diff --git a/roles/pgsql/tasks/pg_hugepage.yml b/roles/pgsql/tasks/pg_hugepage.yml index 7df214305..522efecec 100644 --- a/roles/pgsql/tasks/pg_hugepage.yml +++ b/roles/pgsql/tasks/pg_hugepage.yml @@ -1,11 +1,19 @@ --- #--------------------------------------------------------------# # Enable hugepage for pg and restart pg cluster [pg_hugepage] -# if `node_hugepage_count` or `node_hugepage_ratio` changed, it will full backup the pg_cls and restart it gracefully +# if `node_hugepage_count` or `node_hugepage_ratio` changed, +# it will full backup the pg_cls and restart it gracefully # support usage: # - `pig install` # - `bin/pgsql-add ` +# - `bin/pgsql-add ` +# service of nodes in ansible_play_hosts_all will be unavailable during the process, +# other nodes in the same pg_cls ignored # - `pgsql.yml -l -t pg_hugepage` +# entire service of this postgres cluster will be unavailable during the process +# - `pgsql.yml -l ',&' -t pg_hugepage` +# service of nodes in ansible_play_hosts_all will be unavailable 
during the process, +# other nodes in the same pg_cls ignored #--------------------------------------------------------------# # calculation rules: # 1. `shared_memory_size_in_huge_pages` (from PG15+): if `node_hugepage_count` is -1 and huge_pages (from PG) != off From cce037085854b7dbd19fb650de6ec47a70befa4e Mon Sep 17 00:00:00 2001 From: waiting <1661926154@qq.com> Date: Tue, 18 Mar 2025 12:28:38 +0800 Subject: [PATCH 06/12] chore(pgsql): update roles/pgsql/tasks/main.yml move import of pg_hugepage.yml and grace_patroni_restart.yml at bottom --- roles/pgsql/tasks/main.yml | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/roles/pgsql/tasks/main.yml b/roles/pgsql/tasks/main.yml index 5b06af554..e77114e6e 100644 --- a/roles/pgsql/tasks/main.yml +++ b/roles/pgsql/tasks/main.yml @@ -55,21 +55,6 @@ tags: [ patroni, pg_launch ] when: patroni_enabled|bool -#--------------------------------------------------------------# -# pg hugepages [pg_hugepage] -#--------------------------------------------------------------# -- import_tasks: pg_hugepage.yml - when: patroni_enabled|bool - tags: [ patroni, pg_launch, pg_hugepage ] - -#--------------------------------------------------------------# -# patroni restart pg_cls gracefully [pt_restart] -#--------------------------------------------------------------# -- name: patroni restart pg_cls gracefully - include_tasks: grace_patroni_restart.yml - when: patroni_enabled|bool and (pt_restart is defined and pt_restart|bool) - tags: [ patroni, pg_launch, pt_restart, pg_hugepage ] - #--------------------------------------------------------------# # Users [pg_user] #--------------------------------------------------------------# @@ -161,6 +146,21 @@ vars: { database: "{{ item }}" } with_items: "{{ pg_databases }}" +#--------------------------------------------------------------# +# pg hugepages [pg_hugepage] +#--------------------------------------------------------------# +- import_tasks: 
pg_hugepage.yml + when: patroni_enabled|bool + tags: [ patroni, pg_launch, pg_hugepage ] + +#--------------------------------------------------------------# +# patroni restart pg_cls gracefully [pt_restart] +#--------------------------------------------------------------# +- name: patroni restart pg_cls gracefully + include_tasks: grace_patroni_restart.yml + when: patroni_enabled|bool and (pt_restart is defined and pt_restart|bool) + tags: [ patroni, pg_launch, pt_restart, pg_hugepage ] + #--------------------------------------------------------------# # Summary [pg_done] #--------------------------------------------------------------# From c93e0de0f221cc3b65369f1026e1ab1dd8e4f89a Mon Sep 17 00:00:00 2001 From: waiting <1661926154@qq.com> Date: Tue, 18 Mar 2025 12:55:33 +0800 Subject: [PATCH 07/12] chore(pgsql): update messages of grace_patroni_restart.yml --- roles/pgsql/tasks/grace_patroni_restart.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/roles/pgsql/tasks/grace_patroni_restart.yml b/roles/pgsql/tasks/grace_patroni_restart.yml index 771cecc9b..807c877eb 100644 --- a/roles/pgsql/tasks/grace_patroni_restart.yml +++ b/roles/pgsql/tasks/grace_patroni_restart.yml @@ -53,28 +53,28 @@ pg_role: "{{ pg_role_runtime }}" dbsu: "{{ pg_dbsu|default('postgres') }}" block: - - name: patroni pause gracefully + - name: 1. patroni pause gracefully {{ pg_cluster }} include_tasks: util/grace_patroni_pause.yml run_once: true - import_tasks: util/patroni_restart_cls.yml - - name: check pg ready + - name: 2. check pg ready once {{ pg_cluster }} include_tasks: util/check_pg_ready.yml run_once: true - - name: patroni resume pg_cls gracefully + - name: 3.patroni resume pg_cls gracefully {{ pg_cluster }} import_tasks: util/grace_patroni_resume.yml run_once: true - - name: print message + - name: 4. 
print message debug: - msg: finally, check if all postgres is ready + msg: finally, check if all postgres is ready {{ pg_cluster }} # finally, check if all postgres is ready - import_tasks: util/check_pg_ready.yml - - name: re-set variable pt_restart to false + - name: 5. re-set variable pt_restart to false set_fact: pt_restart: false From d02961cda00cf264163c074aedee3b706a5a023e Mon Sep 17 00:00:00 2001 From: waiting <1661926154@qq.com> Date: Tue, 18 Mar 2025 14:05:51 +0800 Subject: [PATCH 08/12] chore(pgsql): update comments --- roles/pgsql/tasks/grace_patroni_restart.yml | 11 ++++++----- roles/pgsql/tasks/util/check_pg_ready.yml | 3 +-- .../tasks/util/patroni_restart_at_primary.yml | 19 +++++++++++++++---- .../tasks/util/patroni_restart_at_replica.yml | 14 ++++++++++---- .../pgsql/tasks/util/patroni_restart_cls.yml | 9 +++++++-- 5 files changed, 39 insertions(+), 17 deletions(-) diff --git a/roles/pgsql/tasks/grace_patroni_restart.yml b/roles/pgsql/tasks/grace_patroni_restart.yml index 807c877eb..4b2837050 100644 --- a/roles/pgsql/tasks/grace_patroni_restart.yml +++ b/roles/pgsql/tasks/grace_patroni_restart.yml @@ -57,24 +57,25 @@ include_tasks: util/grace_patroni_pause.yml run_once: true - - import_tasks: util/patroni_restart_cls.yml + - name: 2. patroni restart pg_cls {{ pg_cluster }} + include_tasks: util/patroni_restart_cls.yml - - name: 2. check pg ready once {{ pg_cluster }} + - name: 3. check pg ready once {{ pg_cluster }} include_tasks: util/check_pg_ready.yml run_once: true - - name: 3.patroni resume pg_cls gracefully {{ pg_cluster }} + - name: 4.patroni resume pg_cls gracefully {{ pg_cluster }} import_tasks: util/grace_patroni_resume.yml run_once: true - - name: 4. print message + - name: 5. print message debug: msg: finally, check if all postgres is ready {{ pg_cluster }} # finally, check if all postgres is ready - import_tasks: util/check_pg_ready.yml - - name: 5. re-set variable pt_restart to false + - name: 6. 
re-set variable pt_restart to false set_fact: pt_restart: false diff --git a/roles/pgsql/tasks/util/check_pg_ready.yml b/roles/pgsql/tasks/util/check_pg_ready.yml index 3365d6702..f62779b9f 100644 --- a/roles/pgsql/tasks/util/check_pg_ready.yml +++ b/roles/pgsql/tasks/util/check_pg_ready.yml @@ -6,8 +6,7 @@ vars: dbsu: "{{ pg_dbsu|default('postgres') }}" block: - - name: wait for postgres - # when: pg_role == 'primary' + - name: wait for postgres ready wait_for: host={{ inventory_hostname }} port={{ pg_port }} state=started timeout=60 ignore_errors: true diff --git a/roles/pgsql/tasks/util/patroni_restart_at_primary.yml b/roles/pgsql/tasks/util/patroni_restart_at_primary.yml index ea9b6a0f9..2c289ba6b 100644 --- a/roles/pgsql/tasks/util/patroni_restart_at_primary.yml +++ b/roles/pgsql/tasks/util/patroni_restart_at_primary.yml @@ -1,5 +1,8 @@ #!/usr/bin/ansible-playbook --- +#--------------------------------------------------------------# +# patroni restart **all** nodes of pg_cluster at primary +#--------------------------------------------------------------# - name: patroni restart primary pg_cluster at primary tags: [ pg_hugepage, patroni, pg_launch, pt_restart ] @@ -17,10 +20,12 @@ register: patroni_restart_cmd - name: set pt_restart_full_succeed - when: patroni_restart_cmd is defined and (patroni_restart_cmd.rc == 0 and patroni_restart_cmd.stdout.find('Failed') == -1) + when: + - patroni_restart_cmd is defined + - patroni_restart_cmd.rc == 0 + - patroni_restart_cmd.stdout.find('Failed') == -1 set_fact: pt_restart_full_succeed: true - # delegate_to: localhost - name: print result if failed at primary debug: @@ -34,11 +39,17 @@ - name: print result if succeed but contains 'Failed' at primary debug: var: patroni_restart_cmd.stdout_lines - when: patroni_restart_cmd is defined and (patroni_restart_cmd.rc == 0 and patroni_restart_cmd.stdout.find('Failed') != -1) + when: + - patroni_restart_cmd is defined + - patroni_restart_cmd.rc == 0 + - 
patroni_restart_cmd.stdout.find('Failed') != -1 changed_when: false - name: sleep after patroni restart - when: patroni_restart_cmd is defined and (patroni_restart_cmd.rc == 0 and patroni_restart_cmd.stdout.find('Failed') != -1) + when: + - patroni_restart_cmd is defined + - patroni_restart_cmd.rc == 0 + - patroni_restart_cmd.stdout.find('Failed') != -1 args: { executable: /bin/bash } shell: | sync; sync; diff --git a/roles/pgsql/tasks/util/patroni_restart_at_replica.yml b/roles/pgsql/tasks/util/patroni_restart_at_replica.yml index 687c099d9..c5ce76fc7 100644 --- a/roles/pgsql/tasks/util/patroni_restart_at_replica.yml +++ b/roles/pgsql/tasks/util/patroni_restart_at_replica.yml @@ -1,12 +1,15 @@ #!/usr/bin/ansible-playbook --- +#--------------------------------------------------------------# +# patroni restart affected replica nodes at replica individually +#--------------------------------------------------------------# - name: patroni restart {{ pg_cluster }} at replica individually tags: [ pg_hugepage, patroni, pg_launch, pt_restart ] become_user: "{{ dbsu }}" - when: pg_role != 'primary' + when: patroni_mode != 'remove' and pg_role != 'primary' block: - - name: retrieve current patroni member at {{ pg_role }} of {{ pg_cluster }} + - name: retrieve current patroni replica member of {{ pg_cluster }} command: /pg/bin/pg-member register: pg_member_name_cmd @@ -15,13 +18,16 @@ curr_pg_member_name: "{{ pg_member_name_cmd.stdout | default('') | trim }}" changed_when: false - - name: restart patroni member replica of {{ pg_cluster }} + - name: restart patroni replica member of {{ pg_cluster }} when: curr_pg_member_name is defined and curr_pg_member_name|length>0 command: /usr/bin/patronictl -c /pg/bin/patroni.yml restart --force {{ pg_cluster }} {{ curr_pg_member_name }} register: patroni_restart_cmd - name: print result if failed at replica - when: patroni_restart_cmd is defined and patroni_restart_cmd.rc is defined and patroni_restart_cmd.rc != 0 + when: + - 
patroni_restart_cmd is defined + - patroni_restart_cmd.rc is defined + - patroni_restart_cmd.rc != 0 debug: msg: | rc: {{ patroni_restart_cmd.rc }}, diff --git a/roles/pgsql/tasks/util/patroni_restart_cls.yml b/roles/pgsql/tasks/util/patroni_restart_cls.yml index a9ad5ea4e..d9bb2474f 100644 --- a/roles/pgsql/tasks/util/patroni_restart_cls.yml +++ b/roles/pgsql/tasks/util/patroni_restart_cls.yml @@ -2,12 +2,14 @@ --- - name: patroni restart cls {{ pg_cluster }} + tags: [ pg_hugepage, patroni, pg_launch, pt_restart ] become_user: "{{ dbsu }}" when: patroni_mode != 'remove' vars: dbsu: "{{ pg_dbsu|default('postgres') }}" block: - - name: restart {{ pg_cluster }} at primary + # if ansible_play_hosts_all contains primary node, restart all nodes at primary + - name: restart all nodes of {{ pg_cluster }} at primary when: pg_role == 'primary' include_tasks: patroni_restart_at_primary.yml @@ -16,7 +18,10 @@ need_restart_at_replica: "{{ not (hostvars[pg_primary_host_runtime].pt_restart_full_succeed | default(false) | string | trim | bool) }}" changed_when: false - - name: patroni restart at replica of {{ pg_cluster }} if needed {{ need_restart_at_replica }} + # if ansible_play_hosts_all not contains primary node, + # or primary node restart failed, + # restart affected replica nodes at replica + - name: patroni restart ansible_play_hosts_all at replica if needed include_tasks: patroni_restart_at_replica.yml when: pg_role != 'primary' and need_restart_at_replica == true From c2d015ce4af3b39ca08d568aaaf8a880df55b1af Mon Sep 17 00:00:00 2001 From: waiting <1661926154@qq.com> Date: Tue, 18 Mar 2025 15:27:29 +0800 Subject: [PATCH 09/12] feat(pgsql): delegate task to primary node if previous pause/resume failed --- roles/pgsql/tasks/grace_patroni_restart.yml | 7 +++---- roles/pgsql/tasks/util/grace_patroni_pause.yml | 16 +++++++++++++--- roles/pgsql/tasks/util/grace_patroni_resume.yml | 17 +++++++++++++---- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git 
a/roles/pgsql/tasks/grace_patroni_restart.yml b/roles/pgsql/tasks/grace_patroni_restart.yml index 4b2837050..e95205d2d 100644 --- a/roles/pgsql/tasks/grace_patroni_restart.yml +++ b/roles/pgsql/tasks/grace_patroni_restart.yml @@ -45,7 +45,7 @@ changed_when: false -- name: patroni restart pg_cls gracefully +- name: 0. patroni restart pg_cls gracefully tags: [ pg_hugepage, patroni, pg_launch, pt_restart ] when: patroni_mode != 'remove' become_user: "{{ dbsu }}" @@ -60,12 +60,11 @@ - name: 2. patroni restart pg_cls {{ pg_cluster }} include_tasks: util/patroni_restart_cls.yml - - name: 3. check pg ready once {{ pg_cluster }} + - name: 3. check pg ready {{ pg_cluster }} include_tasks: util/check_pg_ready.yml - run_once: true - name: 4.patroni resume pg_cls gracefully {{ pg_cluster }} - import_tasks: util/grace_patroni_resume.yml + include_tasks: util/grace_patroni_resume.yml run_once: true - name: 5. print message diff --git a/roles/pgsql/tasks/util/grace_patroni_pause.yml b/roles/pgsql/tasks/util/grace_patroni_pause.yml index ac5e9bd94..5dbb438b7 100644 --- a/roles/pgsql/tasks/util/grace_patroni_pause.yml +++ b/roles/pgsql/tasks/util/grace_patroni_pause.yml @@ -9,7 +9,6 @@ become_user: "{{ dbsu }}" vars: dbsu: "{{ pg_dbsu|default('postgres') }}" - any_errors_fatal: true block: - name: check is paused {{ pg_cluster }} import_tasks: is_patroni_paused.yml @@ -19,8 +18,19 @@ command: /usr/bin/patronictl -c /pg/bin/patroni.yml pause register: patroni_pause_result until: patroni_pause_result.rc == 0 and patroni_pause_result.stdout.find('Success') != -1 - retries: 5 - delay: 3 + retries: 2 + delay: 1 run_once: true + ignore_errors: true + + - name: check is paused {{ pg_cluster }} + import_tasks: is_patroni_paused.yml + + - name: delegate task to the primary node of {{ pg_cluster }} if previous failed + command: /usr/bin/patronictl -c /pg/bin/patroni.yml pause + when: + - is_patroni_paused == '' + - pg_primary_host_runtime != '' + delegate_to: "{{ pg_primary_host_runtime }}" 
... diff --git a/roles/pgsql/tasks/util/grace_patroni_resume.yml b/roles/pgsql/tasks/util/grace_patroni_resume.yml index ded8b9287..1b26a71d6 100644 --- a/roles/pgsql/tasks/util/grace_patroni_resume.yml +++ b/roles/pgsql/tasks/util/grace_patroni_resume.yml @@ -9,7 +9,6 @@ become_user: "{{ dbsu }}" vars: dbsu: "{{ pg_dbsu|default('postgres') }}" - any_errors_fatal: true block: - name: check is paused {{ pg_cluster }} import_tasks: is_patroni_paused.yml @@ -19,8 +18,18 @@ command: /usr/bin/patronictl -c /pg/bin/patroni.yml resume register: patroni_resume_result until: patroni_resume_result.rc == 0 and patroni_resume_result.stdout.find('Success') != -1 - retries: 5 - delay: 3 - run_once: true + retries: 2 + delay: 1 + ignore_errors: true + + - name: check is paused {{ pg_cluster }} + import_tasks: is_patroni_paused.yml + + - name: delegate task to the primary node of {{ pg_cluster }} if previous failed + command: /usr/bin/patronictl -c /pg/bin/patroni.yml resume + when: + - is_patroni_paused|length > 0 + - pg_primary_host_runtime != '' + delegate_to: "{{ pg_primary_host_runtime }}" ... 
From ad051544014b043a28dc0a59c0bcaa4d3252962d Mon Sep 17 00:00:00 2001 From: waiting <1661926154@qq.com> Date: Tue, 18 Mar 2025 15:30:57 +0800 Subject: [PATCH 10/12] refactor(pgsql): patroni restart node tested scenarios: - `./pgsql.yml -l -t pt_restart -e "pt_restart=true"` - all nodes have been restarted - `./pgsql.yml -l ',&' -t pt_restart -e "pt_restart=true"` - nodes in ansible_play_hosts_all have been restarted, other nodes in the same pg_cls ignored - `./pgsql.yml -l ',,&' -t pt_restart -e "pt_restart=true"` - nodes in ansible_play_hosts_all have been restarted, other nodes in the same pg_cls ignored - switchover triggered when patroni restart primary failed --- roles/pgsql/tasks/grace_patroni_restart.yml | 8 ++- .../tasks/util/patroni_current_member.yml | 20 +++++++ roles/pgsql/tasks/util/patroni_restart.yml | 31 ++++++++++ .../tasks/util/patroni_restart_at_primary.yml | 59 ------------------- .../tasks/util/patroni_restart_at_replica.yml | 38 ------------ .../pgsql/tasks/util/patroni_restart_cls.yml | 19 ++---- 6 files changed, 61 insertions(+), 114 deletions(-) create mode 100644 roles/pgsql/tasks/util/patroni_current_member.yml create mode 100644 roles/pgsql/tasks/util/patroni_restart.yml delete mode 100644 roles/pgsql/tasks/util/patroni_restart_at_primary.yml delete mode 100644 roles/pgsql/tasks/util/patroni_restart_at_replica.yml diff --git a/roles/pgsql/tasks/grace_patroni_restart.yml b/roles/pgsql/tasks/grace_patroni_restart.yml index e95205d2d..542f171dd 100644 --- a/roles/pgsql/tasks/grace_patroni_restart.yml +++ b/roles/pgsql/tasks/grace_patroni_restart.yml @@ -14,10 +14,12 @@ #--------------------------------------------------------------# # tested scenarios: # - ./pgsql.yml -l -t pt_restart -e "pt_restart=true" -# all nodes have been restarted +# - all nodes have been restarted # - ./pgsql.yml -l ',&' -t pt_restart -e "pt_restart=true" -# nodes in ansible_play_hosts_all have been restarted, -# other nodes in the same pg_cls ignored +# - nodes in 
ansible_play_hosts_all have been restarted, other nodes in the same pg_cls ignored +# - ./pgsql.yml -l ',,&' -t pt_restart -e "pt_restart=true" +# - nodes in ansible_play_hosts_all have been restarted, other nodes in the same pg_cls ignored +# - switchover triggered when patroni restart primary failed #--------------------------------------------------------------# - name: set variable diff --git a/roles/pgsql/tasks/util/patroni_current_member.yml b/roles/pgsql/tasks/util/patroni_current_member.yml new file mode 100644 index 000000000..dc606ba09 --- /dev/null +++ b/roles/pgsql/tasks/util/patroni_current_member.yml @@ -0,0 +1,20 @@ +#!/usr/bin/ansible-playbook +--- +#--------------------------------------------------------------# +# get current member of patroni cluster +#--------------------------------------------------------------# + +- name: get current member of patroni cluster + tags: [ pg_hugepage, patroni, pg_launch, pt_restart ] + become_user: "{{ pg_dbsu }}" + block: + - name: retrieve current patroni member of {{ pg_cluster }} + command: /pg/bin/pg-member + register: pg_member_name_cmd + + - name: set variable curr_pg_member_name + set_fact: + curr_pg_member_name: "{{ pg_member_name_cmd.stdout | default('') | trim }}" + changed_when: false + +... 
diff --git a/roles/pgsql/tasks/util/patroni_restart.yml b/roles/pgsql/tasks/util/patroni_restart.yml new file mode 100644 index 000000000..505abef1b --- /dev/null +++ b/roles/pgsql/tasks/util/patroni_restart.yml @@ -0,0 +1,31 @@ +#!/usr/bin/ansible-playbook +--- +#--------------------------------------------------------------# +# patroni restart nodes individually +#--------------------------------------------------------------# + +- name: patroni restart node + tags: [ pg_hugepage, patroni, pg_launch, pt_restart ] + become_user: "{{ pg_dbsu }}" + when: patroni_mode != 'remove' + block: + - name: get current patroni member of {{ pg_cluster }} + import_tasks: patroni_current_member.yml + + # result may be success but contains 'Failed' message, + # due to node has multiple ip addresses and got "Failed: ... status code=403, (Access is denied)" for replica + - name: patroni restart members of {{ pg_cluster }} + when: curr_pg_member_name is defined and curr_pg_member_name|length>0 + command: /usr/bin/patronictl -c /pg/bin/patroni.yml restart --force {{ pg_cluster }} {{ curr_pg_member_name }} + register: patroni_restart_cmd + failed_when: patroni_restart_cmd.rc != 0 or patroni_restart_cmd.stdout.find('Failed') != -1 + + - name: sleep after restart if succeed at primary + when: pg_role_runtime is defined and pg_role_runtime == 'primary' + args: { executable: /bin/bash } + shell: | + sync; sync; + sleep 3 + changed_when: false + +... 
diff --git a/roles/pgsql/tasks/util/patroni_restart_at_primary.yml b/roles/pgsql/tasks/util/patroni_restart_at_primary.yml deleted file mode 100644 index 2c289ba6b..000000000 --- a/roles/pgsql/tasks/util/patroni_restart_at_primary.yml +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/ansible-playbook ---- -#--------------------------------------------------------------# -# patroni restart **all** nodes of pg_cluster at primary -#--------------------------------------------------------------# - -- name: patroni restart primary pg_cluster at primary - tags: [ pg_hugepage, patroni, pg_launch, pt_restart ] - when: patroni_mode != 'remove' and pg_role == 'primary' - block: - # result may be success, but contains 'Failed' message, - # due to node has multiple ip addresses and got "Failed: ... status code=403, (Access is denied)" for replica - - name: patroni restart primary {{ pg_cluster }} at primary - become_user: "{{ dbsu }}" - vars: - dbsu: "{{ pg_dbsu|default('postgres') }}" - args: { executable: /bin/bash } - shell: | - /usr/bin/patronictl -c /pg/bin/patroni.yml restart --force {{ pg_cluster }} - register: patroni_restart_cmd - - - name: set pt_restart_full_succeed - when: - - patroni_restart_cmd is defined - - patroni_restart_cmd.rc == 0 - - patroni_restart_cmd.stdout.find('Failed') == -1 - set_fact: - pt_restart_full_succeed: true - - - name: print result if failed at primary - debug: - msg: | - rc: {{ patroni_restart_cmd.rc }}, - stdout: "{{ patroni_restart_cmd.stdout_lines }}", - stderr: "{{ patroni_restart_cmd.stderr_lines }}" - when: patroni_restart_cmd is defined and patroni_restart_cmd.rc is defined and patroni_restart_cmd.rc != 0 - changed_when: false - - - name: print result if succeed but contains 'Failed' at primary - debug: - var: patroni_restart_cmd.stdout_lines - when: - - patroni_restart_cmd is defined - - patroni_restart_cmd.rc == 0 - - patroni_restart_cmd.stdout.find('Failed') != -1 - changed_when: false - - - name: sleep after patroni restart - when: - 
- patroni_restart_cmd is defined - - patroni_restart_cmd.rc == 0 - - patroni_restart_cmd.stdout.find('Failed') != -1 - args: { executable: /bin/bash } - shell: | - sync; sync; - sleep 1 - changed_when: false - -... diff --git a/roles/pgsql/tasks/util/patroni_restart_at_replica.yml b/roles/pgsql/tasks/util/patroni_restart_at_replica.yml deleted file mode 100644 index c5ce76fc7..000000000 --- a/roles/pgsql/tasks/util/patroni_restart_at_replica.yml +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/ansible-playbook ---- -#--------------------------------------------------------------# -# patroni restart affected replica nodes at replica individually -#--------------------------------------------------------------# - -- name: patroni restart {{ pg_cluster }} at replica individually - tags: [ pg_hugepage, patroni, pg_launch, pt_restart ] - become_user: "{{ dbsu }}" - when: patroni_mode != 'remove' and pg_role != 'primary' - block: - - name: retrieve current patroni replica member of {{ pg_cluster }} - command: /pg/bin/pg-member - register: pg_member_name_cmd - - - name: set variable curr_pg_member_name - set_fact: - curr_pg_member_name: "{{ pg_member_name_cmd.stdout | default('') | trim }}" - changed_when: false - - - name: restart patroni replica member of {{ pg_cluster }} - when: curr_pg_member_name is defined and curr_pg_member_name|length>0 - command: /usr/bin/patronictl -c /pg/bin/patroni.yml restart --force {{ pg_cluster }} {{ curr_pg_member_name }} - register: patroni_restart_cmd - - - name: print result if failed at replica - when: - - patroni_restart_cmd is defined - - patroni_restart_cmd.rc is defined - - patroni_restart_cmd.rc != 0 - debug: - msg: | - rc: {{ patroni_restart_cmd.rc }}, - stdout: {{ patroni_restart_cmd.stdout }}, - stderr: {{ patroni_restart_cmd.stderr }} - changed_when: false - -... 
diff --git a/roles/pgsql/tasks/util/patroni_restart_cls.yml b/roles/pgsql/tasks/util/patroni_restart_cls.yml index d9bb2474f..9c38a0a26 100644 --- a/roles/pgsql/tasks/util/patroni_restart_cls.yml +++ b/roles/pgsql/tasks/util/patroni_restart_cls.yml @@ -8,21 +8,12 @@ vars: dbsu: "{{ pg_dbsu|default('postgres') }}" block: - # if ansible_play_hosts_all contains primary node, restart all nodes at primary - - name: restart all nodes of {{ pg_cluster }} at primary + - name: restart primary of {{ pg_cluster }} when: pg_role == 'primary' - include_tasks: patroni_restart_at_primary.yml + include_tasks: patroni_restart.yml - - name: set variable need_restart_at_replica at all nodes - set_fact: - need_restart_at_replica: "{{ not (hostvars[pg_primary_host_runtime].pt_restart_full_succeed | default(false) | string | trim | bool) }}" - changed_when: false - - # if ansible_play_hosts_all not contains primary node, - # or primary node restart failed, - # restart affected replica nodes at replica - - name: patroni restart ansible_play_hosts_all at replica if needed - include_tasks: patroni_restart_at_replica.yml - when: pg_role != 'primary' and need_restart_at_replica == true + - name: patroni restart replica of {{ pg_cluster }} + include_tasks: patroni_restart.yml + when: pg_role != 'primary' ... 
From 434e86bae1d4f5a2bc415460944450038d132a54 Mon Sep 17 00:00:00 2001 From: waiting <1661926154@qq.com> Date: Tue, 18 Mar 2025 16:35:54 +0800 Subject: [PATCH 11/12] feat(pgsql): set vm.hugetlb_shm_group if necessary when new_nr_hugepages equals pg_shared_memory_size_in_huge_pages --- roles/pgsql/tasks/util/pg_read_hugepage.yml | 11 +++++++++++ roles/pgsql/tasks/util/pg_write_hugepage.yml | 16 +++++++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/roles/pgsql/tasks/util/pg_read_hugepage.yml b/roles/pgsql/tasks/util/pg_read_hugepage.yml index 76b872a32..69d3e5c8a 100644 --- a/roles/pgsql/tasks/util/pg_read_hugepage.yml +++ b/roles/pgsql/tasks/util/pg_read_hugepage.yml @@ -118,4 +118,15 @@ {{ curr_nr_hugepages_cmd.stdout|int }} {% else %}0{% endif %} +- name: get gid from {{ pg_dbsu }} + command: /usr/bin/id -g {{ pg_dbsu }} + register: get_gid_cmd + ignore_errors: true + +- name: set variable pg_dbsu_gid + when: get_gid_cmd.rc == 0 + set_fact: + pg_dbsu_gid: "{{ get_gid_cmd.stdout }}" + changed_when: false + ... 
\ No newline at end of file diff --git a/roles/pgsql/tasks/util/pg_write_hugepage.yml b/roles/pgsql/tasks/util/pg_write_hugepage.yml index a973aef43..1fe8883d6 100644 --- a/roles/pgsql/tasks/util/pg_write_hugepage.yml +++ b/roles/pgsql/tasks/util/pg_write_hugepage.yml @@ -6,16 +6,26 @@ - name: write hugepage sysctl parameter if needed become: yes when: new_nr_hugepages is defined + vars: + pg_hugetlb_shm_group: '' block: + - name: set variable pg_hugetlb_shm_group if needed + when: + - pg_dbsu_gid is defined and pg_dbsu_gid|int > 0 + - pg_shared_memory_size_in_huge_pages is defined + - new_nr_hugepages|int == pg_shared_memory_size_in_huge_pages|int + set_fact: + pg_hugetlb_shm_group: "vm.hugetlb_shm_group = {{ pg_dbsu_gid }}" + changed_when: false + - name: write hugepage sysctl parameter vars: txt: | vm.nr_hugepages = {{ new_nr_hugepages }} + {{ pg_hugetlb_shm_group }} copy: dest: /etc/sysctl.d/hugepage.conf - # write txt first line to /etc/sysctl.d/hugepage.conf - content: | - {{ txt.split('\n')[0] }} + content: "{{ txt }}" - name: activate tuned profile, prepare for database performance impact! 
args: { executable: /bin/bash } From d6205bf641c8fadf9c44a27c1800922505677754 Mon Sep 17 00:00:00 2001 From: waiting <1661926154@qq.com> Date: Tue, 18 Mar 2025 17:17:03 +0800 Subject: [PATCH 12/12] feat(pgsql): update pg-backup strategy before update hugepage - full backup when ansible_play_hosts_all contains primary - full or incremental backup when ansible_play_hosts_all contains no primary tested scenarios: - `./pgsql.yml -l -t pg_hugepage` - full backup - `./pgsql.yml -l ',,&' -t pg_hugepage` - full backup - `./pgsql.yml -l ',&' -t pg_hugepage` - full or incremental backup --- roles/pgsql/tasks/pg_hugepage.yml | 5 ++-- roles/pgsql/tasks/util/pg_backup.yml | 41 +++++++++++++++++++++++++--- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/roles/pgsql/tasks/pg_hugepage.yml b/roles/pgsql/tasks/pg_hugepage.yml index 522efecec..0e361f8e1 100644 --- a/roles/pgsql/tasks/pg_hugepage.yml +++ b/roles/pgsql/tasks/pg_hugepage.yml @@ -59,9 +59,10 @@ pt_restart: {{ pt_restart|bool }} changed_when: false - - name: full backup pg before hugepage update if has primary + # full backup if the play contains the primary; otherwise run a backup (full or incremental) on the primary + - name: pg-backup before update hugepage include_tasks: util/pg_backup.yml - when: hugepages_need_update|bool and pg_role == 'primary' + when: hugepages_need_update|bool - name: update hugepage include_tasks: util/pg_write_hugepage.yml diff --git a/roles/pgsql/tasks/util/pg_backup.yml b/roles/pgsql/tasks/util/pg_backup.yml index cd595d221..7f3380240 100644 --- a/roles/pgsql/tasks/util/pg_backup.yml +++ b/roles/pgsql/tasks/util/pg_backup.yml @@ -1,9 +1,22 @@ #!/usr/bin/ansible-playbook --- +#--------------------------------------------------------------# +# backup pg_cls if pgbackrest enabled [pg_backup] +# - full backup when ansible_play_hosts_all contains primary +# - full or incremental backup when ansible_play_hosts_all contains no primary +#--------------------------------------------------------------# +#
tested scenarios: +# - `./pgsql.yml -l -t pg_hugepage` +# - full backup +# - `./pgsql.yml -l ',,&' -t pg_hugepage` +# - full backup +# - `./pgsql.yml -l ',&' -t pg_hugepage` +# - full or incremental backup +#--------------------------------------------------------------# - name: set variable tags: [ pg_hugepage, patroni, pg_launch, pt_restart ] - when: pg_role_runtime is undefined + when: pg_role_runtime is undefined or pg_primary_host_runtime is undefined block: - name: run pg-role command: /pg/bin/pg-role @@ -13,29 +26,49 @@ set_fact: pg_role_runtime: "{{ pg_role_cmd.stdout | default(pg_role) | trim }}" + - name: set variable pg_role_runtime + set_fact: + pg_role_runtime: "{{ pg_role_cmd.stdout | default(pg_role) | trim }}" + + - name: set pg_primary_host_runtime + tags: [ pg_hugepage, patroni, pg_launch, pt_restart ] + import_tasks: util/patroni_primary_runtime.yml - name: pg_backup tags: [ pg_hugepage, patroni, pg_launch, pt_restart ] when: pgbackrest_enabled|bool + become_user: "{{ pg_dbsu }}" vars: - dbsu: "{{ pg_dbsu|default('postgres') }}" pg_role: "{{ pg_role_runtime }}" block: - name: full backup cls {{ pg_cluster }} - become_user: "{{ dbsu }}" when: pg_role == 'primary' command: /pg/bin/pg-backup full register: back_ret_cmd ignore_errors: false - name: show backup result for {{ pg_cluster }} - when: pg_role == 'primary' + when: pg_role == 'primary' and back_ret_cmd is defined debug: msg: | STDOUT {{ back_ret_cmd.stdout_lines }}, STDERR {{ back_ret_cmd.stderr_lines }} changed_when: false + - name: set variable pt_backup_at_primary from primary + when: pg_role != 'primary' and pg_role_runtime is defined + set_fact: + pt_backup_at_primary: "{{ (hostvars[pg_primary_host_runtime].back_ret_cmd | default(false) | string | trim | bool) }}" + changed_when: false + + - name: full or incr backup cls {{ pg_cluster }} if contains no primary at primary + when: pg_role != 'primary' and (not pt_backup_at_primary) and pg_role_runtime is defined + command: 
/pg/bin/pg-backup + register: back_ret_cmd + delegate_to: "{{ pg_primary_host_runtime }}" + run_once: true + + rescue: - name: pg-backup failed for {{ pg_cluster }} debug: