diff --git a/files/postgres/pg-member b/files/postgres/pg-member
new file mode 100644
index 000000000..f55dc8851
--- /dev/null
+++ b/files/postgres/pg-member
@@ -0,0 +1,40 @@
+#!/bin/bash
+set -uo pipefail
+#==============================================================#
+# File      :   pg-member
+# Desc      :   retrieve patroni member name of the local node
+# Path      :   /pg/bin/pg-member
+# Depend    :   patroni, curl, jq, ss
+# License   :   AGPLv3 @ https://pigsty.io/docs/about/license
+# Author    :   waiting
+#==============================================================#
+# Prints the member name on stdout, or nothing when it cannot be
+# determined. Exits 0 either way so callers only test the output.
+
+# method 1: get patroni member name from patroni REST API
+# find the address listening on :8008; wildcard binds (0.0.0.0, *, [::])
+# are skipped so that we fall back to the loopback address
+api_ip=$(ss -tlnH 2>/dev/null | awk '$4 ~ /:8008$/ { sub(/:8008$/, "", $4); print $4 }' \
+  | grep -vE '^(0\.0\.0\.0|\*|\[::\])$' | head -n1)
+[ -z "$api_ip" ] && api_ip="127.0.0.1"
+
+# `// empty` keeps jq from printing the literal string "null" when the field is missing
+name=$(curl -s --connect-timeout 2 --max-time 5 "http://${api_ip}:8008/patroni" 2>/dev/null \
+  | jq -r '.patroni.name // empty' 2>/dev/null)
+
+# method 2: get patroni member name from patronictl, matching on local IPs
+if [ -z "$name" ]; then
+  read -r -a ips <<< "$(hostname -I 2>/dev/null)"
+  if [ ${#ips[@]} -eq 0 ]; then
+    # cannot get local IP address: nothing to match against
+    exit 0
+  fi
+  name=$(/usr/bin/patronictl -c /pg/bin/patroni.yml list -f json 2>/dev/null \
+    | jq -r --arg ips "${ips[*]}" \
+        '($ips | split(" ")) as $l | .[] | select(.Host as $h | $l | index($h)) | .Member // empty' 2>/dev/null)
+fi
+
+if [[ -n "$name" ]]; then
+  echo "$name"
+fi
+
diff --git a/files/postgres/pg-primary-host b/files/postgres/pg-primary-host
new file mode 100644
index 000000000..f3459f9c9
--- /dev/null
+++ b/files/postgres/pg-primary-host
@@ -0,0 +1,15 @@
+#!/bin/bash
+set -uo pipefail
+#==============================================================#
+# File      :   pg-primary-host
+# Desc      :   retrieve patroni primary (leader) host via patronictl
+# Path      :   /pg/bin/pg-primary-host
+# Depend    :   patroni, jq
+# License   :   AGPLv3 @ https://pigsty.io/docs/about/license
+# Author    :   waiting
+#==============================================================#
+
+# prints an empty line and still exits 0 when no leader can be found
+name=$(/usr/bin/patronictl -c /pg/bin/patroni.yml list -f json 2>/dev/null | jq -r '.[] | select(.Role == "Leader") | .Host')
+echo "$name"
+
diff --git a/files/postgres/pg-primary-member b/files/postgres/pg-primary-member
new file mode 100644
index 000000000..35bd159ab
--- /dev/null
+++ b/files/postgres/pg-primary-member
@@ -0,0 +1,15 @@
+#!/bin/bash
+set -uo pipefail
+#==============================================================#
+# File      :   pg-primary-member
+# Desc      :   retrieve patroni primary (leader) member name via patronictl
+# Path      :   /pg/bin/pg-primary-member
+# Depend    :   patroni, jq
+# License   :   AGPLv3 @ https://pigsty.io/docs/about/license
+# Author    :   waiting
+#==============================================================#
+
+# prints an empty line and still exits 0 when no leader can be found
+name=$(/usr/bin/patronictl -c /pg/bin/patroni.yml list -f json 2>/dev/null | jq -r '.[] | select(.Role == "Leader") | .Member')
+echo "$name"
+
diff --git a/roles/node/defaults/main.yml b/roles/node/defaults/main.yml
index 0545f6b63..07af703c7 100644
--- a/roles/node/defaults/main.yml
+++ b/roles/node/defaults/main.yml
@@ -38,7 +38,8 @@ node_disable_swap: false          # disable node swap, use with caution
 node_static_network: true         # preserve dns resolver settings after reboot
 node_disk_prefetch: false         # setup disk prefetch on HDD to increase performance
 node_kernel_modules: [ softdog, br_netfilter, ip_vs, ip_vs_rr, ip_vs_wrr, ip_vs_sh ]
-node_hugepage_count: 0            # number of 2MB hugepage, take precedence over ratio
+node_hugepage_count: 0            # number of 2MB hugepage, take precedence over ratio,
+                                  # -1: use shared_memory_size_in_huge_pages calculated by pg (PG 15+ available)
 node_hugepage_ratio: 0            # node mem hugepage ratio, 0 disable it by default
 node_overcommit_ratio: 0          # node mem overcommit ratio, 0 disable it by default
 node_tune: oltp                   # node tuned profile: none,oltp,olap,crit,tiny
diff --git a/roles/pgsql/tasks/grace_patroni_restart.yml b/roles/pgsql/tasks/grace_patroni_restart.yml
new file mode 100644
index 000000000..542f171dd
--- /dev/null
+++ b/roles/pgsql/tasks/grace_patroni_restart.yml
@@ -0,0 +1,95 @@
+#!/usr/bin/ansible-playbook
+---
+#--------------------------------------------------------------#
+# patroni restart pg_cls gracefully [pt_restart], avoiding primary/standby switch
+#--------------------------------------------------------------#
+# inner steps:
+#   1. patroni pause
+#   2. patroni restart pg_cls
+#   3. patroni resume (also attempted when a step fails, before the play is failed)
+#--------------------------------------------------------------#
+# usage / tested scenarios:
+#   - ./pgsql.yml -l <cls> -t pt_restart -e "pt_restart=true"
+#       all nodes have been restarted
+#   - ./pgsql.yml -l '<ip>,&<cls>' -t pt_restart -e "pt_restart=true"
+#       nodes in ansible_play_hosts_all have been restarted, other nodes in the same pg_cls ignored
+#   - ./pgsql.yml -l '<ip1>,<ip2>,&<cls>' -t pt_restart -e "pt_restart=true"
+#       nodes in ansible_play_hosts_all have been restarted, other nodes in the same pg_cls ignored
+#       switchover triggered when patroni restart primary failed
+#--------------------------------------------------------------#
+
+- name: set variable
+  tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
+  when: pg_role_runtime is undefined or pg_primary_host_runtime is undefined
+  block:
+    - name: run pg-role
+      command: /pg/bin/pg-role
+      register: pg_role_cmd
+
+    - name: set variable pg_role_runtime
+      set_fact:
+        pg_role_runtime: "{{ pg_role_cmd.stdout | default(pg_role) | trim }}"
+
+    - name: set pg_primary_host_runtime
+      tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
+      import_tasks: util/patroni_primary_runtime.yml
+
+    - name: print variables
+      debug:
+        msg: |
+          pg_role_runtime: {{ pg_role_runtime|default('') }},
+          pg_primary_host_runtime: {{ pg_primary_host_runtime|default('') }},
+          pg_primary_member_runtime: {{ pg_primary_member_runtime|default('') }}
+
+- name: 0. patroni restart pg_cls gracefully
+  tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
+  when: patroni_mode != 'remove'
+  become_user: "{{ dbsu }}"
+  vars:
+    pg_role: "{{ pg_role_runtime }}"
+    dbsu: "{{ pg_dbsu|default('postgres') }}"
+  block:
+    - name: 1. patroni pause gracefully {{ pg_cluster }}
+      include_tasks: util/grace_patroni_pause.yml
+      run_once: true
+
+    - name: 2. patroni restart pg_cls {{ pg_cluster }}
+      include_tasks: util/patroni_restart_cls.yml
+
+    - name: 3. check pg ready {{ pg_cluster }}
+      include_tasks: util/check_pg_ready.yml
+
+    - name: 4.patroni resume pg_cls gracefully {{ pg_cluster }}
+      include_tasks: util/grace_patroni_resume.yml
+      run_once: true
+
+    - name: 5. print message
+      debug:
+        msg: finally, check if all postgres is ready {{ pg_cluster }}
+
+    # finally, check if all postgres is ready
+    - import_tasks: util/check_pg_ready.yml
+
+    - name: 6. re-set variable pt_restart to false
+      set_fact:
+        pt_restart: false
+
+  rescue:
+    - name: check postgres ready failed for {{ pg_cluster }}
+      debug:
+        msg: |
+          rc: {{ pg_ready_result.rc }}
+          STDOUT: {{ pg_ready_result.stdout }}
+          STDERR: {{ pg_ready_result.stderr }}
+      when: pg_ready_result is defined and pg_ready_result.rc != 0
+
+    # a failure after step 1 must not leave the cluster in maintenance mode
+    - name: resume patroni after failure {{ pg_cluster }}
+      include_tasks: util/grace_patroni_resume.yml
+
+    # fail instead of `meta: end_play`, so the run stops on all hosts with a non-zero exit
+    - name: Exit Playbook due to error
+      any_errors_fatal: true
+      fail:
+        msg: "graceful patroni restart failed for {{ pg_cluster }}"
+...
diff --git a/roles/pgsql/tasks/main.yml b/roles/pgsql/tasks/main.yml
index 1e1dd267c..e77114e6e 100644
--- a/roles/pgsql/tasks/main.yml
+++ b/roles/pgsql/tasks/main.yml
@@ -146,6 +146,21 @@
   vars: { database: "{{ item }}" }
   with_items: "{{ pg_databases }}"
 
+#--------------------------------------------------------------#
+# pg hugepages                                     [pg_hugepage]
+#--------------------------------------------------------------#
+- import_tasks: pg_hugepage.yml
+  when: patroni_enabled|bool
+  tags: [ patroni, pg_launch, pg_hugepage ]
+
+#--------------------------------------------------------------#
+# patroni restart pg_cls gracefully                 [pt_restart]
+#--------------------------------------------------------------#
+- name: patroni restart pg_cls gracefully
+  include_tasks: grace_patroni_restart.yml
+  when: patroni_enabled|bool and (pt_restart is defined and pt_restart|bool)
+  tags: [ patroni, pg_launch, pt_restart, pg_hugepage ]
+
 #--------------------------------------------------------------#
 # Summary                                              [pg_done]
 #--------------------------------------------------------------#
diff --git a/roles/pgsql/tasks/pg_hugepage.yml b/roles/pgsql/tasks/pg_hugepage.yml
new file mode 100644
index 000000000..0e361f8e1
--- /dev/null
+++ b/roles/pgsql/tasks/pg_hugepage.yml
@@ -0,0 +1,70 @@
+---
+#--------------------------------------------------------------#
+# Enable hugepage for pg and restart pg cluster    [pg_hugepage]
+# if `node_hugepage_count` or `node_hugepage_ratio` changed,
+# it will full backup the pg_cls and restart it gracefully
+# support usage:
+#   - `pig install`
+#   - `bin/pgsql-add <cls>`
+#   - `bin/pgsql-add <cls> <ip>`
+#       service of nodes in ansible_play_hosts_all will be unavailable during the process,
+#       other nodes in the same pg_cls ignored
+#   - `pgsql.yml -l <cls> -t pg_hugepage`
+#       entire service of this postgres cluster will be unavailable during the process
+#   - `pgsql.yml -l '<ip>,&<cls>' -t pg_hugepage`
+#       service of nodes in ansible_play_hosts_all will be unavailable during the process,
+#       other nodes in the same pg_cls ignored
+#--------------------------------------------------------------#
+# calculation rules:
+#   1. `shared_memory_size_in_huge_pages` (from PG15+): if `node_hugepage_count` is -1 and huge_pages (from PG) != off
+#   2. if `node_hugepage_count` > 0
+#       - `shared_memory_size_in_huge_pages`: if `shared_memory_size_in_huge_pages` > `node_hugepage_count`
+#       - `node_hugepage_count`
+#   3. disable hugepage if `node_hugepage_count` is 0
+#   4. if `node_hugepage_ratio` > 0
+#       - `shared_memory_size_in_huge_pages`: if `shared_memory_size_in_huge_pages` > pages calculated from `node_hugepage_ratio`
+#       - page value calculated from `node_hugepage_ratio`
+# All above rules and usages are tested successfully on Rocky Linux 9
+#--------------------------------------------------------------#
+
+# start from 0/0; util/pg_read_hugepage.yml overwrites both when it can determine them
+- name: Set default value
+  set_fact:
+    curr_nr_hugepages: 0
+    new_nr_hugepages: 0
+  changed_when: false
+
+- name: enable hugepage for pg
+  tags: [ pg_hugepage ]
+  block:
+    - import_tasks: util/pg_read_hugepage.yml
+
+    - name: calculate hugepage need update
+      set_fact:
+        hugepages_need_update: "{{ new_nr_hugepages|int != curr_nr_hugepages|int }}"
+      changed_when: false
+
+    - name: set pt_restart according to hugepages_need_update
+      set_fact:
+        pt_restart: "{{ hugepages_need_update|bool }}"
+      changed_when: false
+
+    - name: print nr_hugepages values
+      when: new_nr_hugepages is defined and curr_nr_hugepages is defined
+      debug:
+        msg: |
+          curr_nr_hugepages: {{ curr_nr_hugepages|trim }}, new_nr_hugepages: {{ new_nr_hugepages|trim }},
+          hugepages_need_update: {{ hugepages_need_update|bool }},
+          pt_restart: {{ pt_restart|bool }}
+      changed_when: false
+
+    # full backup if the play contains the primary, otherwise backup (full or incremental) at primary
+    - name: pg-backup before update hugepage
+      include_tasks: util/pg_backup.yml
+      when: hugepages_need_update|bool
+
+    - name: update hugepage
+      include_tasks: util/pg_write_hugepage.yml
+      when: hugepages_need_update|bool
+
+...
diff --git a/roles/pgsql/tasks/util/check_pg_ready.yml b/roles/pgsql/tasks/util/check_pg_ready.yml
new file mode 100644
index 000000000..f62779b9f
--- /dev/null
+++ b/roles/pgsql/tasks/util/check_pg_ready.yml
@@ -0,0 +1,38 @@
+#!/usr/bin/ansible-playbook
+---
+#--------------------------------------------------------------#
+# wait for postgres to accept connections; the probe result is
+# published as pg_ready_result whether the probe succeeds or fails
+#--------------------------------------------------------------#
+
+- name: check postgres ready
+  tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
+  vars:
+    dbsu: "{{ pg_dbsu|default('postgres') }}"
+  block:
+    # best effort only: the pg_isready probe below decides success
+    - name: wait for postgres ready
+      wait_for:
+        host: "{{ inventory_hostname }}"
+        port: "{{ pg_port }}"
+        state: started
+        timeout: 60
+      ignore_errors: true
+
+    - name: probe postgres and publish the outcome
+      block:
+        - name: check postgres ready
+          become_user: "{{ dbsu }}"
+          command: "{{ pg_bin_dir }}/pg_isready -t 5 -p {{ pg_port }}"
+          register: result
+          until: result.rc == 0
+          retries: 6
+          delay: 5
+          changed_when: false
+      always:
+        # runs on failure too, so rescue handlers of callers can report rc/stdout/stderr
+        - name: Set fact pg_ready_result
+          set_fact:
+            pg_ready_result: "{{ result }}"
+
+...
diff --git a/roles/pgsql/tasks/util/grace_patroni_pause.yml b/roles/pgsql/tasks/util/grace_patroni_pause.yml
new file mode 100644
index 000000000..5dbb438b7
--- /dev/null
+++ b/roles/pgsql/tasks/util/grace_patroni_pause.yml
@@ -0,0 +1,37 @@
+#!/usr/bin/ansible-playbook
+---
+#--------------------------------------------------------------#
+# patroni pause pg_cls gracefully
+#--------------------------------------------------------------#
+
+- name: patroni pause gracefully
+  tags: [ grace_patroni_pause ]
+  become_user: "{{ dbsu }}"
+  vars:
+    dbsu: "{{ pg_dbsu|default('postgres') }}"
+  block:
+    - name: check is paused {{ pg_cluster }}
+      import_tasks: is_patroni_paused.yml
+
+    # failure is tolerated here: the state is re-checked and the pause retried on the primary below
+    - name: patroni pause {{ pg_cluster }}
+      when: is_patroni_paused | default('', true) | length == 0
+      command: /usr/bin/patronictl -c /pg/bin/patroni.yml pause
+      register: patroni_pause_result
+      until: patroni_pause_result.rc == 0 and patroni_pause_result.stdout.find('Success') != -1
+      retries: 2
+      delay: 1
+      run_once: true
+      ignore_errors: true
+
+    - name: check is paused {{ pg_cluster }}
+      import_tasks: is_patroni_paused.yml
+
+    - name: delegate task to the primary node of {{ pg_cluster }} if previous failed
+      command: /usr/bin/patronictl -c /pg/bin/patroni.yml pause
+      when:
+        - is_patroni_paused | default('', true) | length == 0
+        - pg_primary_host_runtime | default('') | length > 0
+      delegate_to: "{{ pg_primary_host_runtime }}"
+
+...
diff --git a/roles/pgsql/tasks/util/grace_patroni_resume.yml b/roles/pgsql/tasks/util/grace_patroni_resume.yml
new file mode 100644
index 000000000..1b26a71d6
--- /dev/null
+++ b/roles/pgsql/tasks/util/grace_patroni_resume.yml
@@ -0,0 +1,36 @@
+#!/usr/bin/ansible-playbook
+---
+#--------------------------------------------------------------#
+# patroni resume pg_cls gracefully
+#--------------------------------------------------------------#
+
+- name: patroni resume pg_cls gracefully
+  tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
+  become_user: "{{ dbsu }}"
+  vars:
+    dbsu: "{{ pg_dbsu|default('postgres') }}"
+  block:
+    - name: check is paused {{ pg_cluster }}
+      import_tasks: is_patroni_paused.yml
+
+    # failure is tolerated here: the state is re-checked and the resume retried on the primary below
+    - name: patroni resume {{ pg_cluster }}
+      when: is_patroni_paused | default('', true) | length > 0
+      command: /usr/bin/patronictl -c /pg/bin/patroni.yml resume
+      register: patroni_resume_result
+      until: patroni_resume_result.rc == 0 and patroni_resume_result.stdout.find('Success') != -1
+      retries: 2
+      delay: 1
+      ignore_errors: true
+
+    - name: check is paused {{ pg_cluster }}
+      import_tasks: is_patroni_paused.yml
+
+    - name: delegate task to the primary node of {{ pg_cluster }} if previous failed
+      command: /usr/bin/patronictl -c /pg/bin/patroni.yml resume
+      when:
+        - is_patroni_paused | default('', true) | length > 0
+        - pg_primary_host_runtime | default('') | length > 0
+      delegate_to: "{{ pg_primary_host_runtime }}"
+
+...
diff --git a/roles/pgsql/tasks/util/grace_start_pg.yml b/roles/pgsql/tasks/util/grace_start_pg.yml
new file mode 100644
index 000000000..d1758b1cc
--- /dev/null
+++ b/roles/pgsql/tasks/util/grace_start_pg.yml
@@ -0,0 +1,57 @@
+#!/usr/bin/ansible-playbook
+---
+#--------------------------------------------------------------#
+# start postgres cluster gracefully [grace_start_pg]
+#--------------------------------------------------------------#
+
+- name: start postgres cluster gracefully
+  tags: [ grace_start_pg ]
+  vars:
+    dbsu: "{{ pg_dbsu|default('postgres') }}"
+  block:
+    # hosts with pg_role == 'primary' first, then the rest (NOTE: relies on task-by-task execution — confirm strategy)
+    - import_tasks: start_pg.yml
+      when: pg_role == 'primary'
+
+    - import_tasks: start_pg.yml
+      when: pg_role != 'primary'
+
+    - name: sleep 5 seconds before patroni resume if needed
+      when: patroni_mode != 'remove'
+      command: sleep 5
+      changed_when: false
+
+    - import_tasks: check_pg_ready.yml
+
+    - name: run patronictl list
+      become_user: "{{ dbsu }}"
+      when: patroni_mode != 'remove'
+      command: /usr/bin/patronictl -c /pg/bin/patroni.yml list -f tsv
+      register: patronictl_list
+      changed_when: false
+
+    - name: check patroni status all ready (no stopped status) and set fact
+      when: patroni_mode != 'remove'
+      set_fact:
+        patroni_all_ready: "{{ patronictl_list.stdout_lines | select('search', 'stopped') | list | length == 0 }}"
+
+    - name: sleep extra 15 seconds before patroni resume if needed
+      when: patroni_mode != 'remove' and not (patroni_all_ready | bool)
+      command: sleep 15
+      changed_when: false
+
+  rescue:
+    - name: check postgres ready failed for {{ pg_cluster }}
+      debug:
+        msg: |
+          rc: {{ pg_ready_result.rc }}
+          STDOUT: {{ pg_ready_result.stdout }}
+          STDERR: {{ pg_ready_result.stderr }}
+      when: pg_ready_result is defined and pg_ready_result.rc != 0
+
+    # fail instead of `meta: end_play`, so the run stops on all hosts with a non-zero exit
+    - name: Exit Playbook due to error
+      any_errors_fatal: true
+      fail:
+        msg: "graceful postgres start failed for {{ pg_cluster }}"
+...
diff --git a/roles/pgsql/tasks/util/grace_stop_pg.yml b/roles/pgsql/tasks/util/grace_stop_pg.yml
new file mode 100644
index 000000000..dbbb79a48
--- /dev/null
+++ b/roles/pgsql/tasks/util/grace_stop_pg.yml
@@ -0,0 +1,27 @@
+#!/usr/bin/ansible-playbook
+---
+#--------------------------------------------------------------#
+# stop postgres cluster gracefully [grace_stop_pg]
+#--------------------------------------------------------------#
+
+- name: stop postgres cluster gracefully
+  tags: [ grace_stop_pg ]
+  become_user: "{{ dbsu }}"
+  vars:
+    dbsu: "{{ pg_dbsu|default('postgres') }}"
+  any_errors_fatal: true
+  block:
+    # the replica task is listed before the primary task
+    - name: stop postgres replica of {{ pg_cluster }}
+      when: pg_role != 'primary'
+      shell: |
+        {{ pg_bin_dir }}/pg_ctl -D {{ pg_data }} stop
+        sleep 3
+
+    - name: stop postgres primary of {{ pg_cluster }}
+      when: pg_role == 'primary'
+      shell: |
+        {{ pg_bin_dir }}/pg_ctl -D {{ pg_data }} stop
+        sync; sync;
+
+...
diff --git a/roles/pgsql/tasks/util/is_patroni_paused.yml b/roles/pgsql/tasks/util/is_patroni_paused.yml
new file mode 100644
index 000000000..139ce80e2
--- /dev/null
+++ b/roles/pgsql/tasks/util/is_patroni_paused.yml
@@ -0,0 +1,27 @@
+#!/usr/bin/ansible-playbook
+---
+#--------------------------------------------------------------#
+# detect patroni maintenance mode: is_patroni_paused is set to the
+# matched text 'Maintenance mode: on' when paused, '' otherwise
+#--------------------------------------------------------------#
+
+- name: check patroni maintenance mode
+  tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
+  become_user: "{{ pg_dbsu|default('postgres') }}"
+  block:
+    - name: run patroni list
+      args: { executable: /bin/bash }
+      shell: |
+        /usr/bin/patronictl -c /pg/bin/patroni.yml list 2>/dev/null | tail -n 3
+      register: patroni_status_cmd
+      changed_when: false
+      ignore_errors: true
+
+    # regex_search yields no value when nothing matches; default('', true) pins that case to ''
+    # so callers comparing against '' or using |length do not depend on how a null is rendered
+    - name: set variable is_patroni_paused by patroni maintenance mode
+      set_fact:
+        is_patroni_paused: >-
+          {{ patroni_status_cmd.stdout | default('') | regex_search('Maintenance mode: on') | default('', true) }}
+
+...
diff --git a/roles/pgsql/tasks/util/patroni_current_member.yml b/roles/pgsql/tasks/util/patroni_current_member.yml
new file mode 100644
index 000000000..dc606ba09
--- /dev/null
+++ b/roles/pgsql/tasks/util/patroni_current_member.yml
@@ -0,0 +1,20 @@
+#!/usr/bin/ansible-playbook
+---
+#--------------------------------------------------------------#
+# get current member of patroni cluster
+#--------------------------------------------------------------#
+
+- name: get current member of patroni cluster
+  tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
+  become_user: "{{ pg_dbsu|default('postgres') }}"
+  block:
+    - name: retrieve current patroni member of {{ pg_cluster }}
+      command: /pg/bin/pg-member
+      register: pg_member_name_cmd
+      changed_when: false
+
+    - name: set variable curr_pg_member_name
+      set_fact:
+        curr_pg_member_name: "{{ pg_member_name_cmd.stdout | default('') | trim }}"
+
+...
diff --git a/roles/pgsql/tasks/util/patroni_primary_runtime.yml b/roles/pgsql/tasks/util/patroni_primary_runtime.yml
new file mode 100644
index 000000000..1864e6291
--- /dev/null
+++ b/roles/pgsql/tasks/util/patroni_primary_runtime.yml
@@ -0,0 +1,37 @@
+#!/usr/bin/ansible-playbook
+---
+#--------------------------------------------------------------#
+# resolve the runtime patroni leader: member name and host
+#--------------------------------------------------------------#
+
+- name: retrieve runtime patroni primary member
+  tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
+  become_user: "{{ dbsu }}"
+  vars:
+    dbsu: "{{ pg_dbsu|default('postgres') }}"
+  block:
+    - name: retrieve runtime patroni primary member of {{ pg_cluster }}
+      command: /pg/bin/pg-primary-member
+      register: pg_pri_member_name_cmd
+      changed_when: false
+
+    - name: set variable pg_primary_member_runtime
+      set_fact:
+        pg_primary_member_runtime: "{{ pg_pri_member_name_cmd.stdout | default('') | trim }}"
+
+- name: retrieve runtime patroni primary host
+  tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
+  become_user: "{{ dbsu }}"
+  vars:
+    dbsu: "{{ pg_dbsu|default('postgres') }}"
+  block:
+    - name: retrieve runtime patroni primary host of {{ pg_cluster }}
+      command: /pg/bin/pg-primary-host
+      register: pg_pri_host_cmd
+      changed_when: false
+
+    - name: set variable pg_primary_host_runtime
+      set_fact:
+        pg_primary_host_runtime: "{{ pg_pri_host_cmd.stdout | default('') | trim }}"
+
+...
diff --git a/roles/pgsql/tasks/util/patroni_restart.yml b/roles/pgsql/tasks/util/patroni_restart.yml
new file mode 100644
index 000000000..505abef1b
--- /dev/null
+++ b/roles/pgsql/tasks/util/patroni_restart.yml
@@ -0,0 +1,31 @@
+#!/usr/bin/ansible-playbook
+---
+#--------------------------------------------------------------#
+# patroni restart nodes individually
+#--------------------------------------------------------------#
+
+- name: patroni restart node
+  tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
+  become_user: "{{ pg_dbsu|default('postgres') }}"
+  when: patroni_mode != 'remove'
+  block:
+    - name: get current patroni member of {{ pg_cluster }}
+      import_tasks: patroni_current_member.yml
+
+    # result may be success but contains 'Failed' message,
+    # due to node has multiple ip addresses and got "Failed: ... status code=403, (Access is denied)" for replica
+    - name: patroni restart members of {{ pg_cluster }}
+      when: curr_pg_member_name is defined and curr_pg_member_name|length>0
+      command: /usr/bin/patronictl -c /pg/bin/patroni.yml restart --force {{ pg_cluster }} {{ curr_pg_member_name }}
+      register: patroni_restart_cmd
+      failed_when: patroni_restart_cmd.rc != 0 or patroni_restart_cmd.stdout.find('Failed') != -1
+
+    - name: sleep after restart if succeed at primary
+      when: pg_role_runtime is defined and pg_role_runtime == 'primary'
+      args: { executable: /bin/bash }
+      shell: |
+        sync; sync;
+        sleep 3
+      changed_when: false
+
+...
diff --git a/roles/pgsql/tasks/util/patroni_restart_cls.yml b/roles/pgsql/tasks/util/patroni_restart_cls.yml
new file mode 100644
index 000000000..9c38a0a26
--- /dev/null
+++ b/roles/pgsql/tasks/util/patroni_restart_cls.yml
@@ -0,0 +1,20 @@
+#!/usr/bin/ansible-playbook
+---
+
+- name: patroni restart cls {{ pg_cluster }}
+  tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
+  become_user: "{{ dbsu }}"
+  when: patroni_mode != 'remove'
+  vars:
+    dbsu: "{{ pg_dbsu|default('postgres') }}"
+  block:
+    # the primary include is listed before the replica include
+    - name: restart primary of {{ pg_cluster }}
+      when: pg_role == 'primary'
+      include_tasks: patroni_restart.yml
+
+    - name: patroni restart replica of {{ pg_cluster }}
+      include_tasks: patroni_restart.yml
+      when: pg_role != 'primary'
+
+...
diff --git a/roles/pgsql/tasks/util/pg_backup.yml b/roles/pgsql/tasks/util/pg_backup.yml
new file mode 100644
index 000000000..7f3380240
--- /dev/null
+++ b/roles/pgsql/tasks/util/pg_backup.yml
@@ -0,0 +1,82 @@
+#!/usr/bin/ansible-playbook
+---
+#--------------------------------------------------------------#
+# backup pg_cls if pgbackrest enabled [pg_backup]
+#   - full backup when ansible_play_hosts_all contains primary
+#   - full or incremental backup when ansible_play_hosts_all contains no primary
+#--------------------------------------------------------------#
+# tested scenarios:
+#   - `./pgsql.yml -l <cls> -t pg_hugepage`
+#       full backup
+#   - `./pgsql.yml -l '<ip1>,<ip2>,&<cls>' -t pg_hugepage`
+#       full backup
+#   - `./pgsql.yml -l '<ip>,&<cls>' -t pg_hugepage`
+#       full or incremental backup
+#--------------------------------------------------------------#
+
+- name: set variable
+  tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
+  when: pg_role_runtime is undefined or pg_primary_host_runtime is undefined
+  block:
+    - name: run pg-role
+      command: /pg/bin/pg-role
+      register: pg_role_cmd
+      changed_when: false
+
+    - name: set variable pg_role_runtime
+      set_fact:
+        pg_role_runtime: "{{ pg_role_cmd.stdout | default(pg_role) | trim }}"
+
+    - name: set pg_primary_host_runtime
+      tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
+      import_tasks: util/patroni_primary_runtime.yml
+
+- name: pg_backup
+  tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
+  when: pgbackrest_enabled|bool
+  become_user: "{{ pg_dbsu|default('postgres') }}"
+  vars:
+    pg_role: "{{ pg_role_runtime }}"
+  block:
+    - name: full backup cls {{ pg_cluster }}
+      when: pg_role == 'primary'
+      command: /pg/bin/pg-backup full
+      register: back_ret_cmd
+
+    - name: show backup result for {{ pg_cluster }}
+      when: pg_role == 'primary' and back_ret_cmd is defined
+      debug:
+        msg: |
+          STDOUT {{ back_ret_cmd.stdout_lines }},
+          STDERR {{ back_ret_cmd.stderr_lines }}
+
+    # true only when the primary is part of this play and its full backup above returned rc 0;
+    # a skipped or missing result carries no rc and therefore counts as "not backed up"
+    - name: set variable pt_backup_at_primary from primary
+      when: pg_role != 'primary' and pg_role_runtime is defined
+      vars:
+        primary_backup: "{{ (hostvars[pg_primary_host_runtime] | default({})).back_ret_cmd | default({}) }}"
+      set_fact:
+        pt_backup_at_primary: "{{ primary_backup.rc is defined and primary_backup.rc|int == 0 }}"
+
+    - name: full or incr backup cls {{ pg_cluster }} if contains no primary at primary
+      when: pg_role != 'primary' and not (pt_backup_at_primary | default(false) | bool) and pg_role_runtime is defined
+      command: /pg/bin/pg-backup
+      register: back_ret_cmd
+      delegate_to: "{{ pg_primary_host_runtime }}"
+      run_once: true
+
+  rescue:
+    - name: pg-backup failed for {{ pg_cluster }}
+      debug:
+        msg: |
+          STDOUT: {{ back_ret_cmd.stdout }},
+          STDERR: {{ back_ret_cmd.stderr }}
+      when: back_ret_cmd is defined and back_ret_cmd.stdout is defined and back_ret_cmd.stderr is defined
+
+    # fail instead of `meta: end_play`, so the run stops on all hosts with a non-zero exit
+    - name: Exit Playbook due to backup failure
+      any_errors_fatal: true
+      fail:
+        msg: "pg-backup failed for {{ pg_cluster }}, hugepage update aborted"
+...
diff --git a/roles/pgsql/tasks/util/pg_read_hugepage.yml b/roles/pgsql/tasks/util/pg_read_hugepage.yml
new file mode 100644
index 000000000..69d3e5c8a
--- /dev/null
+++ b/roles/pgsql/tasks/util/pg_read_hugepage.yml
@@ -0,0 +1,120 @@
+---
+#--------------------------------------------------------------#
+# read and calculate hugepage settings
+#--------------------------------------------------------------#
+
+- name: Set default value of hugepage_count, hugepage_ratio
+  set_fact:
+    hugepage_count: "{{ node_hugepage_count|default(0) }}"
+    hugepage_ratio: "{{ node_hugepage_ratio|default(0) }}"
+    pg_hugepage_value: 0
+  changed_when: false
+
+- name: calculate hugepage from configs hugepage_count and hugepage_ratio
+  when: hugepage_count|int > 0 or hugepage_ratio|float > 0
+  block:
+    # 2097152 = bytes per 2MB page; an explicit count is capped at 90% of node memory,
+    # a ratio is only honoured below 0.90. Operands are cast explicitly because values
+    # stored through set_fact may be strings (str * float would raise a type error).
+    - name: calculate hugepage from configs hugepage_count and hugepage_ratio
+      set_fact:
+        pg_hugepage_value: |-
+          {% if hugepage_count|int > 0 %}
+          {% if hugepage_count|float >= node_mem_bytes|float / 2097152.0 * 0.90 %}
+          {{ (node_mem_bytes|int / 2097152.0 * 0.90)|round(0, 'ceil')|int }}
+          {% else %}
+          {{ hugepage_count|int }}
+          {% endif %}
+          {% elif hugepage_ratio|float > 0 and hugepage_ratio|float < 0.90 %}
+          {{ (node_mem_bytes|int / 2097152.0 * hugepage_ratio|float)|round(0, 'ceil')|int }}
+          {% else %}
+          0
+          {% endif %}
+
+    - name: print pg_hugepage_value
+      debug:
+        var: pg_hugepage_value
+
+- name: read pg hugepage settings (PG 15+)
+  become_user: "{{ pg_dbsu|default('postgres') }}"
+  when: pg_version|int >= 15 and (hugepage_count|int == -1 or pg_hugepage_value|int > 0)
+  vars:
+    dbsu: "{{ pg_dbsu|default('postgres') }}"
+    pg_bin_prefix: "{{ pg_bin_dir|default('/usr/pgsql/bin') }}"
+    localhost: "{{ pg_localhost|default('/var/run/postgresql') }}"
+    port: "{{ pg_port|default(5432) }}"
+  block:
+    - name: read pg huge_pages (PG 15+)
+      args: { executable: /bin/bash }
+      ignore_errors: true
+      shell: |
+        {{ pg_bin_prefix }}/psql -h {{ localhost }} -p {{ port }} -qwAXtc 'show huge_pages'
+      register: huge_pages_cmd
+      changed_when: false
+
+    - name: Set variable pg_hugepage_enabled
+      set_fact:
+        pg_hugepage_enabled: "{{ huge_pages_cmd.stdout | default('off') | lower in ['try', 'on'] }}"
+
+    - name: read pg shared_memory_size_in_huge_pages
+      args: { executable: /bin/bash }
+      when: pg_hugepage_enabled|bool
+      ignore_errors: true
+      shell: |
+        {{ pg_bin_prefix }}/psql -h {{ localhost }} -p {{ port }} -qwAXtc 'show shared_memory_size_in_huge_pages'
+      register: shared_memory_size_in_huge_pages_cmd
+      changed_when: false
+
+    - name: Set variable pg_shared_memory_size_in_huge_pages if hugepage enabled (try or on)
+      set_fact:
+        pg_shared_memory_size_in_huge_pages: |-
+          {% if pg_hugepage_enabled|bool and shared_memory_size_in_huge_pages_cmd.stdout is defined %}
+          {{ shared_memory_size_in_huge_pages_cmd.stdout|int }}
+          {% else %}0{% endif %}
+
+    - name: print pg hugepage settings
+      when: pg_hugepage_enabled is defined
+      debug:
+        msg: |
+          pg_hugepage_enabled: {{ pg_hugepage_enabled }},
+          pg_shared_memory_size_in_huge_pages: {{ pg_shared_memory_size_in_huge_pages }}
+
+- name: calculate real hugepage during node_hugepage_count -1
+  when: hugepage_count|int == -1 and pg_shared_memory_size_in_huge_pages|default(0)|int > 0
+  set_fact:
+    new_nr_hugepages: "{{ pg_shared_memory_size_in_huge_pages|default(0) }}"
+
+# the larger of the configured value and what postgres reports it needs
+- name: calculate real hugepage from pg_hugepage_value and pg_shared_memory_size_in_huge_pages
+  when: hugepage_count|int != -1
+  vars:
+    v_conf: "{{ pg_hugepage_value|default(0) }}"
+    v_pg: "{{ pg_shared_memory_size_in_huge_pages|default(0) }}"
+  set_fact:
+    new_nr_hugepages: "{{ [v_pg|int, v_conf|int] | max }}"
+
+- name: read current nr_hugepages by sysctl
+  args: { executable: /bin/bash }
+  become: true
+  shell: |
+    sysctl -n vm.nr_hugepages
+  register: curr_nr_hugepages_cmd
+  ignore_errors: true
+  changed_when: false
+
+- name: Set variable curr_nr_hugepages
+  when: curr_nr_hugepages_cmd.stdout is defined
+  set_fact:
+    curr_nr_hugepages: "{{ curr_nr_hugepages_cmd.stdout|int }}"
+
+- name: get gid from {{ pg_dbsu }}
+  command: /usr/bin/id -g {{ pg_dbsu }}
+  register: get_gid_cmd
+  ignore_errors: true
+  changed_when: false
+
+- name: set variable pg_dbsu_gid
+  when: get_gid_cmd.rc == 0
+  set_fact:
+    pg_dbsu_gid: "{{ get_gid_cmd.stdout }}"
+
+...
diff --git a/roles/pgsql/tasks/util/pg_write_hugepage.yml b/roles/pgsql/tasks/util/pg_write_hugepage.yml
new file mode 100644
index 000000000..1fe8883d6
--- /dev/null
+++ b/roles/pgsql/tasks/util/pg_write_hugepage.yml
@@ -0,0 +1,57 @@
+---
+#--------------------------------------------------------------#
+# Write hugepage sysctl parameter if needed
+#--------------------------------------------------------------#
+
+- name: write hugepage sysctl parameter if needed
+  become: true
+  when: new_nr_hugepages is defined
+  vars:
+    pg_hugetlb_shm_group: ''
+  block:
+    - name: set variable pg_hugetlb_shm_group if needed
+      when:
+        - pg_dbsu_gid is defined and pg_dbsu_gid|int > 0
+        - pg_shared_memory_size_in_huge_pages is defined
+        - new_nr_hugepages|int == pg_shared_memory_size_in_huge_pages|int
+      set_fact:
+        pg_hugetlb_shm_group: "vm.hugetlb_shm_group = {{ pg_dbsu_gid }}"
+
+    - name: write hugepage sysctl parameter
+      vars:
+        txt: |
+          vm.nr_hugepages = {{ new_nr_hugepages }}
+          {{ pg_hugetlb_shm_group }}
+      copy:
+        dest: /etc/sysctl.d/hugepage.conf
+        content: "{{ txt }}"
+        mode: "0644"
+
+    # flushes dirty pages and drops the page cache before loading the new sysctl file; errors are tolerated
+    - name: drop caches and apply hugepage sysctl, prepare for database performance impact!
+      args: { executable: /bin/bash }
+      shell: |
+        sync; echo 3 > /proc/sys/vm/drop_caches # 刷盘,释放系统缓存(请做好数据库性能受到冲击的准备)
+        sysctl -p /etc/sysctl.d/hugepage.conf
+      ignore_errors: true
+
+    - name: print new_nr_hugepages after write
+      become: true
+      args: { executable: /bin/bash }
+      shell: |
+        cat /proc/meminfo | grep HugePages_
+        sysctl -a | grep vm.nr_hugepages
+      ignore_errors: true
+      changed_when: false
+      register: hugepage_after_write_cmd
+
+    - name: print hugepage_after_write_cmd
+      when: hugepage_after_write_cmd is defined
+      debug:
+        msg: |
+          rc: {{ hugepage_after_write_cmd.rc }}
+          STDOUT: {{ hugepage_after_write_cmd.stdout }}
+          STDERR: {{ hugepage_after_write_cmd.stderr }}
+      ignore_errors: true
+
+...
diff --git a/roles/pgsql/tasks/util/start_pg.yml b/roles/pgsql/tasks/util/start_pg.yml
new file mode 100644
index 000000000..b9734f319
--- /dev/null
+++ b/roles/pgsql/tasks/util/start_pg.yml
@@ -0,0 +1,26 @@
+#!/usr/bin/ansible-playbook
+---
+#--------------------------------------------------------------#
+# start postgres
+#--------------------------------------------------------------#
+
+- name: start postgres
+  vars:
+    dbsu: "{{ pg_dbsu|default('postgres') }}"
+  block:
+    - name: check if postmaster.pid exists and is not empty
+      stat:
+        path: "{{ pg_data }}/postmaster.pid"
+      register: postmaster_pid_stat
+
+    # only start when there is no pid file, or the pid file is empty
+    - name: start postgres member of {{ pg_cluster }}
+      become_user: "{{ dbsu }}"
+      when: not postmaster_pid_stat.stat.exists or postmaster_pid_stat.stat.size == 0
+      args: { executable: /bin/bash }
+      shell: |
+        {{ pg_bin_dir }}/pg_ctl -D {{ pg_data }} start
+        sync; sync;
+        sleep 3
+
+...