-
Notifications
You must be signed in to change notification settings - Fork 0
fix(elasticsearch): roll config restarts #137
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,79 @@ | ||
| --- | ||
| # Test play: re-applies the Elasticsearch role with an extra JVM parameter and then | ||
| # checks (in the tasks below) that the resulting restart happened one node at a time. | ||
| - name: Trigger config-change rolling restart | ||
| hosts: all | ||
| vars: | ||
| elasticstack_full_stack: false | ||
| # Release under test: ELASTIC_RELEASE from the environment, or 9 when it is unset or empty. | ||
| elasticstack_release: "{{ lookup('env', 'ELASTIC_RELEASE') | default('9', true) | int }}" | ||
| elasticsearch_security: true | ||
| elasticsearch_http_protocol: https | ||
| elasticsearch_heap: "1" | ||
| elasticstack_no_log: false | ||
| # Presumably a throwaway credential for the test cluster only - do not reuse. | ||
| elasticsearch_elastic_password: "TestPassword123!" | ||
| # Extra JVM option; presumably the config difference meant to trigger the restart - confirm against the role. | ||
| elasticsearch_jvm_custom_parameters: | ||
| - "-Des.config.restart.marker=true" | ||
| # Retry count and delay for the post-restart health check; units are defined by the role (not shown here). | ||
| elasticsearch_config_restart_health_retries: 20 | ||
| elasticsearch_config_restart_health_delay: 3 | ||
| elasticsearch_cluster_settings: | ||
| # Quoted so the value is the string "true" rather than a YAML boolean. | ||
| action.destructive_requires_name: "true" | ||
| tasks: | ||
| # Sets the guard fact back to false - presumably so the shared role logic runs | ||
| # again when the role is included below; confirm against the role. | ||
| - name: Reset shared role guard for config restart test | ||
| ansible.builtin.set_fact: | ||
| _elasticstack_role_imported: false | ||
|
|
||
| # Capture the PID(s) whose full command line matches the Elasticsearch bootstrap | ||
| # class. changed_when: false keeps this read-only lookup from reporting a change. | ||
| - name: Record ES PID before config change | ||
| ansible.builtin.command: pgrep -f 'org.elasticsearch.bootstrap.Elasticsearch' | ||
| register: _es_pid_before_config_restart | ||
| changed_when: false | ||
|
|
||
| # Apply the Elasticsearch role; it sees the play-level vars, including the custom | ||
| # JVM parameter. | ||
| - name: Include Elasticsearch with changed JVM config | ||
| ansible.builtin.include_role: | ||
| name: oddly.elasticstack.elasticsearch | ||
|
|
||
| # Same PID lookup as before the role ran, registered under a separate name so the | ||
| # two results can be compared. | ||
| - name: Record ES PID after config change | ||
| ansible.builtin.command: pgrep -f 'org.elasticsearch.bootstrap.Elasticsearch' | ||
| register: _es_pid_after_config_restart | ||
| changed_when: false | ||
|
|
||
| # The pgrep output must differ between the two captures; an identical value means | ||
| # the same process is still running, i.e. no restart took place on this host. | ||
| - name: Verify config change restarted Elasticsearch | ||
| ansible.builtin.assert: | ||
| that: | ||
| - _es_pid_before_config_restart.stdout != _es_pid_after_config_restart.stdout | ||
| fail_msg: >- | ||
| Elasticsearch was not restarted after config change on {{ inventory_hostname }}. | ||
| PID remained {{ _es_pid_after_config_restart.stdout }}. | ||
| success_msg: >- | ||
| Elasticsearch restarted after config change on {{ inventory_hostname }}. | ||
|
|
||
| # Copy the restart timestamps of the first two hosts in the 'elasticsearch' group | ||
| # (presumably recorded by the role as host facts) into facts used by the | ||
| # rolling-order assertion. A missing timestamp falls back to 0 so that a "> 0" | ||
| # assertion can report it with a readable message, instead of this task aborting | ||
| # with an undefined-variable error. | ||
| - name: Build config restart rolling timeline # noqa: run-once[task] | ||
| ansible.builtin.set_fact: | ||
| _es_first_restart_host: "{{ groups['elasticsearch'][0] }}" | ||
| _es_second_restart_host: "{{ groups['elasticsearch'][1] }}" | ||
| _es_first_restart_started_usec: "{{ hostvars[groups['elasticsearch'][0]]._elasticsearch_config_restart_started_usec | default(0) | int }}" | ||
| _es_first_restart_health_complete_usec: "{{ hostvars[groups['elasticsearch'][0]]._elasticsearch_config_restart_health_complete_usec | default(0) | int }}" | ||
| _es_second_restart_started_usec: "{{ hostvars[groups['elasticsearch'][1]]._elasticsearch_config_restart_started_usec | default(0) | int }}" | ||
| _es_second_restart_health_complete_usec: "{{ hostvars[groups['elasticsearch'][1]]._elasticsearch_config_restart_health_complete_usec | default(0) | int }}" | ||
| run_once: true | ||
|
|
||
| # Rolling-restart check, evaluated once for the whole play. It requires: | ||
| #   - exactly two hosts in the 'elasticsearch' group; | ||
| #   - all four timestamps to be non-zero; | ||
| #   - on each node, restart start <= health-check completion; | ||
| #   - the first node's health-check completion <= the second node's restart start. | ||
| - name: Verify config restart was rolling # noqa: run-once[task] | ||
| ansible.builtin.assert: | ||
| that: | ||
| - groups['elasticsearch'] | length == 2 | ||
| - _es_first_restart_started_usec | int > 0 | ||
| - _es_first_restart_health_complete_usec | int > 0 | ||
| - _es_second_restart_started_usec | int > 0 | ||
| - _es_second_restart_health_complete_usec | int > 0 | ||
| - _es_first_restart_started_usec | int <= _es_first_restart_health_complete_usec | int | ||
| - _es_second_restart_started_usec | int <= _es_second_restart_health_complete_usec | int | ||
| - _es_first_restart_health_complete_usec | int <= _es_second_restart_started_usec | int | ||
| fail_msg: >- | ||
| Elasticsearch config restart did not wait for the first node's | ||
| post-restart health gate before starting the next node. | ||
| {{ _es_first_restart_host }} started={{ _es_first_restart_started_usec }} | ||
| health_complete={{ _es_first_restart_health_complete_usec }}; | ||
| {{ _es_second_restart_host }} started={{ _es_second_restart_started_usec }} | ||
| health_complete={{ _es_second_restart_health_complete_usec }}. | ||
| success_msg: >- | ||
| Elasticsearch config restart was rolling and health-gated. | ||
| {{ _es_first_restart_host }} completed health before | ||
| {{ _es_second_restart_host }} started. | ||
| run_once: true |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,58 @@ | ||
| --- | ||
|
|
||
| # Start from {'cluster.logsdb.enabled': 'true'} when elasticsearch_logsdb is truthy | ||
| # (otherwise an empty dict), then merge elasticsearch_cluster_settings on top, so | ||
| # user-supplied settings win on key conflicts. | ||
| - name: elasticsearch-cluster-settings | Build effective cluster settings | ||
| ansible.builtin.set_fact: | ||
| _elasticsearch_effective_cluster_settings: >- | ||
| {{ (elasticsearch_logsdb | default(false) | bool) | ||
| | ternary({'cluster.logsdb.enabled': 'true'}, {}) | ||
| | combine(elasticsearch_cluster_settings | default({})) }} | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
|
|
||
| # Guarded block, run once and delegated to elasticstack_ca_host (or the current | ||
| # host when that is undefined). It is skipped when there are no settings to apply, | ||
| # in check mode, or when security is enabled but elasticstack_password.stdout is | ||
| # missing or empty. | ||
| - name: elasticsearch-cluster-settings | Apply persistent cluster settings # noqa: run-once[task] | ||
| when: | ||
| - _elasticsearch_effective_cluster_settings | length > 0 | ||
| - elasticsearch_security | bool | ternary(elasticstack_password is defined and (elasticstack_password.stdout | default('') | length > 0), true) | ||
| - not ansible_check_mode | ||
| run_once: true | ||
| delegate_to: "{{ elasticstack_ca_host | default(inventory_hostname) }}" | ||
| block: | ||
| # Fetch the cluster settings with flat (dotted) keys; basic auth as 'elastic' is | ||
| # sent only when security is enabled. | ||
| - name: elasticsearch-cluster-settings | Read current persistent cluster settings | ||
| ansible.builtin.uri: | ||
| url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/settings?flat_settings=true" | ||
| method: GET | ||
| user: "{{ 'elastic' if elasticsearch_security | bool else omit }}" | ||
| password: "{{ elasticstack_password.stdout if elasticsearch_security | bool else omit }}" | ||
| force_basic_auth: "{{ elasticsearch_security | bool }}" | ||
| validate_certs: "{{ elasticsearch_validate_api_certs }}" | ||
| return_content: true | ||
| register: _elasticsearch_current_cluster_settings | ||
| no_log: "{{ elasticstack_no_log }}" | ||
|
|
||
| # Compare every desired setting with the persistent value Elasticsearch returned | ||
| # and record whether any is missing or different. Desired YAML booleans are | ||
| # lowercased before the comparison: Jinja renders them as "True"/"False", which | ||
| # never equals the lowercase "true"/"false" strings in the API response and would | ||
| # otherwise force a PUT (and a "changed" result) on every run. | ||
| - name: elasticsearch-cluster-settings | Check if settings already match | ||
| ansible.builtin.set_fact: | ||
| _elasticsearch_cluster_settings_changed: "{{ _needs_update | trim }}" | ||
| vars: | ||
| _current: "{{ _elasticsearch_current_cluster_settings.json.persistent }}" | ||
| _needs_update: >- | ||
| {% set ns = namespace(changed=false) %} | ||
| {% for key, value in _elasticsearch_effective_cluster_settings.items() %} | ||
| {% set expected = (value | string | lower) if value is boolean else (value | string) %} | ||
| {% if _current.get(key) is none or _current[key] | string != expected %} | ||
| {% set ns.changed = true %} | ||
|
Comment on lines
+35
to
+39
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Boolean setting values can break idempotent diff detection. On Line 38, string comparison can mis-detect changes when desired values are YAML booleans (Jinja's `string` filter renders them as `True`/`False`, while Elasticsearch returns `true`/`false`).
Proposed fix:
- _needs_update: >-
- {% set ns = namespace(changed=false) %}
- {% for key, value in _elasticsearch_effective_cluster_settings.items() %}
- {% if _current.get(key) is none or _current[key] | string != value | string %}
- {% set ns.changed = true %}
- {% endif %}
- {% endfor %}
- {{ ns.changed }}
+ _needs_update: >-
+ {% set ns = namespace(changed=false) %}
+ {% for key, value in _elasticsearch_effective_cluster_settings.items() %}
+ {% if value is boolean %}
+ {% set expected = 'true' if value else 'false' %}
+ {% else %}
+ {% set expected = value | string %}
+ {% endif %}
+ {% if _current.get(key) is none or (_current[key] | string) != expected %}
+ {% set ns.changed = true %}
+ {% endif %}
+ {% endfor %}
+ {{ ns.changed }}
As per coding guidelines, "Review for Ansible best practices: idempotency (command/shell needs creates/removes guards), correct handler notifications, proper use of become/become_user, and platform conditionals (Debian vs RHEL)."
🤖 Prompt for AI Agents |
||
| {% endif %} | ||
| {% endfor %} | ||
| {{ ns.changed }} | ||
|
|
||
| # PUT the desired settings under "persistent". Guarded by the comparison result, so | ||
| # the request is only sent when a difference was detected; it is then always | ||
| # reported as changed. Any response status other than 200 fails the task. | ||
| - name: elasticsearch-cluster-settings | Apply cluster settings | ||
| ansible.builtin.uri: | ||
| url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/settings" | ||
| method: PUT | ||
| body_format: json | ||
| body: | ||
| persistent: "{{ _elasticsearch_effective_cluster_settings }}" | ||
| user: "{{ 'elastic' if elasticsearch_security | bool else omit }}" | ||
| password: "{{ elasticstack_password.stdout if elasticsearch_security | bool else omit }}" | ||
| force_basic_auth: "{{ elasticsearch_security | bool }}" | ||
| validate_certs: "{{ elasticsearch_validate_api_certs }}" | ||
| status_code: 200 | ||
| no_log: "{{ elasticstack_no_log }}" | ||
| when: _elasticsearch_cluster_settings_changed | bool | ||
| changed_when: true | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,36 @@ | ||
| --- | ||
|
|
||
| # Detect whether a rolling upgrade is needed. Any version change — major, | ||
| # minor, or patch — should restart nodes one at a time with shard allocation | ||
| # management rather than restarting all nodes simultaneously. | ||
| # | ||
| # Pre-install detection covers cases where we know the target version: | ||
| # 1. Pinned version higher than installed (elasticstack_version: "9.2.0") | ||
| # 2. Major version change (elasticstack_release differs from installed) | ||
| # | ||
| # For "latest" mode, we can't know pre-install whether a newer version is | ||
| # available. The normal package tasks handle installation with state: latest, | ||
| # and if the package changed, a rolling restart is triggered post-install. | ||
| # | ||
| # The result is false when no elasticsearch package entry (with a version) exists | ||
| # in ansible_facts.packages. NOTE(review): this presumably requires package facts | ||
| # to have been gathered earlier - that step is not shown here. | ||
| - name: elasticsearch-upgrade-detection | Detect if rolling upgrade is needed | ||
| ansible.builtin.set_fact: | ||
| _elasticsearch_needs_rolling_upgrade: >- | ||
| {{ ansible_facts.packages['elasticsearch'] is defined and | ||
| ansible_facts.packages['elasticsearch'][0].version is defined and | ||
| ((elasticstack_version is defined and | ||
| elasticstack_version != 'latest' and | ||
| elasticstack_version is version(ansible_facts.packages['elasticsearch'][0].version, '>')) | ||
| or | ||
| (elasticstack_release | default(8) | int != (ansible_facts.packages['elasticsearch'][0].version.split('.')[0] | int))) }} | ||
|
|
||
| # Abort when the target release is 9 or later and the installed package version is | ||
| # at least 8.0.0 but below 8.19.0. | ||
| # NOTE(review): installed versions below 8.0.0 do not match this guard - confirm | ||
| # whether such upgrades are rejected elsewhere. | ||
| - name: elasticsearch-upgrade-detection | Check upgrade path requirement for ES 9.x | ||
| ansible.builtin.fail: | ||
| msg: | | ||
| UPGRADE PATH VIOLATION: Elasticsearch 9.x requires 8.19.x first. | ||
| Current version: {{ ansible_facts.packages['elasticsearch'][0].version }} | ||
| You must upgrade to 8.19.x before upgrading to 9.x. | ||
| See: https://www.elastic.co/docs/deploy-manage/upgrade/deployment-or-cluster | ||
| when: | ||
| - elasticstack_release | default(8) | int >= 9 | ||
| - ansible_facts.packages['elasticsearch'] is defined | ||
| - ansible_facts.packages['elasticsearch'][0].version is version('8.0.0', '>=') | ||
| - ansible_facts.packages['elasticsearch'][0].version is version('8.19.0', '<') |
Uh oh!
There was an error while loading. Please reload this page.