diff --git a/ansible/files/adminapi.service.j2 b/ansible/files/adminapi.service.j2 index cc1e9dc2a..305d1ac62 100644 --- a/ansible/files/adminapi.service.j2 +++ b/ansible/files/adminapi.service.j2 @@ -3,9 +3,8 @@ Description=AdminAPI Requires=network-online.target After=network-online.target -# Move this to the Service section if on systemd >=250 -StartLimitIntervalSec=60 -StartLimitBurst=10 +StartLimitIntervalSec=0 +StartLimitBurst=0 [Service] Type=simple diff --git a/ansible/files/gotrue.service.j2 b/ansible/files/gotrue.service.j2 index 144448cc6..dbcbd03fe 100644 --- a/ansible/files/gotrue.service.j2 +++ b/ansible/files/gotrue.service.j2 @@ -40,9 +40,19 @@ After=network-online.target systemd-resolved.service Wants=postgresql.service After=postgresql.service -# Lower start limit ival and burst to prevent the noisy flapping -StartLimitIntervalSec=10 -StartLimitBurst=5 +# The systemd default is 10s / 5 for these values with a DefaultRestartUSec of +# 100ms. Most services set RestartSec=3, so under most circumstances it takes +# 15s to restart 5 times and the limit of 10s is not exceeded. However, if +# other system processes (salt, cloud-init) restart it explicitly, or recovering +# system services within the --before chain trigger a restart, the limit can be +# exceeded, causing it to be marked as failed. Since no services mark +# gotrue.service as required, it will remain offline until the next explicit +# restart is issued. +# +# Setting these values to 0 with Restart=always and RestartSec=3 will prevent +# gotrue from being marked as failed. 
+StartLimitIntervalSec=0 +StartLimitBurst=0 [Service] Type=exec diff --git a/ansible/files/nginx.service.j2 b/ansible/files/nginx.service.j2 index 872e3346a..a43c3df60 100644 --- a/ansible/files/nginx.service.j2 +++ b/ansible/files/nginx.service.j2 @@ -3,6 +3,9 @@ Description=nginx server After=postgrest.service gotrue.service adminapi.service Wants=postgrest.service gotrue.service adminapi.service +StartLimitIntervalSec=0 +StartLimitBurst=0 + [Service] Type=forking ExecStart=/usr/local/nginx/sbin/nginx -c /etc/nginx/nginx.conf diff --git a/ansible/files/pg_egress_collect.service.j2 b/ansible/files/pg_egress_collect.service.j2 index 7ac04f47d..36e1b2074 100644 --- a/ansible/files/pg_egress_collect.service.j2 +++ b/ansible/files/pg_egress_collect.service.j2 @@ -1,6 +1,9 @@ [Unit] Description=Postgres Egress Collector +StartLimitIntervalSec=0 +StartLimitBurst=0 + [Service] Type=simple ExecStart=/bin/bash -c "tcpdump -s 128 -Q out -nn -tt -vv -p -l 'tcp and (port 5432 or port 6543)' | perl /root/pg_egress_collect.pl" diff --git a/ansible/files/postgres_exporter.service.j2 b/ansible/files/postgres_exporter.service.j2 index 6baa18c0d..dcb107cb7 100644 --- a/ansible/files/postgres_exporter.service.j2 +++ b/ansible/files/postgres_exporter.service.j2 @@ -1,6 +1,9 @@ [Unit] Description=Postgres Exporter +StartLimitIntervalSec=0 +StartLimitBurst=0 + [Service] Type=simple ExecStart=/opt/postgres_exporter/postgres_exporter --disable-settings-metrics --extend.query-path="/opt/postgres_exporter/queries.yml" --disable-default-metrics --no-collector.locks --no-collector.replication --no-collector.replication_slot --no-collector.stat_bgwriter --no-collector.stat_database --no-collector.stat_user_tables --no-collector.statio_user_tables --no-collector.wal {% if qemu_mode is defined and qemu_mode %}--no-collector.database {% endif %} diff --git a/ansible/files/postgrest.service.j2 b/ansible/files/postgrest.service.j2 index 290f07720..61102cb42 100644 --- 
a/ansible/files/postgrest.service.j2 +++ b/ansible/files/postgrest.service.j2 @@ -3,6 +3,9 @@ Description=PostgREST Requires=postgrest-optimizations.service After=postgrest-optimizations.service +StartLimitIntervalSec=0 +StartLimitBurst=0 + [Service] Type=simple # We allow the base config (sent from the worker) to override the generated config diff --git a/ansible/files/vector.service.j2 b/ansible/files/vector.service.j2 index 1c88baa20..05c11e453 100644 --- a/ansible/files/vector.service.j2 +++ b/ansible/files/vector.service.j2 @@ -4,6 +4,9 @@ Documentation=https://vector.dev After=network-online.target Requires=network-online.target +StartLimitIntervalSec=0 +StartLimitBurst=0 + [Service] User=vector Group=vector