-
Notifications
You must be signed in to change notification settings - Fork 234
Description
Having
Server db:
PostgreSQL: OK
superuser or standard user with backup privileges: OK
PostgreSQL streaming: OK
wal_level: OK
PostgreSQL server is standby: OK
Primary server is not a standby: OK
Primary and standby have same system ID: OK
replication slot: FAILED (slot 'slot_name' not active: is 'receive-wal' running?)
directories: OK
retention policy settings: OK
backup maximum age: FAILED (interval provided: 7 days, latest backup age: No available backups)
backup minimum size: OK (0 B)
wal maximum age: OK (no last_wal_maximum_age provided)
wal size: OK (0 B)
compression settings: OK
failed backups: OK (there are 0 failed backups)
minimum redundancy requirements: FAILED (have 0 non-incremental backups, expected at least 1)
ssh: OK (PostgreSQL server)
systemid coherence: OK (no system Id stored on disk)
pg_receivexlog: OK
pg_receivexlog compatible: OK
receive-wal running: OK
archiver errors: OK
barman diagnose
...
"replication_slot": [
"slot_name",
false,
"3AE/C2000000"
],
...
psql -h primary_db -U barman -d postgres -c "SELECT slot_name, active, restart_lsn FROM pg_replication_slots WHERE slot_type = 'physical' AND slot_name = 'slot_name';"
slot_name | active | restart_lsn
-----------+--------+--------------
slot_name | t | 3AE/C2000000
(1 row)
Changed line 1612 in postgres.py to:
slotstatus = cur.fetchone()
_logger.debug(
"self: %s", self
)
_logger.debug(
"replication_slots: %s", slotstatus
)
return slotstatus
The resulting log output shows that it is using the standby connection for that check, as opposed to the primary one.
2025-12-16 13:07:25,103 [234077] barman.postgres DEBUG: self: <barman.postgres.StandbyPostgreSQLConnection object at 0x7fb7064fc7d0>
2025-12-16 13:07:25,104 [234077] barman.postgres DEBUG: replication_slots: Record(slot_name='slot_name', active=False, restart_lsn='3AE/C2000000')
Initially I had conninfo pointing to the whole cluster and primary_conninfo pointing to the read-write node using target_session_attrs, but Barman started to complain that conninfo was not pointing to a standby node, so I changed the config to:
conninfo = "host=primary_db,replica_db port=5432 user=barman dbname=postgres target_session_attrs=standby connect_timeout=2"
primary_conninfo = "host=primary_db,replica_db port=5432 user=barman dbname=postgres target_session_attrs=read-write connect_timeout=2"
streaming_conninfo = "host=primary_db,replica_db port=5432 user=streaming_barman dbname=postgres target_session_attrs=read-write connect_timeout=2 application_name=slot_name"
This seems to be much the same issue as #1024.
Full barman diagnose (without null values):
barman diagnose | grep -v null
{
"global": {
"config": {
"barman_home": "/var/lib/barman",
"barman_lock_directory": "/var/lib/barman",
"barman_user": "barman",
"compression": "bzip2",
"config_changes_queue": "/var/lib/barman/cfg_changes.queue",
"configuration_files_directory": "/etc/barman.d",
"errors_list": [],
"lock_directory_cleanup": true,
"log_file": "/var/log/barman/barman.log",
"log_level": "DEBUG",
"path_prefix": "\"/usr/pgsql-15/bin\""
},
"system_info": {
"barman_ver": "3.16.1",
"kernel_ver": "Linux barman 4.18.0-553.79.1.el8_10.x86_64 #1 SMP Mon Oct 13 11:48:41 EDT 2025 x86_64 x86_64 x86_64 GNU/Linux",
"python_executable": "/usr/bin/python3.12",
"python_ver": "Python 3.12.11",
"release": "RedHat Linux AlmaLinux release 8.10 (Cerulean Leopard)",
"rsync_ver": "rsync version 3.1.3 protocol version 31",
"ssh_ver": "",
"timestamp": "2025-12-16T13:20:57.845788+02:00"
}
},
"models": {},
"servers": {
"db": {
"backups": {},
"config": {
"active": true,
"archiver": false,
"archiver_batch_size": 0,
"autogenerate_manifest": false,
"aws_await_snapshots_timeout": 3600,
"backup_directory": "/srv/barman/db",
"backup_method": "rsync",
"backup_options": "concurrent_backup",
"basebackup_retry_sleep": 30,
"basebackup_retry_times": 0,
"basebackups_directory": "/srv/barman/db/base",
"check_timeout": 30,
"cluster": "db",
"combine_mode": "copy",
"compression": "bzip2",
"compression_level": "medium",
"conninfo": "host=primary_db,replica_db port=5432 user=barman dbname=postgres target_session_attrs=standby connect_timeout=2",
"create_slot": "auto",
"description": "DB v16 pgBarman backup (SSH)",
"disabled": false,
"errors_directory": "/srv/barman/db/errors",
"forward_config_path": false,
"immediate_checkpoint": true,
"incoming_wals_directory": "/srv/barman/db/incoming",
"keepalive_interval": 60,
"last_backup_maximum_age": "7 days",
"minimum_redundancy": 1,
"network_compression": false,
"parallel_jobs": 1,
"parallel_jobs_start_batch_period": 1,
"parallel_jobs_start_batch_size": 10,
"path_prefix": "/usr/pgsql-16/bin",
"primary_checkpoint_timeout": 0,
"primary_conninfo": "host=primary_db,replica_db port=5432 user=barman dbname=postgres target_session_attrs=read-write connect_timeout=2",
"recovery_options": "",
"retention_policy": "REDUNDANCY 1",
"retention_policy_mode": "auto",
"reuse_backup": "link",
"slot_name": "slot_name",
"ssh_command": "ssh postgres@primary_db -p 22",
"staging_location": "local",
"streaming_archiver": true,
"streaming_archiver_batch_size": 0,
"streaming_archiver_name": "barman_receive_wal",
"streaming_backup_name": "barman_streaming_backup",
"streaming_conninfo": "host=primary_db,replica_db port=5432 user=streaming_barman dbname=postgres target_session_attrs=read-write connect_timeout=2 application_name=slot_name",
"streaming_wals_directory": "/srv/barman/db/streaming",
"wal_retention_policy": "MAIN",
"wals_directory": "/srv/barman/db/wals",
"worm_mode": false,
"xlogdb_directory": "/srv/barman/db/wals"
},
"msg_list": [],
"status": {
"archive_timeout": 0,
"checkpoint_timeout": 300,
"config_file": "/var/lib/pgsql/16/data/postgresql.conf",
"current_lsn": "3AE/C3E65C40",
"current_size": 31861036364.0,
"data_checksums": "on",
"data_directory": "/var/lib/pgsql/16/data",
"has_backup_privileges": true,
"has_monitoring_privileges": true,
"hba_file": "/var/lib/pgsql/16/data/pg_hba.conf",
"hot_standby": "on",
"ident_file": "/var/lib/pgsql/16/data/pg_ident.conf",
"included_files": [
"/var/lib/pgsql/16/data/postgresql.base.conf"
],
"is_in_recovery": true,
"is_superuser": true,
"max_replication_slots": "10",
"max_wal_senders": "10",
"pg_receivexlog_compatible": true,
"pg_receivexlog_installed": true,
"pg_receivexlog_path": "/usr/pgsql-16/bin/pg_receivewal",
"pg_receivexlog_supports_slots": true,
"pg_receivexlog_synchronous": false,
"pg_receivexlog_version": "16.10",
"postgres_systemid": "7439841939786640555",
"replication_slot": [
"slot_name",
false,
"3AE/C3000000"
],
"replication_slot_support": true,
"server_txt_version": "16.9",
"streaming": true,
"streaming_supported": true,
"streaming_systemid": "7439841939786640555",
"synchronous_standby_names": [
"node1",
"node2",
"node3"
],
"timeline": 11,
"version_supported": true,
"wal_compression": "off",
"wal_keep_size": "128MB",
"wal_level": "replica",
"xlog_segment_size": 16777216,
"xlogpos": "3AE/C3E65C40"
},
"system_info": {
"kernel_ver": "Linux primary_db 4.18.0-553.66.1.el8_10.x86_64 #1 SMP Wed Aug 6 13:33:37 EDT 2025 x86_64 x86_64 x86_64 GNU/Linux",
"python_executable": "/usr/bin/python3.12",
"python_ver": "Python 3.12.11",
"release": "RedHat Linux AlmaLinux release 8.10 (Cerulean Leopard)",
"rsync_ver": "rsync version 3.1.3 protocol version 31",
"ssh_ver": ""
},
"wals": {
"last_archived_wal_per_timeline": {
"0000000B": {
"compression": "bzip2",
"name": "0000000B000003AE000000C2",
"size": 8989593,
"time": 1765883600.383711
}
}
}
}
}
}
I would expect it to query the primary server, or at least not to nag when conninfo and primary_conninfo connect to the same host.
https://docs.pgbarman.org/release/3.16.2/user_guide/configuration.html#general
conninfo
Specifies the connection string used by Barman to connect to the Postgres server. This is a libpq connection string. Commonly used keys include: host, hostaddr, port, dbname, user and password. See the libpq-connstring PostgreSQL documentation for details.
The documentation doesn't say anything about it having to point to a standby node.
Also, the https://docs.pgbarman.org/release/3.16.2/user_guide/configuration.html#write-ahead-logs-wal section describes the additional wal_conninfo and wal_streaming_conninfo options. Their descriptions are not easy to read. I think a table or matrix would help there, showing the different conninfo parameters, their purpose (which functions use them), their override priority, and the fallback value used when a parameter is not set.