From 1fc1fae18d89f1d6781281a2cf52cc26144d1616 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sven=20M=C3=A4der?= Date: Wed, 20 Dec 2017 21:40:12 +0100 Subject: [PATCH 1/9] Add options --dn, --sort and --sort-prefix If ldap-git-backup is used to back up the OpenLDAP configuration stored in cn=config, the order of entries is critical for a restore scenario. Adding the option to prefix each entry's file name with a zero-padded number therefore makes it possible to concatenate the files in the correct order by sorting on their file names. This may also be useful if one needs to restore only some parts of the database using ldapadd against a running slapd instance. It may also be useful to have the DN of the entry in the filename in order to concatenate only parts of the database backup. --- README.mdown | 4 ++++ ldap-git-backup.in | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/README.mdown b/README.mdown index d039c8c..da9b7dc 100644 --- a/README.mdown +++ b/README.mdown @@ -48,6 +48,10 @@ The script ldap-git-backup is a convenient way to automate this pro The backup location will be /var/backups/ldap or an alternative directory given by the --backup-dir option. This directory will also contain the Git repository. The directory and the Git repository will be created if needed when the first backup is made. +If you want to also have the DN of the entry in the filename, use the option --dn. This option will prefix the filename with the entry's DN. Using this option could break the backup if you have very long DNs and the resulting filename exceeds the maximum filename length of the underlying filesystem. + +To preserve the order of entries in an ldif backup with slapcat or ldapsearch, use the option --sort, which will prefix the filename with an incremental hexadecimal number padded with zeros. 
This option is useful when the order of entries is important for the restore scenario, for example if you want to restore a backup of cn=config or if you want to restore only parts of a database using ldapadd while slapd is running. Consider using ldapsearch as --ldif-cmd in such scenarios for optimal sorting. This option can also be combined with the option --dn. The default padding length of the prefix is 5, which is suitable for correct sorting of up to ~1M entries. You can use the option --sort-prefix to specify a different value. + ## Backup Strategies The simplest backup strategy would just call ldap-git-backup once per day via cron. Pick a quiet time for the LDAP directory and add a command like the following to your crontab (e.g., crontab -e or in /etc/cron.d/ldap-git-backup): diff --git a/ldap-git-backup.in b/ldap-git-backup.in index ae2974e..e88da07 100644 --- a/ldap-git-backup.in +++ b/ldap-git-backup.in @@ -21,6 +21,9 @@ sub main { my $commit_msg = 'ldap-git-backup'; my $commit_date = time(); my $gc = 1; + my $dn = 0; + my $sort = 0; + my $sort_prefix = 5; my $help; GetOptions( 'ldif-cmd=s' => \$ldif_cmd, @@ -28,6 +31,9 @@ sub main { 'commit-msg=s' => \$commit_msg, 'commit-date=s' => \$commit_date, 'gc!' => \$gc, + 'dn' => \$dn, + 'sort' => \$sort, + 'sort-prefix=s' => \$sort_prefix, 'help' => \$help, ); pod2usage('-verbose' => 2, '-exit_status' => 0) if $help; @@ -41,6 +47,11 @@ sub main { $backup_dir = getcwd() . '/' . $backup_dir; } + if ((!($sort_prefix =~ m/^\d+\z/)) || ($sort_prefix == 0)) { + die("Error: invalid --sort_prefix_length\n" . 
+ "Please only use whole numbers > 0.\n"); + } + my $ldif_aref = LDAP::Utils::read_ldif($ldif_cmd); make_path($backup_dir, {mode => 0700}); my $mode = stat($backup_dir)->mode; @@ -55,13 +66,22 @@ sub main { for my $f (@filelist) { $files_before{$f} = 1 } @filelist = (); + my $i=0; for my $ldif (@$ldif_aref) { my $filename = LDAP::Utils::filename($ldif); + if ($dn && $sort) { + $filename = LDAP::Utils::counter($i, $sort_prefix) . '_' . LDAP::Utils::dn($ldif) . '_' . $filename; + } elsif ($sort) { + $filename = LDAP::Utils::counter($i, $sort_prefix) . '_' . $filename; + } elsif ($dn) { + $filename = LDAP::Utils::dn($ldif) . '_' . $filename; + } open(my $fh, '>', "$backup_dir/$filename"); print {$fh} $ldif; close($fh); push(@filelist, $filename); delete($files_before{$filename}); + $i++; } $repo->command('add', @filelist) if @filelist; $repo->command('rm', (keys %files_before)) if %files_before; @@ -125,6 +145,12 @@ sub filename { return $filename . '.ldif'; } +sub counter { + my ($i, $len) = @_; + my $counter = sprintf("%0." . $len . "x",$i); + return $counter +} + sub dn { my ($ldif) = @_; $ldif =~ s{\n }{}gs; # combine multiline attributes @@ -269,6 +295,25 @@ By default gc is done so as to keep the size of the backup down. You may want to skip gc for the occasional backup run but leave it on for the scheduled backups. +=item B<--dn> + +Prefix the filename with the entry's DN. Warning: using this option may break +the backup if you have very long DNs and the resulting filename exceeds the +maximum filename length of the underlying filesystem. + +=item B<--sort> + +Prefix the filename with an incremental hexadecimal number padded with zeros. +This option is useful when the order of entries is important for the restore +scenario, for example if you want to restore a backup of cn=config or if you +want to restore only parts of a database using ldapadd while slapd is running. +Consider using ldapsearch as --ldif-cmd in such scenarios for optimal sorting. 
+ +=item B<--sort-prefix> + +The number of zeros the hexadecimal prefix is padded with when using --sort. +Default: 5 (suitable for correct sorting of up to ~1M entries). + =item B<--help> Prints this page. From 9b43379df4a8b32c002f4fec1921e477e8c8d8fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sven=20M=C3=A4der?= Date: Thu, 21 Dec 2017 15:09:12 +0100 Subject: [PATCH 2/9] Remove / from DN in filename --- ldap-git-backup.in | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/ldap-git-backup.in b/ldap-git-backup.in index e88da07..a26f9ec 100644 --- a/ldap-git-backup.in +++ b/ldap-git-backup.in @@ -70,11 +70,15 @@ sub main { for my $ldif (@$ldif_aref) { my $filename = LDAP::Utils::filename($ldif); if ($dn && $sort) { - $filename = LDAP::Utils::counter($i, $sort_prefix) . '_' . LDAP::Utils::dn($ldif) . '_' . $filename; + my $dn = LDAP::Utils::file_dn($ldif); + my $cnt = LDAP::Utils::counter($i, $sort_prefix); + $filename = $cnt . '_' . $dn . '_' . $filename; } elsif ($sort) { - $filename = LDAP::Utils::counter($i, $sort_prefix) . '_' . $filename; + my $cnt = LDAP::Utils::counter($i, $sort_prefix); + $filename = $cnt . '_' . $filename; } elsif ($dn) { - $filename = LDAP::Utils::dn($ldif) . '_' . $filename; + my $dn = LDAP::Utils::file_dn($ldif); + $filename = $dn . '_' . $filename; } open(my $fh, '>', "$backup_dir/$filename"); print {$fh} $ldif; @@ -164,6 +168,13 @@ sub dn { return ''; } +sub file_dn { + my ($ldif) = @_; + my $dn = dn($ldif); + $dn =~ s#/##; + return $dn; +} + sub canonicalize_dn { my ($dn) = @_; @@ -309,7 +320,7 @@ scenario, for example if you want to restore a backup of cn=config or if you want to restore only parts of a database using ldapadd while slapd is running. Consider using ldapsearch as --ldif-cmd in such scenarios for optimal sorting. -=item B<--sort-prefix> +=item B<--sort-prefix> Eprefix_lengthE> The number of zeros the hexadecimal prefix is padded with when using --sort. 
Default: 5 (suitable for correct sorting of up to ~1M entries). From 1c22e0171d2c2589d52058b3d8f2ebc043bee80e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sven=20M=C3=A4der?= Date: Thu, 21 Dec 2017 15:16:09 +0100 Subject: [PATCH 3/9] Add option --exclude-attrs --- ldap-git-backup.in | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/ldap-git-backup.in b/ldap-git-backup.in index a26f9ec..6174e6f 100644 --- a/ldap-git-backup.in +++ b/ldap-git-backup.in @@ -24,6 +24,7 @@ sub main { my $dn = 0; my $sort = 0; my $sort_prefix = 5; + my $exclude_attrs = ""; my $help; GetOptions( 'ldif-cmd=s' => \$ldif_cmd, @@ -34,6 +35,7 @@ sub main { 'dn' => \$dn, 'sort' => \$sort, 'sort-prefix=s' => \$sort_prefix, + 'exclude-attrs=s' => \$exclude_attrs, 'help' => \$help, ); pod2usage('-verbose' => 2, '-exit_status' => 0) if $help; @@ -81,7 +83,21 @@ sub main { $filename = $dn . '_' . $filename; } open(my $fh, '>', "$backup_dir/$filename"); - print {$fh} $ldif; + if ($exclude_attrs ne "") { + my @lines = split /\n/, $ldif; + my $n = 0; + while (my $l = $lines[$n]) { + if ($l =~ m/^$exclude_attrs: /) { + while ($lines[++$n] && $lines[$n] =~ /^ /) {}; + } else { + print {$fh} $l."\n"; + $n++; + } + } + print {$fh} "\n"; + } else { + print {$fh} $ldif; + } close($fh); push(@filelist, $filename); delete($files_before{$filename}); @@ -320,11 +336,17 @@ scenario, for example if you want to restore a backup of cn=config or if you want to restore only parts of a database using ldapadd while slapd is running. Consider using ldapsearch as --ldif-cmd in such scenarios for optimal sorting. -=item B<--sort-prefix> Eprefix_lengthE> +=item B<--sort-prefix Eprefix_lengthE> The number of zeros the hexadecimal prefix is padded with when using --sort. Default: 5 (suitable for correct sorting of up to ~1M entries). +=item B<--exclude-attrs Eexclude_regexE> + +Exclude all attributes matching the regular expression C. 
The +regex is effectively enclosed with prefix C<^> and suffix C<: > to only match +attribute names and results in C<^exclude_regex: >. + =item B<--help> Prints this page. From bbf3558bfd39c494341da29b4835d8b05b52b50b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sven=20M=C3=A4der?= Date: Thu, 21 Dec 2017 15:36:27 +0100 Subject: [PATCH 4/9] Reverse DN for correct sorting without --sort --- ldap-git-backup.in | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/ldap-git-backup.in b/ldap-git-backup.in index 6174e6f..9993810 100644 --- a/ldap-git-backup.in +++ b/ldap-git-backup.in @@ -79,7 +79,7 @@ sub main { my $cnt = LDAP::Utils::counter($i, $sort_prefix); $filename = $cnt . '_' . $filename; } elsif ($dn) { - my $dn = LDAP::Utils::file_dn($ldif); + my $dn = LDAP::Utils::file_dn_rev($ldif); $filename = $dn . '_' . $filename; } open(my $fh, '>', "$backup_dir/$filename"); @@ -191,6 +191,13 @@ sub file_dn { return $dn; } +sub file_dn_rev { + my ($ldif) = @_; + my $dn = file_dn($ldif); + $dn = join ",", reverse split /,/, $dn; + return $dn; +} + sub canonicalize_dn { my ($dn) = @_; @@ -324,22 +331,23 @@ backups. =item B<--dn> -Prefix the filename with the entry's DN. Warning: using this option may break -the backup if you have very long DNs and the resulting filename exceeds the -maximum filename length of the underlying filesystem. +Prefix the filename with the entry's reversed DN. Or if used in conjunction with +--sort using the entry's normal DN. This option is useful when the order of +entries is important for the restore scenario, for example if you want to restore +only parts of a database using ldapadd while slapd is running. Consider using +ldapsearch as --ldif-cmd in such scenarios for optimal sorting. Warning: using +this option may break the backup if you have very long DNs and the resulting +filename exceeds the maximum filename length of the underlying filesystem. 
=item B<--sort> Prefix the filename with an incremental hexadecimal number padded with zeros. -This option is useful when the order of entries is important for the restore -scenario, for example if you want to restore a backup of cn=config or if you -want to restore only parts of a database using ldapadd while slapd is running. -Consider using ldapsearch as --ldif-cmd in such scenarios for optimal sorting. +This option is useful to restore a backup of cn=config. =item B<--sort-prefix Eprefix_lengthE> The number of zeros the hexadecimal prefix is padded with when using --sort. -Default: 5 (suitable for correct sorting of up to ~1M entries). +Default: 5 =item B<--exclude-attrs Eexclude_regexE> From 8cd2f60829679817cc2048c1d29bc3177670adfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sven=20M=C3=A4der?= Date: Thu, 21 Dec 2017 15:49:01 +0100 Subject: [PATCH 5/9] Add option --suffix to hide the DN suffix of the backup --- ldap-git-backup.in | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/ldap-git-backup.in b/ldap-git-backup.in index 9993810..44362da 100644 --- a/ldap-git-backup.in +++ b/ldap-git-backup.in @@ -25,6 +25,7 @@ sub main { my $sort = 0; my $sort_prefix = 5; my $exclude_attrs = ""; + my $suffix = ""; my $help; GetOptions( 'ldif-cmd=s' => \$ldif_cmd, @@ -36,6 +37,7 @@ sub main { 'sort' => \$sort, 'sort-prefix=s' => \$sort_prefix, 'exclude-attrs=s' => \$exclude_attrs, + 'suffix=s' => \$suffix, 'help' => \$help, ); pod2usage('-verbose' => 2, '-exit_status' => 0) if $help; @@ -72,14 +74,14 @@ sub main { for my $ldif (@$ldif_aref) { my $filename = LDAP::Utils::filename($ldif); if ($dn && $sort) { - my $dn = LDAP::Utils::file_dn($ldif); + my $dn = LDAP::Utils::file_dn($ldif, $suffix); my $cnt = LDAP::Utils::counter($i, $sort_prefix); $filename = $cnt . '_' . $dn . '_' . $filename; } elsif ($sort) { my $cnt = LDAP::Utils::counter($i, $sort_prefix); $filename = $cnt . '_' . 
$filename; } elsif ($dn) { - my $dn = LDAP::Utils::file_dn_rev($ldif); + my $dn = LDAP::Utils::file_dn_rev($ldif, $suffix); $filename = $dn . '_' . $filename; } open(my $fh, '>', "$backup_dir/$filename"); @@ -185,15 +187,18 @@ sub dn { } sub file_dn { - my ($ldif) = @_; + my ($ldif, $suffix) = @_; my $dn = dn($ldif); + if ($suffix ne "") { + $dn =~ s#$suffix$#suffix#; + } $dn =~ s#/##; return $dn; } sub file_dn_rev { - my ($ldif) = @_; - my $dn = file_dn($ldif); + my ($ldif, $suffix) = @_; + my $dn = file_dn($ldif, $suffix); $dn = join ",", reverse split /,/, $dn; return $dn; } @@ -355,6 +360,10 @@ Exclude all attributes matching the regular expression C. The regex is effectively enclosed with prefix C<^> and suffix C<: > to only match attribute names and results in C<^exclude_regex: >. +=item B<--suffix Edn_suffixE> + +Replace the DN suffix of the backup with the string C in the file name. + =item B<--help> Prints this page. From 55d5d7e0702cc8cdddae5531f6dae5d71560e595 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sven=20M=C3=A4der?= Date: Thu, 21 Dec 2017 18:01:41 +0100 Subject: [PATCH 6/9] Fix a bug where ARG_MAX is exceeded during git-add When the combined length of the arguments to the git-add operation, i.e. the filenames of all ldif files, is greater than ARG_MAX, git-add fails with an error similar to the one you get when listing these files in the backup directory afterwards with a wildcard, `ls /backupdir/*`: -bash: /bin/ls: Argument list too long On my test system `getconf ARG_MAX` reports 2097152, but the actual limit was about 1880000. Maybe this value has to be determined dynamically at runtime. 
See also: https://unix.stackexchange.com/a/120842/60293 --- ldap-git-backup.in | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/ldap-git-backup.in b/ldap-git-backup.in index 44362da..f2a005e 100644 --- a/ldap-git-backup.in +++ b/ldap-git-backup.in @@ -26,6 +26,7 @@ sub main { my $sort_prefix = 5; my $exclude_attrs = ""; my $suffix = ""; + my $arg_max = 1048576; my $help; GetOptions( 'ldif-cmd=s' => \$ldif_cmd, @@ -38,6 +39,7 @@ sub main { 'sort-prefix=s' => \$sort_prefix, 'exclude-attrs=s' => \$exclude_attrs, 'suffix=s' => \$suffix, + 'arg-max=s' => \$arg_max, 'help' => \$help, ); pod2usage('-verbose' => 2, '-exit_status' => 0) if $help; @@ -52,8 +54,13 @@ sub main { } if ((!($sort_prefix =~ m/^\d+\z/)) || ($sort_prefix == 0)) { - die("Error: invalid --sort_prefix_length\n" . - "Please only use whole numbers > 0.\n"); + die("Error: invalid --sort_prefix\n" . + "Please only use whole numbers > 0\n"); + } + + if ((!($arg_max =~ m/^\d+\z/)) || ($arg_max < 1024)) { + die("Error: invalid --arg-max\n" . + "Please only use whole numbers >= 1024\n"); } my $ldif_aref = LDAP::Utils::read_ldif($ldif_cmd); @@ -105,7 +112,24 @@ sub main { delete($files_before{$filename}); $i++; } - $repo->command('add', @filelist) if @filelist; + my @filelist_junks; + if (@filelist) { + my $len = 0; + my $j = 0; + for my $filename (@filelist) { + if (($len + length $filename) <= $arg_max) { + push @{ $filelist_junks[$j] }, $filename; + $len = $len + length $filename; + } else { + $j++; + push @{ $filelist_junks[$j] }, $filename; + $len = length $filename; + } + } + } + for my $junk (@filelist_junks) { + $repo->command('add', @$junk) if @$junk; + } $repo->command('rm', (keys %files_before)) if %files_before; $repo->command('commit', "--message=$commit_msg", "--date=$commit_date"); @@ -347,7 +371,8 @@ filename exceeds the maximum filename length of the underlying filesystem. 
=item B<--sort> Prefix the filename with an incremental hexadecimal number padded with zeros. -This option is useful to restore a backup of cn=config. +This option is useful to backup cn=config with the exact order of entries. It is +not recommended for regular DITs. =item B<--sort-prefix Eprefix_lengthE> @@ -364,6 +389,12 @@ attribute names and results in C<^exclude_regex: >. Replace the DN suffix of the backup with the string C in the file name. +=item B<--arg-max ElenghtE> + +Set the maximal supported argument length. If the length of all resulting +filenames is greater than C, the files are split up into multiple junks +and staged using multiple C operations. Default: 1048576 + =item B<--help> Prints this page. From 24642fd88010cf3da105e7d108ca40f14f2c899b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sven=20M=C3=A4der?= Date: Thu, 21 Dec 2017 18:42:40 +0100 Subject: [PATCH 7/9] Update README --- README.mdown | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.mdown b/README.mdown index da9b7dc..c2f895b 100644 --- a/README.mdown +++ b/README.mdown @@ -48,9 +48,11 @@ The script ldap-git-backup is a convenient way to automate this pro The backup location will be /var/backups/ldap or an alternative directory given by the --backup-dir option. This directory will also contain the Git repository. The directory and the Git repository will be created if needed when the first backup is made. -If you want to also have the DN of the entry in the filename, use the option --dn. This option will prefix the filename with the entry's DN. Using this option could break the backup if you have very long DNs and the resulting filename exceeds the maximum filename length of the underlying filesystem. +If you want to also have the DN of the entry in the filename, use the option --dn. This option will prefix the filename with the entry's reversed DN. Or if used in conjunction with --sort using the entry's normal DN. 
This option is useful when the order of entries is important for the restore scenario, for example if you want to restore only parts of a database using ldapadd while slapd is running. Consider using ldapsearch as --ldif-cmd in such scenarios for optimal sorting. Using this option could break the backup if you have very long DNs and the resulting filename exceeds the maximum filename length of the underlying filesystem. If you wish to hide a long DN suffix of a backup, you can use the option --suffix to specify the part at the end of the DN to be replace by the string suffix. -To preserve the order of entries in an ldif backup with slapcat or ldapsearch, use the option --sort, which will prefix the filename with an incremental hexadecimal number padded with zeros. This option is useful when the order of entries is important for the restore scenario, for example if you want to restore a backup of cn=config or if you want to restore only parts of a database using ldapadd while slapd is running. Consider using ldapsearch as --ldif-cmd in such scenarios for optimal sorting. This option can also be combined with the option --dn. The default padding length of the prefix is 5, which is suitable for correct sorting of up to ~1M entries. You can use the option --sort-prefix to specify a different value. +To preserve the exact order of entries in a backup, use the option --sort, which will prefix the filename with an incremental hexadecimal number padded with zeros. This option is useful to backup cn=config. It is not recommended for regular DITs. This option can also be combined with the option --dn. The default padding length of the prefix is 5, which is suitable for correct sorting of up to ~1M entries. You can use the option --sort-prefix to specify a different value. + +If you want to exclude some attributes from the backup, use the option --exclude-attrs exclude_regex, where exclude_regex is a regular expression to be matched against the attribute names. 
This will Exclude all attributes matching the regular expression. The regex is effectively enclosed with prefix ^ and suffix : to only match attribute names and results in ^exclude_regex: . ## Backup Strategies From 9246040025bdf8a231f4fcdce6058215b8afd2f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sven=20M=C3=A4der?= Date: Fri, 22 Dec 2017 14:52:09 +0100 Subject: [PATCH 8/9] Split filelist into chunks for git add, git rm --- ldap-git-backup.in | 57 ++++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/ldap-git-backup.in b/ldap-git-backup.in index f2a005e..21134f8 100644 --- a/ldap-git-backup.in +++ b/ldap-git-backup.in @@ -26,7 +26,6 @@ sub main { my $sort_prefix = 5; my $exclude_attrs = ""; my $suffix = ""; - my $arg_max = 1048576; my $help; GetOptions( 'ldif-cmd=s' => \$ldif_cmd, @@ -39,7 +38,6 @@ sub main { 'sort-prefix=s' => \$sort_prefix, 'exclude-attrs=s' => \$exclude_attrs, 'suffix=s' => \$suffix, - 'arg-max=s' => \$arg_max, 'help' => \$help, ); pod2usage('-verbose' => 2, '-exit_status' => 0) if $help; @@ -58,11 +56,6 @@ sub main { "Please only use whole numbers > 0\n"); } - if ((!($arg_max =~ m/^\d+\z/)) || ($arg_max < 1024)) { - die("Error: invalid --arg-max\n" . 
- "Please only use whole numbers >= 1024\n"); - } - my $ldif_aref = LDAP::Utils::read_ldif($ldif_cmd); make_path($backup_dir, {mode => 0700}); my $mode = stat($backup_dir)->mode; @@ -112,25 +105,15 @@ sub main { delete($files_before{$filename}); $i++; } - my @filelist_junks; - if (@filelist) { - my $len = 0; - my $j = 0; - for my $filename (@filelist) { - if (($len + length $filename) <= $arg_max) { - push @{ $filelist_junks[$j] }, $filename; - $len = $len + length $filename; - } else { - $j++; - push @{ $filelist_junks[$j] }, $filename; - $len = length $filename; - } - } + my @filelist_rm = keys %files_before; + my $filelist_chunks_ref = LDAP::Utils::get_chunks(\@filelist); + my $filelist_rm_chunks_ref = LDAP::Utils::get_chunks(\@filelist_rm); + foreach my $chunk_ref (@$filelist_chunks_ref) { + $repo->command('add', @$chunk_ref) if $chunk_ref; } - for my $junk (@filelist_junks) { - $repo->command('add', @$junk) if @$junk; + foreach my $chunk_ref (@$filelist_rm_chunks_ref) { + $repo->command('rm', @$chunk_ref) if $chunk_ref; } - $repo->command('rm', (keys %files_before)) if %files_before; $repo->command('commit', "--message=$commit_msg", "--date=$commit_date"); $repo->command('gc', '--quiet') if $gc; @@ -271,6 +254,26 @@ sub get_value_from_attribute { return $value; } +sub get_chunks { + my $filelist_ref = shift; + my $MAXARGS = 1048576; + my @filelist_chunks; + my $len = 0; + my $temp_array_ref; + for my $filename (@$filelist_ref) { + if (($len + length $filename) > $MAXARGS) { + push @filelist_chunks, $temp_array_ref; + $len = 0; + my $new_array_ref; + $temp_array_ref = $new_array_ref; + } + push @$temp_array_ref, $filename; + $len = $len + length $filename; + } + push @filelist_chunks, $temp_array_ref; + return \@filelist_chunks; +} + 1; __END__ @@ -389,12 +392,6 @@ attribute names and results in C<^exclude_regex: >. Replace the DN suffix of the backup with the string C in the file name. -=item B<--arg-max ElenghtE> - -Set the maximal supported argument length. 
If the length of all resulting -filenames is greater than C, the files are split up into multiple junks -and staged using multiple C operations. Default: 1048576 - =item B<--help> Prints this page. From d3d4bb2ab73241b2c3a0b1d8b04e552304a326c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sven=20M=C3=A4der?= Date: Thu, 4 Jan 2018 15:15:59 +0100 Subject: [PATCH 9/9] Revert new-features changes (move to branch) This reverts the following commits: 24642fd88010cf3da105e7d108ca40f14f2c899b 8cd2f60829679817cc2048c1d29bc3177670adfe bbf3558bfd39c494341da29b4835d8b05b52b50b 1c22e0171d2c2589d52058b3d8f2ebc043bee80e 9b43379df4a8b32c002f4fec1921e477e8c8d8fb 1fc1fae18d89f1d6781281a2cf52cc26144d1616 While just keeping the bugfix in the master branch, the removed changes will be put into separate feature branches. --- README.mdown | 6 --- ldap-git-backup.in | 98 +--------------------------------------------- 2 files changed, 1 insertion(+), 103 deletions(-) diff --git a/README.mdown b/README.mdown index c2f895b..d039c8c 100644 --- a/README.mdown +++ b/README.mdown @@ -48,12 +48,6 @@ The script ldap-git-backup is a convenient way to automate this pro The backup location will be /var/backups/ldap or an alternative directory given by the --backup-dir option. This directory will also contain the Git repository. The directory and the Git repository will be created if needed when the first backup is made. -If you want to also have the DN of the entry in the filename, use the option --dn. This option will prefix the filename with the entry's reversed DN. Or if used in conjunction with --sort using the entry's normal DN. This option is useful when the order of entries is important for the restore scenario, for example if you want to restore only parts of a database using ldapadd while slapd is running. Consider using ldapsearch as --ldif-cmd in such scenarios for optimal sorting. 
Using this option could break the backup if you have very long DNs and the resulting filename exceeds the maximum filename length of the underlying filesystem. If you wish to hide a long DN suffix of a backup, you can use the option --suffix to specify the part at the end of the DN to be replace by the string suffix. - -To preserve the exact order of entries in a backup, use the option --sort, which will prefix the filename with an incremental hexadecimal number padded with zeros. This option is useful to backup cn=config. It is not recommended for regular DITs. This option can also be combined with the option --dn. The default padding length of the prefix is 5, which is suitable for correct sorting of up to ~1M entries. You can use the option --sort-prefix to specify a different value. - -If you want to exclude some attributes from the backup, use the option --exclude-attrs exclude_regex, where exclude_regex is a regular expression to be matched against the attribute names. This will Exclude all attributes matching the regular expression. The regex is effectively enclosed with prefix ^ and suffix : to only match attribute names and results in ^exclude_regex: . - ## Backup Strategies The simplest backup strategy would just call ldap-git-backup once per day via cron. Pick a quiet time for the LDAP directory and add a command like the following to your crontab (e.g., crontab -e or in /etc/cron.d/ldap-git-backup): diff --git a/ldap-git-backup.in b/ldap-git-backup.in index 21134f8..829bbe2 100644 --- a/ldap-git-backup.in +++ b/ldap-git-backup.in @@ -21,11 +21,6 @@ sub main { my $commit_msg = 'ldap-git-backup'; my $commit_date = time(); my $gc = 1; - my $dn = 0; - my $sort = 0; - my $sort_prefix = 5; - my $exclude_attrs = ""; - my $suffix = ""; my $help; GetOptions( 'ldif-cmd=s' => \$ldif_cmd, @@ -33,11 +28,6 @@ sub main { 'commit-msg=s' => \$commit_msg, 'commit-date=s' => \$commit_date, 'gc!' 
=> \$gc, - 'dn' => \$dn, - 'sort' => \$sort, - 'sort-prefix=s' => \$sort_prefix, - 'exclude-attrs=s' => \$exclude_attrs, - 'suffix=s' => \$suffix, 'help' => \$help, ); pod2usage('-verbose' => 2, '-exit_status' => 0) if $help; @@ -51,11 +41,6 @@ sub main { $backup_dir = getcwd() . '/' . $backup_dir; } - if ((!($sort_prefix =~ m/^\d+\z/)) || ($sort_prefix == 0)) { - die("Error: invalid --sort_prefix\n" . - "Please only use whole numbers > 0\n"); - } - my $ldif_aref = LDAP::Utils::read_ldif($ldif_cmd); make_path($backup_dir, {mode => 0700}); my $mode = stat($backup_dir)->mode; @@ -70,40 +55,13 @@ sub main { for my $f (@filelist) { $files_before{$f} = 1 } @filelist = (); - my $i=0; for my $ldif (@$ldif_aref) { my $filename = LDAP::Utils::filename($ldif); - if ($dn && $sort) { - my $dn = LDAP::Utils::file_dn($ldif, $suffix); - my $cnt = LDAP::Utils::counter($i, $sort_prefix); - $filename = $cnt . '_' . $dn . '_' . $filename; - } elsif ($sort) { - my $cnt = LDAP::Utils::counter($i, $sort_prefix); - $filename = $cnt . '_' . $filename; - } elsif ($dn) { - my $dn = LDAP::Utils::file_dn_rev($ldif, $suffix); - $filename = $dn . '_' . $filename; - } open(my $fh, '>', "$backup_dir/$filename"); - if ($exclude_attrs ne "") { - my @lines = split /\n/, $ldif; - my $n = 0; - while (my $l = $lines[$n]) { - if ($l =~ m/^$exclude_attrs: /) { - while ($lines[++$n] && $lines[$n] =~ /^ /) {}; - } else { - print {$fh} $l."\n"; - $n++; - } - } - print {$fh} "\n"; - } else { - print {$fh} $ldif; - } + print {$fh} $ldif; close($fh); push(@filelist, $filename); delete($files_before{$filename}); - $i++; } my @filelist_rm = keys %files_before; my $filelist_chunks_ref = LDAP::Utils::get_chunks(\@filelist); @@ -174,12 +132,6 @@ sub filename { return $filename . '.ldif'; } -sub counter { - my ($i, $len) = @_; - my $counter = sprintf("%0." . $len . 
"x",$i); - return $counter -} - sub dn { my ($ldif) = @_; $ldif =~ s{\n }{}gs; # combine multiline attributes @@ -193,23 +145,6 @@ sub dn { return ''; } -sub file_dn { - my ($ldif, $suffix) = @_; - my $dn = dn($ldif); - if ($suffix ne "") { - $dn =~ s#$suffix$#suffix#; - } - $dn =~ s#/##; - return $dn; -} - -sub file_dn_rev { - my ($ldif, $suffix) = @_; - my $dn = file_dn($ldif, $suffix); - $dn = join ",", reverse split /,/, $dn; - return $dn; -} - sub canonicalize_dn { my ($dn) = @_; @@ -361,37 +296,6 @@ By default gc is done so as to keep the size of the backup down. You may want to skip gc for the occasional backup run but leave it on for the scheduled backups. -=item B<--dn> - -Prefix the filename with the entry's reversed DN. Or if used in conjunction with ---sort using the entry's normal DN. This option is useful when the order of -entries is important for the restore scenario, for example if you want to restore -only parts of a database using ldapadd while slapd is running. Consider using -ldapsearch as --ldif-cmd in such scenarios for optimal sorting. Warning: using -this option may break the backup if you have very long DNs and the resulting -filename exceeds the maximum filename length of the underlying filesystem. - -=item B<--sort> - -Prefix the filename with an incremental hexadecimal number padded with zeros. -This option is useful to backup cn=config with the exact order of entries. It is -not recommended for regular DITs. - -=item B<--sort-prefix Eprefix_lengthE> - -The number of zeros the hexadecimal prefix is padded with when using --sort. -Default: 5 - -=item B<--exclude-attrs Eexclude_regexE> - -Exclude all attributes matching the regular expression C. The -regex is effectively enclosed with prefix C<^> and suffix C<: > to only match -attribute names and results in C<^exclude_regex: >. - -=item B<--suffix Edn_suffixE> - -Replace the DN suffix of the backup with the string C in the file name. - =item B<--help> Prints this page.