diff --git a/apport/packaging_impl/apt_dpkg.py b/apport/packaging_impl/apt_dpkg.py index 1042b5f6e..5e1ab927f 100644 --- a/apport/packaging_impl/apt_dpkg.py +++ b/apport/packaging_impl/apt_dpkg.py @@ -57,6 +57,35 @@ import apport.logging from apport.package_info import PackageInfo +# The Contents-*.gz files are huge. Loading all data into memory would result in a +# dictionary with millions of entries consuming several gigabytes of memory. +# Therefore exclude unneeded paths with roughly 100k entries or more. +# Data from 2026-04-23 on amd64: +# +# | Path | jammy | noble | questing | resolute | +# |----------------------------|------------|------------|------------|------------| +# | lib/modules | 10,206,350 | 5,492,830 | 475,055 | 9,120 | +# | usr/include | 278,938 | 271,587 | 295,470 | 298,389 | +# | usr/src | 39,533,381 | 21,538,896 | 2,201,315 | 420,916 | +# | usr/[^/]+/include | 107,568 | 170,800 | 103,340 | 127,294 | +# | usr/lib/modules | 0 | 0 | 0 | 73,589 | +# | usr/share/cargo/registry | 28,202 | 74,721 | 102,428 | 127,537 | +# | usr/share/doc | 3,025,577 | 2,938,577 | 2,636,718 | 2,766,784 | +# | usr/share/gimp/[^/]+/help | 38,220 | 73,845 | 78,120 | 85,216 | +# | usr/share/gocode | 112,802 | 168,941 | 199,405 | 227,626 | +# | usr/share/help | 121,470 | 124,824 | 118,948 | 119,346 | +# | usr/share/icons | 605,862 | 659,375 | 715,984 | 753,986 | +# | usr/share/libreoffice/help | 93,359 | 98,690 | 100,540 | 100,580 | +# | usr/share/locale | 95,312 | 110,122 | 123,230 | 123,842 | +# | usr/share/man | 149,353 | 165,310 | 175,262 | 177,041 | +# | usr/share/texlive | 145,420 | 168,740 | 170,265 | 171,546 | +# | **remaining paths** | 3,365,393 | 3,839,911 | 3,725,751 | 3,909,851 | +_EXCLUDED_PATHS = ( + "^(lib/modules|usr/(include|src|[^/]+/include|lib/modules|share/" + "(cargo/registry|doc|gimp/[^/]+/help|gocode|help" + "|icons|libreoffice/help|locale|man|texlive)))/" +) + # pylint: disable-next=too-many-arguments,too-many-positional-arguments def 
_extract_downloaded_debs( @@ -1671,10 +1700,7 @@ def _get_contents_file(self, map_cachedir: str, dist: str, arch: str) -> str | N def _update_given_file2pkg_mapping( file2pkg: dict[bytes, bytes], contents_filename: str, dist: str ) -> None: - path_exclude_pattern = re.compile( - rb"^:|(boot|var|usr/(include|src|[^/]+/include" - rb"|share/(doc|gocode|help|icons|locale|man|texlive)))/" - ) + path_exclude_pattern = re.compile(_EXCLUDED_PATHS.encode()) with gzip.open(contents_filename, "rb") as contents: if dist in {"trusty", "xenial"}: # the first 32 lines are descriptive only for these diff --git a/tests/unit/test_packaging_apt_dpkg.py b/tests/unit/test_packaging_apt_dpkg.py index e3657e718..3df8ab51b 100644 --- a/tests/unit/test_packaging_apt_dpkg.py +++ b/tests/unit/test_packaging_apt_dpkg.py @@ -244,7 +244,12 @@ def test_contents_skip_xenial_header(self) -> None: impl._update_given_file2pkg_mapping(file2pkg, "/fake_Contents", "xenial") self.assertEqual( - file2pkg, {b"bin/afio": b"afio", b"bin/archdetect": b"archdetect-deb"} + file2pkg, + { + b":sexsend:sexget:": b"fex", + b"bin/afio": b"afio", + b"bin/archdetect": b"archdetect-deb", + }, ) open_mock.assert_called_once_with("/fake_Contents", "rb") @@ -256,6 +261,8 @@ def test_contents_path_filering(self) -> None: bin/ip net/iproute2 boot/ipxe.efi admin/grub-ipxe etc/dput.cf devel/dput +lib/modules/6.8.0-31-generic/kernel/crypto/lz4.ko.zst \ + kernel/linux-modules-6.8.0-31-generic lib/nut/clone admin/nut-server sbin/hdparm admin/hdparm usr/Brother/inf/braddprinter multiverse/text/brother-lpr-drivers-laser @@ -270,12 +277,15 @@ def test_contents_path_filering(self) -> None: usr/libexec/coreutils/libstdbuf.so utils/coreutils usr/libx32/ld.so libs/libc6-x32 usr/sbin/zic libs/libc-bin +usr/share/cargo/registry/libc-0.2.152/src/lib.rs universe/rust/librust-libc-dev usr/share/dicom3tools/gen.so universe/graphics/dicom3tools usr/share/doc/0install universe/admin/0install +usr/share/gimp/2.0/help/de/glossary.html 
universe/doc/gimp-help-de usr/share/gocode/src/launchpad.net/mgo universe/devel/golang-gopkg-mgo.v2-dev usr/share/help/C/eog/default.page gnome/eog usr/share/icons/gnome-colors-common/32x32/apps/konsole.png\ universe/gnome/gnome-colors-common +usr/share/libreoffice/help/en-US/noscript.html doc/libreoffice-help-en-us usr/share/locale/de/LC_MESSAGES/apt.mo admin/apt usr/share/man/de/man1/man.1.gz doc/man-db usr/share/texlive/index.html universe/tex/texlive-base @@ -293,6 +303,7 @@ def test_contents_path_filering(self) -> None: {k.decode(): v.decode() for k, v in file2pkg.items()}, { "bin/ip": "iproute2", + "boot/ipxe.efi": "grub-ipxe", "etc/dput.cf": "dput", "lib/nut/clone": "nut-server", "sbin/hdparm": "hdparm", @@ -307,6 +318,7 @@ def test_contents_path_filering(self) -> None: "usr/libx32/ld.so": "libc6-x32", "usr/sbin/zic": "libc-bin", "usr/share/dicom3tools/gen.so": "dicom3tools", + "var/lib/ieee-data/iab.txt": "ieee-data", }, ) open_mock.assert_called_once_with("Contents-amd64", "rb")