diff --git a/volatility3/framework/constants/linux/__init__.py b/volatility3/framework/constants/linux/__init__.py
index 3f8c52b43b..72440e23ef 100644
--- a/volatility3/framework/constants/linux/__init__.py
+++ b/volatility3/framework/constants/linux/__init__.py
@@ -432,6 +432,17 @@ def flags(self) -> str:
 VMCOREINFO_MAGIC_ALIGNED = VMCOREINFO_MAGIC + b"\x00"
 OSRELEASE_TAG = b"OSRELEASE="
 
+ATTRIBUTE_NAME_MAX_SIZE = 255
+"""
+In 5.9-rc1+, the Linux kernel limits the READ size of a section bin_attribute name to MODULE_SECT_READ_SIZE:
+
+- https://elixir.bootlin.com/linux/v6.15-rc4/source/kernel/module/sysfs.c#L106
+- https://github.com/torvalds/linux/commit/11990a5bd7e558e9203c1070fc52fb6f0488e75b
+
+However, the raw section name loaded from the .ko ELF can in theory be thousands of characters,
+and unless we do a NULL terminated search we can't set a perfect value.
+"""
+
 
 @dataclass
 class TaintFlag:
diff --git a/volatility3/framework/objects/utility.py b/volatility3/framework/objects/utility.py
index 59ac0ee55f..799639a671 100644
--- a/volatility3/framework/objects/utility.py
+++ b/volatility3/framework/objects/utility.py
@@ -3,11 +3,13 @@
 #
 
 import re
-
+import logging
 from typing import Optional, Union
 
 from volatility3.framework import interfaces, objects, constants, exceptions
 
+vollog = logging.getLogger(__name__)
+
 
 def rol(value: int, count: int, max_bits: int = 64) -> int:
     """A rotate-left instruction in Python"""
@@ -250,3 +252,69 @@ def array_of_pointers(
     ).clone()
     subtype_pointer.update_vol(subtype=subtype)
     return array.cast("array", count=count, subtype=subtype_pointer)
+
+
+def dynamically_sized_array_of_pointers(
+    context: interfaces.context.ContextInterface,
+    array: interfaces.objects.ObjectInterface,
+    subtype: Union[str, interfaces.objects.Template],
+    iterator_guard_value: int,
+    stop_value: int = 0,
+    stop_on_invalid_pointers: bool = True,
+) -> interfaces.objects.ObjectInterface:
+    """Builds an array of pointers from a dynamically sized array (e.g. NULL-terminated).
+
+    Entries are read one pointer at a time, starting at the offset of `array`. The walk is
+    always bounded by `iterator_guard_value`, to prevent running forever in case something
+    unexpected happens (e.g. smearing).
+
+    Args:
+        context: The context on which to operate.
+        array: The object to cast to an array.
+        subtype: The subtype of the array's pointers.
+        iterator_guard_value: Maximum number of entries to read. This is an extra-safety against smearing.
+        stop_value: Value marking the end of the array. It is not counted as an entry. Defaults to 0 (NULL-terminated arrays).
+        stop_on_invalid_pointers: Determines whether to stop iterating or not when an invalid pointer is encountered.
+            Setting this to False can be useful for arrays that are known to have smeared entries before the end.
+
+    Returns:
+        An array of pointer objects, holding the entries read before the walk stopped.
+    """
+    new_count = 0
+    sym_table_name = array.get_symbol_table_name()
+    ptr_size = context.symbol_space[sym_table_name].get_type("pointer").size
+    layer_name = array.vol.layer_name
+    offset = array.vol.offset
+
+    for _ in range(iterator_guard_value):
+        try:
+            entry = context.object(
+                sym_table_name + constants.BANG + "pointer",
+                offset=offset,
+                layer_name=layer_name,
+            )
+        except exceptions.InvalidAddressException:
+            # The slot holding the pointer itself cannot be read, nothing more to walk
+            break
+
+        # The terminator ends the array and is not one of its entries
+        if entry == stop_value:
+            break
+
+        if stop_on_invalid_pointers and not entry.is_readable():
+            break
+
+        offset += ptr_size
+        new_count += 1
+    else:
+        # Only runs when no "break" was hit, i.e. the guard value was really exhausted
+        vollog.log(
+            constants.LOGLEVEL_V,
+            f"Iterator guard value {iterator_guard_value} reached while iterating over array at offset {array.vol.offset:#x}. "
+            "This means that there is a bug (e.g. smearing) with this array, or that it may contain valid entries past the iterator guard value.",
+        )
+
+    # Leverage the "Array" object instead of returning a Python list
+    return array_of_pointers(
+        array=array, count=new_count, subtype=subtype, context=context
+    )
diff --git a/volatility3/framework/plugins/linux/module_extract.py b/volatility3/framework/plugins/linux/module_extract.py
index a8864281af..3b6b6f0e5f 100644
--- a/volatility3/framework/plugins/linux/module_extract.py
+++ b/volatility3/framework/plugins/linux/module_extract.py
@@ -4,8 +4,8 @@
 import logging
 from typing import List
 
+import volatility3.framework.symbols.linux.utilities.modules as linux_utilities_modules
 from volatility3 import framework
-import volatility3.framework.symbols.linux.utilities.module_extract as linux_utilities_module_extract
 from volatility3.framework import interfaces, renderers
 from volatility3.framework.configuration import requirements
 from volatility3.framework.renderers import format_hints
@@ -17,7 +17,7 @@
 class ModuleExtract(interfaces.plugins.PluginInterface):
     """Recreates an ELF file from a specific address in the kernel"""
 
-    _version = (1, 0, 0)
+    _version = (1, 0, 1)
     _required_framework_version = (2, 25, 0)
 
     framework.require_interface_version(*_required_framework_version)
@@ -37,9 +37,9 @@ def get_requirements(cls) -> List[interfaces.configuration.RequirementInterface]
                 optional=False,
             ),
             requirements.VersionRequirement(
-                name="linux_utilities_module_extract",
-                version=(1, 0, 0),
-                component=linux_utilities_module_extract.ModuleExtract,
+                name="linux_utilities_modules_module_extract",
+                version=(1, 0, 2),
+                component=linux_utilities_modules.ModuleExtract,
             ),
         ]
 
@@ -58,7 +58,7 @@ def _generator(self):
 
         module = kernel.object(object_type="module", offset=base_address, absolute=True)
 
-        elf_data = linux_utilities_module_extract.ModuleExtract.extract_module(
+        elf_data = linux_utilities_modules.ModuleExtract.extract_module(
             self.context, self.config["kernel"], module
) if not elf_data: diff --git a/volatility3/framework/symbols/linux/__init__.py b/volatility3/framework/symbols/linux/__init__.py index 19fb8f1d45..7560da3eca 100644 --- a/volatility3/framework/symbols/linux/__init__.py +++ b/volatility3/framework/symbols/linux/__init__.py @@ -52,7 +52,6 @@ def __init__(self, *args, **kwargs) -> None: self.set_type_class("idr", extensions.IDR) self.set_type_class("address_space", extensions.address_space) self.set_type_class("page", extensions.page) - self.set_type_class("module_sect_attr", extensions.module_sect_attr) # Might not exist in the current symbols self.optional_set_type_class("module", extensions.module) @@ -61,6 +60,8 @@ def __init__(self, *args, **kwargs) -> None: self.optional_set_type_class("kernel_cap_struct", extensions.kernel_cap_struct) self.optional_set_type_class("kernel_cap_t", extensions.kernel_cap_t) self.optional_set_type_class("scatterlist", extensions.scatterlist) + self.optional_set_type_class("module_sect_attr", extensions.module_sect_attr) + self.optional_set_type_class("bin_attribute", extensions.bin_attribute) # kernels >= 4.18 self.optional_set_type_class("timespec64", extensions.timespec64) diff --git a/volatility3/framework/symbols/linux/extensions/__init__.py b/volatility3/framework/symbols/linux/extensions/__init__.py index 0136d749a7..71ce82d82a 100644 --- a/volatility3/framework/symbols/linux/extensions/__init__.py +++ b/volatility3/framework/symbols/linux/extensions/__init__.py @@ -179,25 +179,45 @@ def get_name(self) -> Optional[str]: return None def _get_sect_count(self, grp: interfaces.objects.ObjectInterface) -> int: - """Try to determine the number of valid sections""" - symbol_table_name = self.get_symbol_table_name() - arr = self._context.object( - symbol_table_name + constants.BANG + "array", - layer_name=self.vol.layer_name, - offset=grp.attrs, - subtype=self._context.symbol_space.get_type( - symbol_table_name + constants.BANG + "pointer" - ), - count=25, - ) + """Try to determine 
the number of valid sections. Support for kernels > 6.14-rc1. + + Resources: + - https://github.com/torvalds/linux/commit/d8959b947a8dfab1047c6fd5e982808f65717bfe + - https://github.com/torvalds/linux/commit/e0349c46cb4fbbb507fa34476bd70f9c82bad359 + """ + + if grp.has_member("bin_attrs"): + arr_offset_ptr = grp.bin_attrs + arr_subtype = "bin_attribute" + else: + arr_offset_ptr = grp.attrs + arr_subtype = "attribute" + + if not arr_offset_ptr.is_readable(): + vollog.log( + constants.LOGLEVEL_V, + f"Cannot dereference the pointer to the NULL-terminated list of binary attributes for module at offset {self.vol.offset:#x}", + ) + return 0 - idx = 0 - while arr[idx] and arr[idx].is_readable(): - idx = idx + 1 - return idx + # We chose 100 as an arbitrary guard value to prevent + # looping forever in extreme cases, and because 100 is not expected + # to be a valid number of sections. If that still happens, + # Vol3 module processing will indicate that it is missing information + # with the following message: + # "Unable to reconstruct the ELF for module struct at" + # See PR #1773 for more information. 
+ bin_attrs_list = utility.dynamically_sized_array_of_pointers( + context=self._context, + array=arr_offset_ptr.dereference(), + subtype=self.get_symbol_table_name() + constants.BANG + arr_subtype, + iterator_guard_value=100, + ) + return len(bin_attrs_list) @functools.cached_property def number_of_sections(self) -> int: + # Dropped in 6.14-rc1: d8959b947a8dfab1047c6fd5e982808f65717bfe if self.sect_attrs.has_member("nsections"): return self.sect_attrs.nsections @@ -205,15 +225,18 @@ def number_of_sections(self) -> int: def get_sections(self) -> Iterable[interfaces.objects.ObjectInterface]: """Get a list of section attributes for the given module.""" + if self.number_of_sections == 0: + vollog.debug( + f"Invalid number of sections ({self.number_of_sections}) for module at offset {self.vol.offset:#x}" + ) + return [] symbol_table_name = self.get_symbol_table_name() arr = self._context.object( symbol_table_name + constants.BANG + "array", layer_name=self.vol.layer_name, offset=self.sect_attrs.attrs.vol.offset, - subtype=self._context.symbol_space.get_type( - symbol_table_name + constants.BANG + "module_sect_attr" - ), + subtype=self.sect_attrs.attrs.vol.subtype, count=self.number_of_sections, ) @@ -3157,7 +3180,9 @@ def get_name(self) -> Optional[str]: """ if hasattr(self, "battr"): try: - return utility.pointer_to_string(self.battr.attr.name, count=32) + return utility.pointer_to_string( + self.battr.attr.name, count=linux_constants.ATTRIBUTE_NAME_MAX_SIZE + ) except exceptions.InvalidAddressException: # if battr is present then its name attribute needs to be valid vollog.debug(f"Invalid battr name for section at {self.vol.offset:#x}") @@ -3165,14 +3190,18 @@ def get_name(self) -> Optional[str]: elif self.name.vol.type_name == "array": try: - return utility.array_to_string(self.name, count=32) + return utility.array_to_string( + self.name, count=linux_constants.ATTRIBUTE_NAME_MAX_SIZE + ) except exceptions.InvalidAddressException: # specifically do not return here to 
give `mattr` a chance vollog.debug(f"Invalid direct name for section at {self.vol.offset:#x}") elif self.name.vol.type_name == "pointer": try: - return utility.pointer_to_string(self.name, count=32) + return utility.pointer_to_string( + self.name, count=linux_constants.ATTRIBUTE_NAME_MAX_SIZE + ) except exceptions.InvalidAddressException: # specifically do not return here to give `mattr` a chance vollog.debug( @@ -3182,10 +3211,33 @@ def get_name(self) -> Optional[str]: # if everything else failed... if hasattr(self, "mattr"): try: - return utility.pointer_to_string(self.mattr.attr.name, count=32) + return utility.pointer_to_string( + self.mattr.attr.name, count=linux_constants.ATTRIBUTE_NAME_MAX_SIZE + ) except exceptions.InvalidAddressException: vollog.debug( f"Unresolvable name for for section at {self.vol.offset:#x}" ) return None + + +class bin_attribute(objects.StructType): + def get_name(self) -> Optional[str]: + """ + Performs extraction of the bin_attribute name + """ + try: + return utility.pointer_to_string( + self.attr.name, count=linux_constants.ATTRIBUTE_NAME_MAX_SIZE + ) + except exceptions.InvalidAddressException: + vollog.debug(f"Invalid attr name for bin_attribute at {self.vol.offset:#x}") + return None + + @property + def address(self) -> int: + """Equivalent to module_sect_attr.address: + - https://github.com/torvalds/linux/commit/4b2c11e4aaf7e3d7fd9ce8e5995a32ff5e27d74f + """ + return self.private diff --git a/volatility3/framework/symbols/linux/utilities/module_extract.py b/volatility3/framework/symbols/linux/utilities/module_extract.py index e55f776682..5ec2543683 100644 --- a/volatility3/framework/symbols/linux/utilities/module_extract.py +++ b/volatility3/framework/symbols/linux/utilities/module_extract.py @@ -11,11 +11,7 @@ ) from volatility3 import framework -from volatility3.framework import ( - interfaces, - exceptions, - symbols, -) +from volatility3.framework import interfaces, exceptions, symbols, deprecation from 
volatility3.framework.constants import linux as linux_constants from volatility3.framework.symbols.linux import extensions @@ -35,54 +31,20 @@ # ModuleExtract.extract_module is the entry point and only visible method for plugins +# See PR #1773 +@deprecation.renamed_class( + deprecated_class_name="ModuleExtract", + removal_date="2026-06-01", + message="volatility3.framework.symbols.linux.utilities.module_extract.ModuleExtract is to be deprecated. Use volatility3.framework.symbols.linux.utilities.modules.ModuleExtract instead.", +) class ModuleExtract(interfaces.configuration.VersionableInterface): """Extracts Linux kernel module structures into an analyzable ELF file""" - _version = (1, 0, 0) + _version = (1, 0, 1) _required_framework_version = (2, 25, 0) framework.require_interface_version(*_required_framework_version) - @classmethod - def _get_module_section_count( - cls, - context: interfaces.context.ContextInterface, - vmlinux_name: str, - module: extensions.module, - grp: interfaces.objects.ObjectInterface, - ) -> int: - """ - Used to manually determine the section count for kernels that do not track - this count directly within the attribute structures - """ - kernel = context.modules[vmlinux_name] - - count = 0 - - try: - array = kernel.object( - object_type="array", - offset=grp.attrs, - sub_type=kernel.get_type("pointer"), - count=50, - absolute=True, - ) - - # Walk up to 50 sections counting until we reach the end or a page fault - for sect in array: - if sect.vol.offset == 0: - break - - count += 1 - - except exceptions.InvalidAddressException: - # Use whatever count we reached before the error - vollog.debug( - f"Exception hit counting sections for module at {module.vol.offset:#x}" - ) - - return count - @classmethod def _find_section( cls, section_lookups: List[Tuple[str, int, int, int]], sym_address: int @@ -261,54 +223,6 @@ def _fix_sym_table( return sym_table_data - @classmethod - def _enumerate_original_sections( - cls, - context: 
interfaces.context.ContextInterface, - vmlinux_name: str, - module: extensions.module, - ) -> Optional[Dict[int, str]]: - """ - Enumerates the module's sections as maintained by the kernel after load time - 'Early' sections like .init.text and .init.data are discarded after module - initialization, so they are not expected to be in memory during extraction - """ - if hasattr(module.sect_attrs, "nsections"): - num_sections = module.sect_attrs.nsections - else: - num_sections = cls._get_module_section_count( - context, vmlinux_name, module.sect_attrs.grp - ) - - if num_sections > 1024 or num_sections == 0: - vollog.debug( - f"Invalid number of sections ({num_sections}) for module at offset {module.vol.offset:#x}" - ) - return None - - vmlinux = context.modules[vmlinux_name] - - # This is declared as a zero sized array, so we create ourselves - attribute_type = module.sect_attrs.attrs.vol.subtype - - sect_array = vmlinux.object( - object_type="array", - subtype=attribute_type, - offset=module.sect_attrs.attrs.vol.offset, - count=num_sections, - absolute=True, - ) - - sections: Dict[int, str] = {} - - # for each section, gather its name and address - for index, section in enumerate(sect_array): - name = section.get_name() - - sections[section.address] = name - - return sections - @classmethod def _parse_sections( cls, @@ -325,10 +239,12 @@ def _parse_sections( The data of .strtab is read directly off the module structure and not its section as the section from the original module has no meaning after loading as the kernel does not reference it. 
""" - original_sections = cls._enumerate_original_sections( - context, vmlinux_name, module - ) - if original_sections is None: + original_sections = {} + for index, section in enumerate(module.get_sections()): + name = section.get_name() + original_sections[section.address] = name + + if not original_sections: return None kernel = context.modules[vmlinux_name] @@ -702,9 +618,10 @@ def extract_module( return None # Gather sections - updated_sections, strtab_index, symtab_index = cls._parse_sections( - context, vmlinux_name, module - ) + parse_sections_result = cls._parse_sections(context, vmlinux_name, module) + if parse_sections_result is None: + return None + updated_sections, strtab_index, symtab_index = parse_sections_result kernel = context.modules[vmlinux_name] diff --git a/volatility3/framework/symbols/linux/utilities/modules.py b/volatility3/framework/symbols/linux/utilities/modules.py index 2b16ec6e08..63b25380ae 100644 --- a/volatility3/framework/symbols/linux/utilities/modules.py +++ b/volatility3/framework/symbols/linux/utilities/modules.py @@ -1,5 +1,7 @@ import logging import warnings +import functools +import struct from abc import ABCMeta, abstractmethod from typing import ( Callable, @@ -15,7 +17,6 @@ Union, ) -import volatility3.framework.symbols.linux.utilities.module_extract as linux_utilities_module_extract from volatility3 import framework from volatility3.framework import ( constants, @@ -24,12 +25,14 @@ interfaces, objects, renderers, + symbols, ) from volatility3.framework.configuration import requirements from volatility3.framework.objects import utility from volatility3.framework.renderers import format_hints from volatility3.framework.symbols.linux import extensions from volatility3.framework.symbols.linux.utilities import tainting +from volatility3.framework.constants import linux as linux_constants vollog = logging.getLogger(__name__) @@ -71,7 +74,7 @@ def gather_modules( class Modules(interfaces.configuration.VersionableInterface): 
"""Kernel modules related utilities.""" - _version = (3, 0, 1) + _version = (3, 0, 2) _required_framework_version = (2, 0, 0) framework.require_interface_version(*_required_framework_version) @@ -311,6 +314,7 @@ def run_modules_scanners( return run_results @staticmethod + @functools.lru_cache def get_modules_memory_boundaries( context: interfaces.context.ContextInterface, vmlinux_module_name: str, @@ -781,6 +785,740 @@ def get_load_parameters( yield name, value +# This module is responsible for producing an ELF file of a kernel module (LKM) loaded in memory +# This extraction task is quite complicated as the Linux kernel discards the ELF header at load time +# Due to this, to support static analysis, we must create an ELF header and proper file based on the sections +# There are also several other significant complications that we must deal with when trying to extract an LKM +# that can be analyzed with static analysis tools +# First, the .strtab points somewhere random and is kept off the module structure, not with the other sections +# Second, all of the symbols (.symtab) have mangled members that we must patch for anything to make sense +# Third, the section name string table (.shstrtab) is not an allocated section, meaning it's not in memory +# Not having the .shstrtab makes analysis difficult to impossible for static analysis tools. 
To work around this, +# we create the .shstrtab based on the sections in memory and then glue it in as the final section + + +# ModuleExtract.extract_module is the entry point and only visible method for plugins +class ModuleExtract(interfaces.configuration.VersionableInterface): + """Extracts Linux kernel module structures into an analyzable ELF file""" + + _version = (1, 0, 2) + _required_framework_version = (2, 25, 0) + + framework.require_interface_version(*_required_framework_version) + + @classmethod + def get_requirements(cls) -> List[interfaces.configuration.RequirementInterface]: + return [ + requirements.VersionRequirement( + name="linux_utilities_modules_modules", + component=Modules, + version=(3, 0, 2), + ), + ] + + @classmethod + def _find_section( + cls, section_lookups: List[Tuple[str, int, int, int]], sym_address: int + ) -> Optional[Tuple[str, int, int, int]]: + """ + Finds the section containing `sym_address` + """ + for name, index, address, size in section_lookups: + if address <= sym_address < address + size: + return name, index, address, size + + return None + + @classmethod + def _get_st_info_for_sym( + cls, sym: interfaces.objects.ObjectInterface, sym_address: int, sect_name: str + ) -> bytes: + """ + This is a helper function called from `_fix_sym_table` + + Calculates the `st_info` value for the given symbol + + Spec: https://refspecs.linuxbase.org/elf/gabi4+/ch4.symtab.html + """ + if sym.st_name > 0: + # Global symbol + bind = linux_constants.STB_GLOBAL + + if sym_address == 0: + sect_type = linux_constants.STT_NOTYPE + elif sect_name: + # rela = relocations + if sect_name.find(".text") != -1 and sect_name.find(".rela") == -1: + sect_type = linux_constants.STT_FUNC + else: + sect_type = linux_constants.STT_OBJECT + + else: + # outside the module being extracted + sect_type = linux_constants.STT_NOTYPE + + else: + # Local symbol + bind = linux_constants.STB_LOCAL + sect_type = linux_constants.STT_SECTION + + # Build the st_info as 
ELF32_ST_INFO/ELF64_ST_INFO + bind_bits = (bind << 4) & 0xF0 + type_bits = sect_type & 0xF + + st_info_int = (bind_bits | type_bits) & 0xFF + + return struct.pack("B", st_info_int) + + @classmethod + def _get_fixed_sym_fields( + cls, + st_fmt: str, + sym: interfaces.objects.ObjectInterface, + sections: List[Tuple[str, int, int, int]], + ) -> Tuple[str, int, int, int]: + """ + This is a helper function called from `_fix_sym_table` + + The st_value, st_info, and st_shndx fields of each symbol are changed/mangled while loading + Static analysis tools do not understand these transformed values as they only make sense to the kernel loader + We must de-mangle these to have analysis tools understand symbols (a key aspect) + """ + # Start by trying to map a symbol to its section + sym_address = sym.st_value + sect_info = cls._find_section(sections, sym_address) + + if not sect_info: + # Symbol does not point into the module being extracted + sect_name, sect_index, sect_address = None, None, None + st_value_int = sym_address + else: + # relative address inside the section + sect_name, sect_index, sect_address, _ = sect_info + st_value_int = sym_address - sect_address + + # Get the fixed st_value, st_info, and st_shndx that are broken in the mapped file + + # formatted to be written into the extracted file + st_value = struct.pack(st_fmt, st_value_int) + + # returns formatted to be written into the extracted file + st_info = cls._get_st_info_for_sym(sym, sym_address, sect_name) + + # format to reference its section, if any + if sect_name: + st_shndx = struct.pack(" Optional[bytes]: + """ + Args: + context: The context on which to operate. + vmlinux_name: The name of the kernel module. + original_sections: Dict of module section addresses and names. + section_sizes: Dict of module section addresses and sizes. + sym_type_name: ELF symbol type name (should be one of "Elf64_Sym" or "Elf32_Sym"). 
+ st_fmt: "struct"-like unpack format string (should be one of " Optional[Tuple[List, int, int]]: + """ + This function first parses the sections as maintained by the kernel + It then orders the sections by load address, and then gathers the data of each section + We also track the file_offset to correctly have alignment in the output file + + .symtab requires special handling as its so broken in memory as described in `_fix_sym_table` + The data of .strtab is read directly off the module structure and not its section + as the section from the original module has no meaning after loading as the kernel does not reference it. + """ + kernel = context.modules[vmlinux_name] + kernel_layer = context.layers[kernel.layer_name] + modules_addr_min, modules_addr_max = Modules.get_modules_memory_boundaries( + context, vmlinux_name + ) + modules_addr_min &= kernel_layer.address_mask + modules_addr_max &= kernel_layer.address_mask + original_sections = {} + for index, section in enumerate(module.get_sections()): + # Extra sanity check, to prevent OOM on heavily smeared samples at line + # "size = next_address - address" + if not ( + modules_addr_min + <= section.address & kernel_layer.address_mask + < modules_addr_max + ): + continue + + name = section.get_name() + original_sections[section.address] = name + + if not original_sections: + return None + + if symbols.symbol_table_is_64bit(context, kernel.symbol_table_name): + sym_type = "Elf64_Sym" + elf_hdr_type = "Elf64_Ehdr" + st_fmt = " Optional[bytes]: + """ + Creates a `bits` bit ELF header for the file based on recovered values + Called last as it needs information computed from the sections + + Spec: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html + """ + if bits == 32: + fmt = " Optional[int]: + """ + This function makes a best effort to map common section names + to their attributes + """ + known_sections = { + ".note.gnu.build-id": linux_constants.SHT_NOTE, + ".text": linux_constants.SHT_PROGBITS, + 
".init.text": linux_constants.SHT_PROGBITS, + ".exit.text": linux_constants.SHT_PROGBITS, + ".static_call.text": linux_constants.SHT_PROGBITS, + ".rodata": linux_constants.SHT_PROGBITS, + ".modinfo": linux_constants.SHT_PROGBITS, + "__param": linux_constants.SHT_PROGBITS, + ".data": linux_constants.SHT_PROGBITS, + ".gnu.linkonce.this_module": linux_constants.SHT_PROGBITS, + ".comment": linux_constants.SHT_PROGBITS, + ".shstrtab": linux_constants.SHT_STRTAB, + ".symtab": linux_constants.SHT_SYMTAB, + ".strtab": linux_constants.SHT_STRTAB, + } + + sect_type_val = linux_constants.SHT_PROGBITS + + if section_name.find(".rela.") != -1: + sect_type_val = linux_constants.SHT_RELA + + elif section_name in known_sections: + sect_type_val = known_sections[section_name] + + return sect_type_val + + # all sections from memory are allocated (SHF_ALLOC) + # special check certain other sections to try and ensure extra flags are added where needed + @classmethod + def _calc_sect_flags(cls, name: str) -> int: + """ + Make a best effort to map common section names to their permissions + If we miss a section here, users of common static analysis tools can mark the + sections are writable or executable manually, but that becomes very cumbersome + and breaks initial analysis by the tool + """ + # All sections in memory are allocated (`A` in readelf -S) + flags = linux_constants.SHF_ALLOC + + if name in [".text", ".init.text", ".exit.text", ".static_call.text"]: + flags = flags | linux_constants.SHF_EXECINSTR + + elif name in [ + ".data", + ".init.data", + ".exit.data", + ".bss", + "__tracepoints", + ".data.once", + "_ftrace_events", + ".gnu.linkonce.this_module", + ]: + flags = flags | linux_constants.SHF_WRITE + + return flags + + @classmethod + def _calc_link( + cls, name: str, strtab_index: int, symtab_index: int, sect_type: int + ) -> int: + """ + Calculates the link value for a section + + The most important ones are symtab indexes for relocations + and to point the symbol table 
to the string tab + + Spec: https://refspecs.linuxbase.org/elf/gabi4+/ch4.sheader.html + """ + # looking for RELA sections + if name.find(".rela.") != -1: + return symtab_index + + # per spec: "The section header index of the associated string table." + elif sect_type == linux_constants.SHT_SYMTAB: + return strtab_index + + return 0 + + @classmethod + def _calc_entsize(cls, name: str, sect_type: int, bits: int) -> int: + """ + Calculates the entsize for relocation sections and the symbol table section + + Spec: https://refspecs.linuxbase.org/elf/gabi4+/ch4.sheader.html + """ + # RELA entries are fixed-size: Elf32_Rela is 12 bytes, Elf64_Rela is 24 bytes + if name.find(".rela.") != -1: + return 12 if bits == 32 else 24 + + # symbol table entries are fixed-size: Elf32_Sym is 16 bytes, Elf64_Sym is 24 bytes + elif sect_type == linux_constants.SHT_SYMTAB: + if bits == 32: + return 16 + else: + return 24 + + return 0 + + @classmethod + def _make_section_header( + cls, + bits: int, + name_index: int, + name: str, + address: int, + size: int, + file_offset: int, + strtab_index: int, + symtab_index: int, + ) -> Optional[bytes]: + """ + Creates a section header (Elf32_Shdr or Elf64_Shdr) for the given section + """ + if bits == 32: + fmt = " Optional[bytes]: + # Bail early if bad address sent in + try: + hasattr(module.sect_attrs, "nsections") + except exceptions.InvalidAddressException: + vollog.debug(f"module at offset {module.vol.offset:#x} is paged out.") + return None + + # Gather sections + parse_sections_result = cls._parse_sections(context, vmlinux_name, module) + if parse_sections_result is None: + return None + updated_sections, strtab_index, symtab_index = parse_sections_result + + kernel = context.modules[vmlinux_name] + + # Figure out header sizes + if symbols.symbol_table_is_64bit(context, kernel.symbol_table_name): + header_type = "Elf64_Ehdr" + section_type = "Elf64_Shdr" + bits = 64 + else: + header_type = "Elf32_Ehdr" + section_type = "Elf32_Shdr" + bits = 32 + + header_type_size = kernel.get_type(header_type).size + section_type_size = 
kernel.get_type(section_type).size + + # Per Linux-spec, all LKMs must start with a null section header + # This buffer is used to hold the headers as they are built + sections_headers = b"\x00" * section_type_size + + # Holder of the data of the sections + sections_data = b"" + + # the .shstrtab section is "\x00" + section name for each section + # followed by a terminating null. + # It starts with the null string (\x00) + shstrtab_data = b"\x00" + + # Track where we end the sections and data to glue `.shstrtab` after + last_file_offset = None + last_sect_size = None + + # Start at 1 in the string table + name_index = 1 + + # Create the actual section headers + for index, (name, address, file_offset, section_data) in enumerate( + updated_sections + ): + # Make the section header + header_bytes = cls._make_section_header( + bits, + name_index, + name, + address, + len(section_data), + file_offset, + strtab_index, + symtab_index, + ) + if not header_bytes: + vollog.debug(f"make_section_header failed for section {name}") + return None + + # Index into the string table + name_index += len(name) + 1 + + # concatenate the header and section bytes + sections_headers += header_bytes + sections_data += section_data + + # track where we are so .shstrtab goes into correct offset + last_file_offset = file_offset + last_sect_size = len(section_data) + + # append each section name to what will become .shstrtab + shstrtab_data += bytes(name, encoding="utf8") + b"\x00" + + # stick our own section reference string at end + # name_index points to the end of the last section string after the loop ends + shstrtab_data += b".shstrtab\x00" + + # create our .shstrtab section so sections have names + sections_headers += cls._make_section_header( + bits, + name_index, + ".shstrtab", + 0, + len(shstrtab_data), + last_file_offset + last_sect_size, + strtab_index, + symtab_index, + ) + + sections_data += shstrtab_data + + num_sections = len(updated_sections) + 1 + + header = 
cls._make_elf_header( + bits, + header_type_size + len(sections_data), + num_sections, + ) + + if not header: + vollog.error( + f"Hit error creating Elf header for module at {module.vol.offset:#x}" + ) + return None + + # Return our beautiful, hand-crafted, farm raised ELF file + return header + sections_data + sections_headers + + class ModuleGathererLsmod(ModuleGathererInterface): """ Gathers modules from the main kernel list @@ -976,7 +1714,7 @@ def generate_results( file_name = renderers.NotApplicableValue() if dump and open_implementation: - elf_data = linux_utilities_module_extract.ModuleExtract.extract_module( + elf_data = ModuleExtract.extract_module( context, kernel_module_name, module ) if not elf_data: