diff --git a/src/auditwheel/audit.rs b/src/auditwheel/audit.rs index 1c34a68a3..1f7fc3cbe 100644 --- a/src/auditwheel/audit.rs +++ b/src/auditwheel/audit.rs @@ -1,78 +1,8 @@ -use super::musllinux::{find_musl_libc, get_musl_version}; -use super::policy::{MANYLINUX_POLICIES, MUSLLINUX_POLICIES, Policy}; -use crate::auditwheel::{PlatformTag, find_external_libs}; -use crate::compile::BuildArtifact; -use crate::target::{Arch, Target}; +use crate::target::Target; use anyhow::{Context, Result, bail}; -use fs_err::File; -use goblin::elf::{Elf, sym::STB_WEAK, sym::STT_FUNC}; -use lddtree::Library; -use once_cell::sync::Lazy; -use regex::Regex; use serde::{Deserialize, Serialize}; -use std::collections::{HashMap, HashSet}; -use std::io::Read; +use std::fmt; use std::path::{Path, PathBuf}; -use std::{fmt, io}; -use thiserror::Error; -use tracing::debug; - -static IS_LIBPYTHON: Lazy = - Lazy::new(|| Regex::new(r"^libpython3\.\d+m?u?t?\.so\.\d+\.\d+$").unwrap()); - -/// Returns `true` if the given shared-library name is a dynamic linker -/// (e.g. `ld-linux-x86-64.so.2`, `ld64.so.2`, `ld-musl-*.so.1`). -pub(crate) fn is_dynamic_linker(name: &str) -> bool { - name.starts_with("ld-linux") - || name == "ld64.so.2" - || name == "ld64.so.1" - || name.starts_with("ld-musl") -} - -/// Error raised during auditing an elf file for manylinux/musllinux compatibility -#[derive(Error, Debug)] -#[error("Ensuring manylinux/musllinux compliance failed")] -pub enum AuditWheelError { - /// The wheel couldn't be read - #[error("Failed to read the wheel")] - IoError(#[source] io::Error), - /// Reexports goblin parsing errors - #[error("Goblin failed to parse the elf file")] - GoblinError(#[source] goblin::error::Error), - /// The elf file isn't manylinux/musllinux compatible. Contains the list of offending - /// libraries. - #[error( - "Your library links libpython ({0}), which libraries must not do. Have you forgotten to activate the extension-module feature?" 
- )] - LinksLibPythonError(String), - /// The elf file isn't manylinux/musllinux compatible. Contains the list of offending - /// libraries. - #[error( - "Your library is not {0} compliant because it links the following forbidden libraries: {1:?}" - )] - LinksForbiddenLibrariesError(Policy, Vec), - /// The elf file isn't manylinux/musllinux compatible. Contains the list of offending - /// libraries. - #[error( - "Your library is not {0} compliant because of the presence of too-recent versioned symbols: {1:?}. Consider building in a manylinux docker container" - )] - VersionedSymbolTooNewError(Policy, Vec), - /// The elf file isn't manylinux/musllinux compatible. Contains the list of offending - /// libraries with blacked-list symbols. - #[error("Your library is not {0} compliant because it depends on black-listed symbols: {1:?}")] - BlackListedSymbolsError(Policy, Vec), - /// The elf file isn't manylinux/musllinux compatible. Contains unsupported architecture - #[error("Your library is not {0} compliant because it has unsupported architecture: {1}")] - UnsupportedArchitecture(Policy, String), - /// This platform tag isn't defined by auditwheel yet - #[error( - "{0} compatibility policy is not defined by auditwheel yet, pass `--auditwheel=skip` to proceed anyway" - )] - UndefinedPolicy(PlatformTag), - /// Failed to analyze external shared library dependencies of the wheel - #[error("Failed to analyze external shared library dependencies of the wheel")] - DependencyAnalysisError(#[source] lddtree::Error), -} /// Auditwheel mode #[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize, clap::ValueEnum)] @@ -98,354 +28,6 @@ impl fmt::Display for AuditWheelMode { } } -#[derive(Clone, Debug)] -pub struct VersionedLibrary { - /// library name - pub name: String, - /// versions needed - versions: HashSet, -} - -impl VersionedLibrary { - /// Parse version strings (e.g. "GLIBC_2.17") into a map of name -> set of versions. - /// e.g. 
{"GLIBC" -> {"2.17", "2.5"}, "GCC" -> {"3.0"}} - /// - fn parsed_versions(&self) -> HashMap> { - let mut result: HashMap> = HashMap::new(); - for v in &self.versions { - if let Some((name, version)) = v.split_once('_') { - result - .entry(name.to_string()) - .or_default() - .insert(version.to_string()); - } - } - result - } -} - -/// Find required dynamic linked libraries with version information -pub fn find_versioned_libraries(elf: &Elf) -> Vec { - let mut symbols = Vec::new(); - if let Some(verneed) = &elf.verneed { - for need_file in verneed.iter() { - if let Some(name) = elf.dynstrtab.get_at(need_file.vn_file) { - // Skip dynamic linker/loader - if is_dynamic_linker(name) { - continue; - } - let mut versions = HashSet::new(); - for need_ver in need_file.iter() { - if let Some(aux_name) = elf.dynstrtab.get_at(need_ver.vna_name) { - versions.insert(aux_name.to_string()); - } - } - symbols.push(VersionedLibrary { - name: name.to_string(), - versions, - }); - } - } - } - symbols -} - -/// Find incompliant symbols from symbol versions -#[allow(clippy::result_large_err)] -fn find_incompliant_symbols( - elf: &Elf, - symbol_versions: &[String], -) -> Result, AuditWheelError> { - let mut symbols = Vec::new(); - let strtab = &elf.strtab; - for sym in &elf.syms { - if sym.st_type() == STT_FUNC { - let name = strtab.get_at(sym.st_name).unwrap_or("BAD NAME"); - for symbol_version in symbol_versions { - if name.ends_with(&format!("@{symbol_version}")) { - symbols.push(name.to_string()); - } - } - } - } - Ok(symbols) -} - -#[allow(clippy::result_large_err)] -fn policy_is_satisfied( - policy: &Policy, - elf: &Elf, - arch: &str, - deps: &[String], - versioned_libraries: &[VersionedLibrary], - allow_linking_libpython: bool, -) -> Result<(), AuditWheelError> { - let arch_versions = &policy.symbol_versions.get(arch).ok_or_else(|| { - AuditWheelError::UnsupportedArchitecture(policy.clone(), arch.to_string()) - })?; - let mut offending_libs = HashSet::new(); - let mut 
offending_versioned_syms = HashSet::new(); - let mut offending_blacklist_syms = HashMap::new(); - let undef_symbols: HashSet = elf - .dynsyms - .iter() - .filter_map(|sym| { - // Do not consider weak symbols as undefined, they are optional at runtime. - if sym.st_shndx == goblin::elf::section_header::SHN_UNDEF as usize - && sym.st_bind() != STB_WEAK - { - elf.dynstrtab.get_at(sym.st_name).map(ToString::to_string) - } else { - None - } - }) - .collect(); - - for dep in deps { - if is_dynamic_linker(dep) { - continue; - } - if !policy.lib_whitelist.contains(dep) { - if allow_linking_libpython && IS_LIBPYTHON.is_match(dep) { - continue; - } - offending_libs.insert(dep.clone()); - } - if let Some(sym_list) = policy.blacklist.get(dep) { - let mut intersection: Vec<_> = sym_list.intersection(&undef_symbols).cloned().collect(); - if !intersection.is_empty() { - intersection.sort(); - offending_blacklist_syms.insert(dep, intersection); - } - } - } - for library in versioned_libraries { - if !policy.lib_whitelist.contains(&library.name) { - offending_libs.insert(library.name.clone()); - continue; - } - for (name, versions_needed) in library.parsed_versions() { - let Some(versions_allowed) = arch_versions.get(&name) else { - offending_versioned_syms.insert(format!( - "{} offending versions: unknown symbol namespace {name}", - library.name, - )); - continue; - }; - if !versions_needed.is_subset(versions_allowed) { - let offending_versions: Vec<&str> = versions_needed - .difference(versions_allowed) - .map(|v| v.as_ref()) - .collect(); - let offending_symbol_versions: Vec = offending_versions - .iter() - .map(|v| format!("{name}_{v}")) - .collect(); - let offending_symbols = find_incompliant_symbols(elf, &offending_symbol_versions)?; - let offender = if offending_symbols.is_empty() { - format!( - "{} offending versions: {}", - library.name, - offending_symbol_versions.join(", ") - ) - } else { - format!( - "{} offending symbols: {}", - library.name, - offending_symbols.join(", 
") - ) - }; - offending_versioned_syms.insert(offender); - } - } - } - // Check for black-listed symbols - if !offending_blacklist_syms.is_empty() { - let offenders = offending_blacklist_syms - .into_iter() - .map(|(lib, syms)| format!("{}: {}", lib, syms.join(", "))) - .collect(); - return Err(AuditWheelError::BlackListedSymbolsError( - policy.clone(), - offenders, - )); - } - // Check for too-recent versioned symbols - if !offending_versioned_syms.is_empty() { - return Err(AuditWheelError::VersionedSymbolTooNewError( - policy.clone(), - offending_versioned_syms.into_iter().collect(), - )); - } - // Check for libpython and forbidden libraries - let offenders: Vec = offending_libs.into_iter().collect(); - match offenders.as_slice() { - [] => Ok(()), - [lib] if IS_LIBPYTHON.is_match(lib) => { - Err(AuditWheelError::LinksLibPythonError(lib.clone())) - } - offenders => Err(AuditWheelError::LinksForbiddenLibrariesError( - policy.clone(), - offenders.to_vec(), - )), - } -} - -fn get_default_platform_policies() -> Vec { - if let Ok(Some(musl_libc)) = find_musl_libc() - && let Ok(Some((major, minor))) = get_musl_version(musl_libc) - { - return MUSLLINUX_POLICIES - .iter() - .filter(|policy| { - policy.name == "linux" || policy.name == format!("musllinux_{major}_{minor}") - }) - .cloned() - .collect(); - } - MANYLINUX_POLICIES.clone() -} - -/// An reimplementation of auditwheel, which checks elf files for -/// manylinux/musllinux compliance. -/// -/// If `platform_tag`, is None, it returns the the highest matching manylinux/musllinux policy -/// and whether we need to repair with patchelf,, or `linux` if nothing else matches. -/// It will error for bogus cases, e.g. if libpython is linked. -/// -/// If a specific manylinux/musllinux version is given, compliance is checked and a warning printed if -/// a higher version would be possible. -/// -/// Does nothing for `platform_tag` set to `Off`/`Linux` or non-linux platforms. 
-#[allow(clippy::result_large_err)] -pub fn auditwheel_rs( - artifact: &BuildArtifact, - target: &Target, - platform_tag: Option, - allow_linking_libpython: bool, -) -> Result<(Policy, bool), AuditWheelError> { - if !target.is_linux() || platform_tag == Some(PlatformTag::Linux) { - return Ok((Policy::default(), false)); - } - let path = &artifact.path; - let arch = target.target_arch().to_string(); - let mut file = File::open(path).map_err(AuditWheelError::IoError)?; - let mut buffer = Vec::new(); - file.read_to_end(&mut buffer) - .map_err(AuditWheelError::IoError)?; - let elf = Elf::parse(&buffer).map_err(AuditWheelError::GoblinError)?; - // This returns essentially the same as ldd - let deps: Vec = elf.libraries.iter().map(ToString::to_string).collect(); - let versioned_libraries = find_versioned_libraries(&elf); - - // Find the highest possible policy, if any - let platform_policies = match platform_tag { - Some(PlatformTag::Manylinux { .. }) => MANYLINUX_POLICIES.clone(), - Some(PlatformTag::Musllinux { major, minor }) => MUSLLINUX_POLICIES - .clone() - .into_iter() - .filter(|policy| { - policy.name == "linux" || policy.name == format!("musllinux_{major}_{minor}") - }) - .map(|mut policy| { - policy.fixup_musl_libc_so_name(target.target_arch()); - policy - }) - .collect(), - None | Some(PlatformTag::Pypi) => { - // Using the default for the `pypi` tag means we're correctly using manylinux where - // possible. 
- let mut policies = get_default_platform_policies(); - for policy in &mut policies { - policy.fixup_musl_libc_so_name(target.target_arch()); - } - policies - } - Some(PlatformTag::Linux) => unreachable!(), - }; - let mut highest_policy = None; - let mut should_repair = false; - for policy in platform_policies.iter() { - let result = policy_is_satisfied( - policy, - &elf, - &arch, - &deps, - &versioned_libraries, - allow_linking_libpython, - ); - match result { - Ok(_) => { - highest_policy = Some(policy.clone()); - should_repair = false; - break; - } - Err(AuditWheelError::LinksForbiddenLibrariesError(..)) => { - highest_policy = Some(policy.clone()); - should_repair = true; - break; - } - Err(AuditWheelError::VersionedSymbolTooNewError(..)) - | Err(AuditWheelError::BlackListedSymbolsError(..)) - // UnsupportedArchitecture happens when trying 2010 with aarch64 - | Err(AuditWheelError::UnsupportedArchitecture(..)) => continue, - // If there was an error parsing the symbols or libpython was linked, - // we error no matter what the requested policy was - Err(err) => return Err(err), - } - } - - let policy = if let Some(platform_tag) = platform_tag { - let mut policy = Policy::from_tag(&platform_tag) - .ok_or(AuditWheelError::UndefinedPolicy(platform_tag))?; - policy.fixup_musl_libc_so_name(target.target_arch()); - - if let Some(highest_policy) = highest_policy { - // Don't recommend manylinux1 because rust doesn't support it anymore - if policy.priority < highest_policy.priority && highest_policy.name != "manylinux_2_5" { - eprintln!( - "📦 Wheel is eligible for a higher priority tag. 
\ - You requested {policy} but this wheel is eligible for {highest_policy}", - ); - } - } - - match policy_is_satisfied( - &policy, - &elf, - &arch, - &deps, - &versioned_libraries, - allow_linking_libpython, - ) { - Ok(_) => { - should_repair = false; - Ok(policy) - } - Err(AuditWheelError::LinksForbiddenLibrariesError(..)) => { - should_repair = true; - Ok(policy) - } - Err(err) => Err(err), - } - } else if let Some(policy) = highest_policy { - Ok(policy) - } else if target.target_arch() == Arch::Armv6L || target.target_arch() == Arch::Armv7L { - // Old arm versions - // https://github.com/pypi/warehouse/blob/556e1e3390999381c382873b003a779a1363cb4d/warehouse/forklift/legacy.py#L122-L123 - Ok(Policy::default()) - } else { - eprintln!( - "⚠️ Warning: No compatible platform tag found, using the linux tag instead. \ - You won't be able to upload those wheels to PyPI." - ); - - // Fallback to linux - Ok(Policy::default()) - }?; - Ok((policy, should_repair)) -} - /// Get sysroot path from target C compiler /// /// Currently only gcc is supported, clang doesn't have a `--print-sysroot` option @@ -501,208 +83,6 @@ pub fn get_sysroot_path(target: &Target) -> Result { Ok(PathBuf::from("/")) } -/// For the given compilation result, return the manylinux platform and the external libs -/// we need to add to repair it -pub fn get_policy_and_libs( - artifact: &BuildArtifact, - platform_tag: Option, - target: &Target, - manifest_path: &Path, - allow_linking_libpython: bool, -) -> Result<(Policy, Vec)> { - let (policy, should_repair) = - auditwheel_rs(artifact, target, platform_tag, allow_linking_libpython).with_context( - || { - if let Some(platform_tag) = platform_tag { - format!("Error ensuring {platform_tag} compliance") - } else { - "Error checking for manylinux/musllinux compliance".to_string() - } - }, - )?; - let external_libs = if should_repair { - let sysroot = get_sysroot_path(target).unwrap_or_else(|_| PathBuf::from("/")); - let mut ld_paths: Vec = 
artifact.linked_paths.iter().map(PathBuf::from).collect(); - - // Add library search paths from RUSTFLAGS - if let Some(rustflags_paths) = extract_rustflags_library_paths(manifest_path, target) { - ld_paths.extend(rustflags_paths); - } - - let external_libs = find_external_libs(&artifact.path, &policy, sysroot, ld_paths) - .with_context(|| { - if let Some(platform_tag) = platform_tag { - format!("Error repairing wheel for {platform_tag} compliance") - } else { - "Error repairing wheel for manylinux/musllinux compliance".to_string() - } - })?; - if allow_linking_libpython { - external_libs - .into_iter() - .filter(|lib| !IS_LIBPYTHON.is_match(&lib.name)) - .collect() - } else { - external_libs - } - } else { - Vec::new() - }; - - // Check external libraries for versioned symbol requirements that may - // require a stricter (less compatible, e.g. newer manylinux) policy than what - // the main artifact alone would need. See https://github.com/PyO3/maturin/issues/1490 - let policy = if !external_libs.is_empty() { - let (adjusted, offenders) = check_external_libs_policy(&policy, &external_libs, target)?; - if platform_tag.is_some() && !offenders.is_empty() { - let tag_kind = if policy.name.starts_with("musllinux") { - "musllinux" - } else { - "manylinux" - }; - bail!( - "External libraries {offenders:?} require newer symbol versions than {policy} allows. \ - Consider using --compatibility {adjusted} or a newer {tag_kind} tag" - ); - } - adjusted - } else { - policy - }; - - Ok((policy, external_libs)) -} - -/// Return the symbol versions required by external libraries that are not -/// allowed by the given policy, e.g. `["GLIBC_2.29", "GLIBC_2.33"]`. 
-fn unsatisfied_symbol_versions( - policy: &Policy, - arch: &str, - versioned_libraries: &[VersionedLibrary], -) -> Vec { - let arch_versions = match policy.symbol_versions.get(arch) { - Some(v) => v, - None => return vec!["(unsupported arch)".to_string()], - }; - let mut unsatisfied = Vec::new(); - for library in versioned_libraries { - if !policy.lib_whitelist.contains(&library.name) { - continue; - } - for (name, versions_needed) in library.parsed_versions() { - match arch_versions.get(&name) { - Some(versions_allowed) => { - for v in versions_needed.difference(versions_allowed) { - unsatisfied.push(format!("{name}_{v}")); - } - } - None => { - for v in &versions_needed { - unsatisfied.push(format!("{name}_{v}")); - } - } - } - } - } - unsatisfied.sort(); - unsatisfied -} - -/// Check if external libraries require a newer glibc than the current policy allows. -/// Returns the adjusted policy and a list of `"libfoo.so (GLIBC_2.29, GLIBC_2.33)"` -/// descriptions for libraries that caused a downgrade. -fn check_external_libs_policy( - policy: &Policy, - external_libs: &[Library], - target: &Target, -) -> Result<(Policy, Vec)> { - let arch = target.target_arch().to_string(); - let mut platform_policies = if policy.name.starts_with("musllinux") { - MUSLLINUX_POLICIES.clone() - } else if policy.name.starts_with("manylinux") { - MANYLINUX_POLICIES.clone() - } else { - get_default_platform_policies() - }; - for p in &mut platform_policies { - p.fixup_musl_libc_so_name(target.target_arch()); - } - // Policies must be sorted from highest to lowest priority so we find the - // best (most compatible) match first when iterating. 
- debug_assert!( - platform_policies - .windows(2) - .all(|w| w[0].priority >= w[1].priority) - ); - - let mut result = policy.clone(); - let mut offenders = Vec::new(); - for lib in external_libs { - let lib_path = match lib.realpath.as_ref() { - Some(path) => path, - None => continue, - }; - let buffer = fs_err::read(lib_path) - .with_context(|| format!("Failed to read external library {}", lib_path.display()))?; - let elf = match Elf::parse(&buffer) { - Ok(elf) => elf, - Err(_) => continue, - }; - let versioned_libraries = find_versioned_libraries(&elf); - if versioned_libraries.is_empty() { - continue; - } - - // Find the highest policy that this external library satisfies - let unsatisfied = unsatisfied_symbol_versions(&result, &arch, &versioned_libraries); - if unsatisfied.is_empty() { - continue; - } - for candidate in platform_policies.iter() { - if candidate.priority > result.priority { - continue; - } - if unsatisfied_symbol_versions(candidate, &arch, &versioned_libraries).is_empty() { - if candidate.priority < result.priority { - debug!( - "Downgrading tag to {candidate} because external library {} requires {}", - lib.name, - unsatisfied.join(", "), - ); - offenders.push(format!("{} ({})", lib.name, unsatisfied.join(", "))); - result = candidate.clone(); - } - break; - } - } - } - Ok((result, offenders)) -} - -/// Extract library search paths from RUSTFLAGS configuration -#[cfg_attr(test, allow(dead_code))] -fn extract_rustflags_library_paths(manifest_path: &Path, target: &Target) -> Option> { - let manifest_dir = manifest_path.parent()?; - let config = cargo_config2::Config::load_with_cwd(manifest_dir).ok()?; - let rustflags = config.rustflags(target.target_triple()).ok()??; - - // Encode the rustflags for parsing with the rustflags crate - let encoded = rustflags.encode().ok()?; - - let mut library_paths = Vec::new(); - for flag in rustflags::from_encoded(encoded.as_ref()) { - if let rustflags::Flag::LibrarySearchPath { kind: _, path } = flag { - 
library_paths.push(path); - } - } - - if library_paths.is_empty() { - None - } else { - Some(library_paths) - } -} - pub fn relpath(to: &Path, from: &Path) -> PathBuf { let mut suffix_pos = 0; for (f, t) in from.components().zip(to.components()) { @@ -726,7 +106,6 @@ pub fn relpath(to: &Path, from: &Path) -> PathBuf { #[cfg(test)] mod tests { - use crate::Target; use crate::auditwheel::audit::relpath; use pretty_assertions::assert_eq; use std::path::Path; @@ -745,85 +124,4 @@ mod tests { assert_eq!(result, Path::new(expected)); } } - - #[test] - fn test_extract_rustflags_library_paths() { - // Create a temporary directory with a Cargo.toml and .cargo/config.toml - let temp_dir = tempfile::tempdir().unwrap(); - let manifest_path = temp_dir.path().join("Cargo.toml"); - let cargo_dir = temp_dir.path().join(".cargo"); - let config_path = cargo_dir.join("config.toml"); - - // Create the directories - fs_err::create_dir_all(&cargo_dir).unwrap(); - - // Create a minimal Cargo.toml - fs_err::write( - &manifest_path, - r#" -[package] -name = "test-package" -version = "0.1.0" -edition = "2021" -"#, - ) - .unwrap(); - - // Create a config.toml with rustflags containing -L options - fs_err::write( - &config_path, - r#" -[build] -rustflags = ["-L", "dependency=/usr/local/lib", "-L", "/some/other/path", "-C", "opt-level=3"] -"#, - ) - .unwrap(); - - // Test the function - let target = Target::from_target_triple(None).unwrap(); - let paths = super::extract_rustflags_library_paths(&manifest_path, &target); - - if let Some(paths) = paths { - assert_eq!(paths.len(), 2); - assert!( - paths - .iter() - .any(|p| p.to_string_lossy() == "/usr/local/lib") - ); - assert!( - paths - .iter() - .any(|p| p.to_string_lossy() == "/some/other/path") - ); - } else { - // It's possible that rustflags parsing fails in some environments, - // so we just verify the function doesn't panic - println!("No rustflags library paths found, which is acceptable"); - } - } - - #[test] - fn 
test_extract_rustflags_library_paths_no_config() { - // Test with a directory that has no cargo config - let temp_dir = tempfile::tempdir().unwrap(); - let manifest_path = temp_dir.path().join("Cargo.toml"); - - // Create a minimal Cargo.toml - fs_err::write( - &manifest_path, - r#" -[package] -name = "test-package" -version = "0.1.0" -edition = "2021" -"#, - ) - .unwrap(); - - let target = Target::from_target_triple(None).unwrap(); - let paths = super::extract_rustflags_library_paths(&manifest_path, &target); - - // Should return None when there's no cargo config with rustflags - assert!(paths.is_none()); - } } diff --git a/src/auditwheel/linux.rs b/src/auditwheel/linux.rs new file mode 100644 index 000000000..7866c0254 --- /dev/null +++ b/src/auditwheel/linux.rs @@ -0,0 +1,886 @@ +//! Linux/ELF wheel audit and repair. +//! +//! This module implements [`WheelRepairer`] for Linux ELF binaries, +//! providing the Rust equivalent of [auditwheel](https://github.com/pypa/auditwheel). +//! +//! It contains all ELF-specific logic: manylinux/musllinux compliance +//! auditing, external dependency discovery via lddtree, versioned symbol +//! checking, and binary patching via `patchelf` (SONAME, DT_NEEDED, RPATH). 
+ +use super::audit::{get_sysroot_path, relpath}; +use super::musllinux::{find_musl_libc, get_musl_version}; +use super::policy::{MANYLINUX_POLICIES, MUSLLINUX_POLICIES, Policy}; +use super::repair::{AuditedArtifact, GraftedLib, WheelRepairer}; +use super::{PlatformTag, patchelf}; +use crate::compile::BuildArtifact; +use crate::target::{Arch, Target}; +use anyhow::{Context, Result, bail}; +use fs_err::File; +use goblin::elf::{Elf, sym::STB_WEAK, sym::STT_FUNC}; +use lddtree::Library; +use once_cell::sync::Lazy; +use regex::Regex; +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::io; +use std::io::Read; +use std::path::{Path, PathBuf}; +use thiserror::Error; +use tracing::debug; + +pub(crate) static IS_LIBPYTHON: Lazy = + Lazy::new(|| Regex::new(r"^libpython3\.\d+m?u?t?\.so\.\d+\.\d+$").unwrap()); + +/// Returns `true` if the given shared-library name is a dynamic linker +/// (e.g. `ld-linux-x86-64.so.2`, `ld64.so.2`, `ld-musl-*.so.1`). +fn is_dynamic_linker(name: &str) -> bool { + name.starts_with("ld-linux") + || name == "ld64.so.2" + || name == "ld64.so.1" + || name.starts_with("ld-musl") +} + +/// Error raised during auditing an elf file for manylinux/musllinux compatibility +#[derive(Error, Debug)] +#[error("Ensuring manylinux/musllinux compliance failed")] +pub enum AuditWheelError { + /// The wheel couldn't be read + #[error("Failed to read the wheel")] + IoError(#[source] io::Error), + /// Reexports goblin parsing errors + #[error("Goblin failed to parse the elf file")] + GoblinError(#[source] goblin::error::Error), + /// The elf file isn't manylinux/musllinux compatible. Contains the list of offending + /// libraries. + #[error( + "Your library links libpython ({0}), which libraries must not do. Have you forgotten to activate the extension-module feature?" + )] + LinksLibPythonError(String), + /// The elf file isn't manylinux/musllinux compatible. Contains the list of offending + /// libraries. 
+    #[error( +        "Your library is not {0} compliant because it links the following forbidden libraries: {1:?}" +    )] +    LinksForbiddenLibrariesError(Policy, Vec), +    /// The elf file isn't manylinux/musllinux compatible. Contains the list of offending +    /// libraries. +    #[error( +        "Your library is not {0} compliant because of the presence of too-recent versioned symbols: {1:?}. Consider building in a manylinux docker container" +    )] +    VersionedSymbolTooNewError(Policy, Vec), +    /// The elf file isn't manylinux/musllinux compatible. Contains the list of offending +    /// libraries with black-listed symbols. +    #[error("Your library is not {0} compliant because it depends on black-listed symbols: {1:?}")] +    BlackListedSymbolsError(Policy, Vec), +    /// The elf file isn't manylinux/musllinux compatible. Contains unsupported architecture +    #[error("Your library is not {0} compliant because it has unsupported architecture: {1}")] +    UnsupportedArchitecture(Policy, String), +    /// This platform tag isn't defined by auditwheel yet +    #[error( +        "{0} compatibility policy is not defined by auditwheel yet, pass `--auditwheel=skip` to proceed anyway" +    )] +    UndefinedPolicy(PlatformTag), +    /// Failed to analyze external shared library dependencies of the wheel +    #[error("Failed to analyze external shared library dependencies of the wheel")] +    DependencyAnalysisError(#[source] lddtree::Error), +} + +#[derive(Clone, Debug)] +struct VersionedLibrary { +    /// library name +    name: String, +    /// versions needed +    versions: HashSet, +} + +impl VersionedLibrary { +    /// Parse version strings (e.g. "GLIBC_2.17") into a map of name -> set of versions. +    /// e.g. 
{"GLIBC" -> {"2.17", "2.5"}, "GCC" -> {"3.0"}} + /// + fn parsed_versions(&self) -> HashMap> { + let mut result: HashMap> = HashMap::new(); + for v in &self.versions { + if let Some((name, version)) = v.split_once('_') { + result + .entry(name.to_string()) + .or_default() + .insert(version.to_string()); + } + } + result + } +} + +/// Find required dynamic linked libraries with version information +fn find_versioned_libraries(elf: &Elf) -> Vec { + let mut symbols = Vec::new(); + if let Some(verneed) = &elf.verneed { + for need_file in verneed.iter() { + if let Some(name) = elf.dynstrtab.get_at(need_file.vn_file) { + // Skip dynamic linker/loader + if is_dynamic_linker(name) { + continue; + } + let mut versions = HashSet::new(); + for need_ver in need_file.iter() { + if let Some(aux_name) = elf.dynstrtab.get_at(need_ver.vna_name) { + versions.insert(aux_name.to_string()); + } + } + symbols.push(VersionedLibrary { + name: name.to_string(), + versions, + }); + } + } + } + symbols +} + +/// Find incompliant symbols from symbol versions +#[allow(clippy::result_large_err)] +fn find_incompliant_symbols( + elf: &Elf, + symbol_versions: &[String], +) -> Result, AuditWheelError> { + let mut symbols = Vec::new(); + let strtab = &elf.strtab; + for sym in &elf.syms { + if sym.st_type() == STT_FUNC { + let name = strtab.get_at(sym.st_name).unwrap_or("BAD NAME"); + for symbol_version in symbol_versions { + if name.ends_with(&format!("@{symbol_version}")) { + symbols.push(name.to_string()); + } + } + } + } + Ok(symbols) +} + +#[allow(clippy::result_large_err)] +fn policy_is_satisfied( + policy: &Policy, + elf: &Elf, + arch: &str, + deps: &[String], + versioned_libraries: &[VersionedLibrary], + allow_linking_libpython: bool, +) -> Result<(), AuditWheelError> { + let arch_versions = &policy.symbol_versions.get(arch).ok_or_else(|| { + AuditWheelError::UnsupportedArchitecture(policy.clone(), arch.to_string()) + })?; + let mut offending_libs = HashSet::new(); + let mut 
offending_versioned_syms = HashSet::new(); + let mut offending_blacklist_syms = HashMap::new(); + let undef_symbols: HashSet = elf + .dynsyms + .iter() + .filter_map(|sym| { + // Do not consider weak symbols as undefined, they are optional at runtime. + if sym.st_shndx == goblin::elf::section_header::SHN_UNDEF as usize + && sym.st_bind() != STB_WEAK + { + elf.dynstrtab.get_at(sym.st_name).map(ToString::to_string) + } else { + None + } + }) + .collect(); + + for dep in deps { + if is_dynamic_linker(dep) { + continue; + } + if !policy.lib_whitelist.contains(dep) { + if allow_linking_libpython && IS_LIBPYTHON.is_match(dep) { + continue; + } + offending_libs.insert(dep.clone()); + } + if let Some(sym_list) = policy.blacklist.get(dep) { + let mut intersection: Vec<_> = sym_list.intersection(&undef_symbols).cloned().collect(); + if !intersection.is_empty() { + intersection.sort(); + offending_blacklist_syms.insert(dep, intersection); + } + } + } + for library in versioned_libraries { + if !policy.lib_whitelist.contains(&library.name) { + offending_libs.insert(library.name.clone()); + continue; + } + for (name, versions_needed) in library.parsed_versions() { + let Some(versions_allowed) = arch_versions.get(&name) else { + offending_versioned_syms.insert(format!( + "{} offending versions: unknown symbol namespace {name}", + library.name, + )); + continue; + }; + if !versions_needed.is_subset(versions_allowed) { + let offending_versions: Vec<&str> = versions_needed + .difference(versions_allowed) + .map(|v| v.as_ref()) + .collect(); + let offending_symbol_versions: Vec = offending_versions + .iter() + .map(|v| format!("{name}_{v}")) + .collect(); + let offending_symbols = find_incompliant_symbols(elf, &offending_symbol_versions)?; + let offender = if offending_symbols.is_empty() { + format!( + "{} offending versions: {}", + library.name, + offending_symbol_versions.join(", ") + ) + } else { + format!( + "{} offending symbols: {}", + library.name, + offending_symbols.join(", 
") + ) + }; + offending_versioned_syms.insert(offender); + } + } + } + // Check for black-listed symbols + if !offending_blacklist_syms.is_empty() { + let offenders = offending_blacklist_syms + .into_iter() + .map(|(lib, syms)| format!("{}: {}", lib, syms.join(", "))) + .collect(); + return Err(AuditWheelError::BlackListedSymbolsError( + policy.clone(), + offenders, + )); + } + // Check for too-recent versioned symbols + if !offending_versioned_syms.is_empty() { + return Err(AuditWheelError::VersionedSymbolTooNewError( + policy.clone(), + offending_versioned_syms.into_iter().collect(), + )); + } + // Check for libpython and forbidden libraries + let offenders: Vec = offending_libs.into_iter().collect(); + match offenders.as_slice() { + [] => Ok(()), + [lib] if IS_LIBPYTHON.is_match(lib) => { + Err(AuditWheelError::LinksLibPythonError(lib.clone())) + } + offenders => Err(AuditWheelError::LinksForbiddenLibrariesError( + policy.clone(), + offenders.to_vec(), + )), + } +} + +fn get_default_platform_policies() -> Vec { + if let Ok(Some(musl_libc)) = find_musl_libc() + && let Ok(Some((major, minor))) = get_musl_version(musl_libc) + { + return MUSLLINUX_POLICIES + .iter() + .filter(|policy| { + policy.name == "linux" || policy.name == format!("musllinux_{major}_{minor}") + }) + .cloned() + .collect(); + } + MANYLINUX_POLICIES.clone() +} + +/// An reimplementation of auditwheel, which checks elf files for +/// manylinux/musllinux compliance. +/// +/// If `platform_tag`, is None, it returns the the highest matching manylinux/musllinux policy +/// and whether we need to repair with patchelf,, or `linux` if nothing else matches. +/// It will error for bogus cases, e.g. if libpython is linked. +/// +/// If a specific manylinux/musllinux version is given, compliance is checked and a warning printed if +/// a higher version would be possible. +/// +/// Does nothing for `platform_tag` set to `Off`/`Linux` or non-linux platforms. 
+#[allow(clippy::result_large_err)] +fn auditwheel_rs( + artifact: &BuildArtifact, + target: &Target, + platform_tag: Option, + allow_linking_libpython: bool, +) -> Result<(Policy, bool), AuditWheelError> { + if !target.is_linux() || platform_tag == Some(PlatformTag::Linux) { + return Ok((Policy::default(), false)); + } + let path = &artifact.path; + let arch = target.target_arch().to_string(); + let mut file = File::open(path).map_err(AuditWheelError::IoError)?; + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer) + .map_err(AuditWheelError::IoError)?; + let elf = Elf::parse(&buffer).map_err(AuditWheelError::GoblinError)?; + // This returns essentially the same as ldd + let deps: Vec = elf.libraries.iter().map(ToString::to_string).collect(); + let versioned_libraries = find_versioned_libraries(&elf); + + // Find the highest possible policy, if any + let platform_policies = match platform_tag { + Some(PlatformTag::Manylinux { .. }) => MANYLINUX_POLICIES.clone(), + Some(PlatformTag::Musllinux { major, minor }) => MUSLLINUX_POLICIES + .clone() + .into_iter() + .filter(|policy| { + policy.name == "linux" || policy.name == format!("musllinux_{major}_{minor}") + }) + .map(|mut policy| { + policy.fixup_musl_libc_so_name(target.target_arch()); + policy + }) + .collect(), + None | Some(PlatformTag::Pypi) => { + // Using the default for the `pypi` tag means we're correctly using manylinux where + // possible. 
+ let mut policies = get_default_platform_policies(); + for policy in &mut policies { + policy.fixup_musl_libc_so_name(target.target_arch()); + } + policies + } + Some(PlatformTag::Linux) => unreachable!(), + }; + let mut highest_policy = None; + let mut should_repair = false; + for policy in platform_policies.iter() { + let result = policy_is_satisfied( + policy, + &elf, + &arch, + &deps, + &versioned_libraries, + allow_linking_libpython, + ); + match result { + Ok(_) => { + highest_policy = Some(policy.clone()); + should_repair = false; + break; + } + Err(AuditWheelError::LinksForbiddenLibrariesError(..)) => { + highest_policy = Some(policy.clone()); + should_repair = true; + break; + } + Err(AuditWheelError::VersionedSymbolTooNewError(..)) + | Err(AuditWheelError::BlackListedSymbolsError(..)) + // UnsupportedArchitecture happens when trying 2010 with aarch64 + | Err(AuditWheelError::UnsupportedArchitecture(..)) => continue, + // If there was an error parsing the symbols or libpython was linked, + // we error no matter what the requested policy was + Err(err) => return Err(err), + } + } + + let policy = if let Some(platform_tag) = platform_tag { + let mut policy = Policy::from_tag(&platform_tag) + .ok_or(AuditWheelError::UndefinedPolicy(platform_tag))?; + policy.fixup_musl_libc_so_name(target.target_arch()); + + if let Some(highest_policy) = highest_policy { + // Don't recommend manylinux1 because rust doesn't support it anymore + if policy.priority < highest_policy.priority && highest_policy.name != "manylinux_2_5" { + eprintln!( + "📦 Wheel is eligible for a higher priority tag. 
\ + You requested {policy} but this wheel is eligible for {highest_policy}", + ); + } + } + + match policy_is_satisfied( + &policy, + &elf, + &arch, + &deps, + &versioned_libraries, + allow_linking_libpython, + ) { + Ok(_) => { + should_repair = false; + Ok(policy) + } + Err(AuditWheelError::LinksForbiddenLibrariesError(..)) => { + should_repair = true; + Ok(policy) + } + Err(err) => Err(err), + } + } else if let Some(policy) = highest_policy { + Ok(policy) + } else if target.target_arch() == Arch::Armv6L || target.target_arch() == Arch::Armv7L { + // Old arm versions + // https://github.com/pypi/warehouse/blob/556e1e3390999381c382873b003a779a1363cb4d/warehouse/forklift/legacy.py#L122-L123 + Ok(Policy::default()) + } else { + eprintln!( + "⚠️ Warning: No compatible platform tag found, using the linux tag instead. \ + You won't be able to upload those wheels to PyPI." + ); + + // Fallback to linux + Ok(Policy::default()) + }?; + Ok((policy, should_repair)) +} + +/// Linux/ELF wheel repairer (auditwheel equivalent). +/// +/// Bundles external `.so` files and rewrites ELF metadata (SONAME, DT_NEEDED, +/// RPATH) using `patchelf` so that `$ORIGIN`-relative references resolve to +/// the bundled copies in the `.libs/` directory. +/// +/// Unlike the macOS repairer, `audit()` performs full +/// manylinux/musllinux compliance checking — the returned [`Policy`] +/// determines which `manylinux_X_Y` / `musllinux_X_Y` platform tag the wheel +/// qualifies for. +pub struct ElfRepairer { + /// The requested platform tag (e.g., manylinux_2_17), if any. + pub platform_tag: Option, + /// The build target (architecture + OS). + pub target: Target, + /// Path to the project's Cargo.toml (used to extract RUSTFLAGS library paths). + pub manifest_path: PathBuf, + /// Whether the artifact is allowed to link libpython (bin bindings only). 
+ pub allow_linking_libpython: bool, +} + +impl WheelRepairer for ElfRepairer { + fn audit( + &self, + artifact: &BuildArtifact, + mut ld_paths: Vec, + ) -> Result<(Policy, Vec)> { + // Extend caller-provided paths with RUSTFLAGS library search paths + if let Some(rustflags_paths) = + extract_rustflags_library_paths(&self.manifest_path, &self.target) + { + ld_paths.extend(rustflags_paths); + } + get_policy_and_libs( + artifact, + self.platform_tag, + &self.target, + ld_paths, + self.allow_linking_libpython, + ) + } + + fn patch( + &self, + audited: &[AuditedArtifact], + grafted: &[GraftedLib], + libs_dir: &Path, + artifact_dir: &Path, + ) -> Result<()> { + patchelf::verify_patchelf()?; + + // Build a lookup from original name → new soname for rewriting references. + let mut name_map: BTreeMap<&str, &str> = BTreeMap::new(); + for l in grafted { + name_map.insert(l.original_name.as_str(), l.new_name.as_str()); + for alias in &l.aliases { + name_map.insert(alias.as_str(), l.new_name.as_str()); + } + } + + // Set soname and rpath on each grafted library. + for lib in grafted { + patchelf::set_soname(&lib.dest_path, &lib.new_name)?; + if !lib.rpath.is_empty() { + patchelf::set_rpath(&lib.dest_path, &"$ORIGIN".to_string())?; + } + } + + // Rewrite DT_NEEDED in each artifact to reference new sonames. + // Only replace entries that the artifact actually depends on to avoid + // unnecessary patchelf invocations and errors when an old name is + // absent from a given binary. 
+ for aa in audited { + let artifact_deps: HashSet<&str> = aa + .external_libs + .iter() + .map(|lib| lib.name.as_str()) + .collect(); + let replacements: Vec<_> = name_map + .iter() + .filter(|(old, _)| artifact_deps.contains(**old)) + .map(|(k, v)| (*k, v.to_string())) + .collect(); + if !replacements.is_empty() { + patchelf::replace_needed(&aa.artifact.path, &replacements)?; + } + } + + // Update cross-references between grafted libraries + for lib in grafted { + let lib_replacements: Vec<_> = lib + .needed + .iter() + .filter_map(|n| { + name_map + .get(n.as_str()) + .map(|new| (n.as_str(), new.to_string())) + }) + .collect(); + if !lib_replacements.is_empty() { + patchelf::replace_needed(&lib.dest_path, &lib_replacements)?; + } + } + + // Set RPATH on artifacts to find the libs directory + for aa in audited { + let mut new_rpaths = patchelf::get_rpath(&aa.artifact.path)?; + let new_rpath = Path::new("$ORIGIN").join(relpath(libs_dir, artifact_dir)); + new_rpaths.push(new_rpath.to_str().unwrap().to_string()); + let new_rpath = new_rpaths.join(":"); + patchelf::set_rpath(&aa.artifact.path, &new_rpath)?; + } + + Ok(()) + } + + fn patch_editable(&self, audited: &[AuditedArtifact]) -> Result<()> { + for aa in audited { + if aa.artifact.linked_paths.is_empty() { + continue; + } + let old_rpaths = patchelf::get_rpath(&aa.artifact.path)?; + let mut new_rpaths = old_rpaths.clone(); + for path in &aa.artifact.linked_paths { + if !old_rpaths.contains(path) { + new_rpaths.push(path.to_string()); + } + } + let new_rpath = new_rpaths.join(":"); + if let Err(err) = patchelf::set_rpath(&aa.artifact.path, &new_rpath) { + eprintln!( + "⚠️ Warning: Failed to set rpath for {}: {}", + aa.artifact.path.display(), + err + ); + } + } + Ok(()) + } +} + +/// Find external shared library dependencies (Linux/ELF specific). +/// +/// Uses lddtree to resolve dependencies, then filters out the dynamic linker, +/// musl libc, and libraries on the policy whitelist. 
+#[allow(clippy::result_large_err)] +fn find_external_libs( + artifact: impl AsRef, + policy: &Policy, + sysroot: PathBuf, + ld_paths: Vec, +) -> Result, AuditWheelError> { + let dep_analyzer = lddtree::DependencyAnalyzer::new(sysroot).library_paths(ld_paths); + let deps = dep_analyzer + .analyze(artifact) + .map_err(AuditWheelError::DependencyAnalysisError)?; + let mut ext_libs = Vec::new(); + for (_, lib) in deps.libraries { + let name = &lib.name; + // Skip dynamic linker/loader, musl libc, and white-listed libs + if is_dynamic_linker(name) + || name.starts_with("libc.") + || policy.lib_whitelist.contains(name) + { + continue; + } + ext_libs.push(lib); + } + Ok(ext_libs) +} + +/// For the given compilation result, return the manylinux/musllinux policy and +/// the external libs we need to add to repair it. +fn get_policy_and_libs( + artifact: &BuildArtifact, + platform_tag: Option, + target: &Target, + ld_paths: Vec, + allow_linking_libpython: bool, +) -> Result<(Policy, Vec)> { + let (policy, should_repair) = + auditwheel_rs(artifact, target, platform_tag, allow_linking_libpython).with_context( + || { + if let Some(platform_tag) = platform_tag { + format!("Error ensuring {platform_tag} compliance") + } else { + "Error checking for manylinux/musllinux compliance".to_string() + } + }, + )?; + let external_libs = if should_repair { + let sysroot = get_sysroot_path(target).unwrap_or_else(|_| PathBuf::from("/")); + + let external_libs = find_external_libs(&artifact.path, &policy, sysroot, ld_paths) + .with_context(|| { + if let Some(platform_tag) = platform_tag { + format!("Error repairing wheel for {platform_tag} compliance") + } else { + "Error repairing wheel for manylinux/musllinux compliance".to_string() + } + })?; + if allow_linking_libpython { + external_libs + .into_iter() + .filter(|lib| !IS_LIBPYTHON.is_match(&lib.name)) + .collect() + } else { + external_libs + } + } else { + Vec::new() + }; + + // Check external libraries for versioned symbol 
requirements that may + // require a stricter (less compatible, e.g. newer manylinux) policy than what + // the main artifact alone would need. See https://github.com/PyO3/maturin/issues/1490 + let policy = if !external_libs.is_empty() { + let (adjusted, offenders) = check_external_libs_policy(&policy, &external_libs, target)?; + if platform_tag.is_some() && !offenders.is_empty() { + let tag_kind = if policy.name.starts_with("musllinux") { + "musllinux" + } else { + "manylinux" + }; + bail!( + "External libraries {offenders:?} require newer symbol versions than {policy} allows. \ + Consider using --compatibility {adjusted} or a newer {tag_kind} tag" + ); + } + adjusted + } else { + policy + }; + + Ok((policy, external_libs)) +} + +/// Return the symbol versions required by external libraries that are not +/// allowed by the given policy. +fn unsatisfied_symbol_versions( + policy: &Policy, + arch: &str, + versioned_libraries: &[VersionedLibrary], +) -> Vec { + let arch_versions = match policy.symbol_versions.get(arch) { + Some(v) => v, + None => return vec!["(unsupported arch)".to_string()], + }; + let mut unsatisfied = Vec::new(); + for library in versioned_libraries { + if !policy.lib_whitelist.contains(&library.name) { + continue; + } + for (name, versions_needed) in library.parsed_versions() { + match arch_versions.get(&name) { + Some(versions_allowed) => { + for v in versions_needed.difference(versions_allowed) { + unsatisfied.push(format!("{name}_{v}")); + } + } + None => { + for v in &versions_needed { + unsatisfied.push(format!("{name}_{v}")); + } + } + } + } + } + unsatisfied.sort(); + unsatisfied +} + +/// Check if external libraries require a newer glibc than the current policy allows. +/// Returns the adjusted policy and a list of descriptions for libraries that caused +/// a downgrade. 
+fn check_external_libs_policy( + policy: &Policy, + external_libs: &[Library], + target: &Target, +) -> Result<(Policy, Vec)> { + let arch = target.target_arch().to_string(); + let mut platform_policies = if policy.name.starts_with("musllinux") { + MUSLLINUX_POLICIES.clone() + } else if policy.name.starts_with("manylinux") { + MANYLINUX_POLICIES.clone() + } else { + get_default_platform_policies() + }; + for p in &mut platform_policies { + p.fixup_musl_libc_so_name(target.target_arch()); + } + // Policies must be sorted from highest to lowest priority so we find the + // best (most compatible) match first when iterating. + debug_assert!( + platform_policies + .windows(2) + .all(|w| w[0].priority >= w[1].priority) + ); + + let mut result = policy.clone(); + let mut offenders = Vec::new(); + for lib in external_libs { + let lib_path = match lib.realpath.as_ref() { + Some(path) => path, + None => continue, + }; + let buffer = fs_err::read(lib_path) + .with_context(|| format!("Failed to read external library {}", lib_path.display()))?; + let elf = match Elf::parse(&buffer) { + Ok(elf) => elf, + Err(_) => continue, + }; + let versioned_libraries = find_versioned_libraries(&elf); + if versioned_libraries.is_empty() { + continue; + } + + // Find the highest policy that this external library satisfies + let unsatisfied = unsatisfied_symbol_versions(&result, &arch, &versioned_libraries); + if unsatisfied.is_empty() { + continue; + } + for candidate in platform_policies.iter() { + if candidate.priority > result.priority { + continue; + } + if unsatisfied_symbol_versions(candidate, &arch, &versioned_libraries).is_empty() { + if candidate.priority < result.priority { + debug!( + "Downgrading tag to {candidate} because external library {} requires {}", + lib.name, + unsatisfied.join(", "), + ); + offenders.push(format!("{} ({})", lib.name, unsatisfied.join(", "))); + result = candidate.clone(); + } + break; + } + } + } + Ok((result, offenders)) +} + +/// Extract library search 
paths from RUSTFLAGS configuration. +#[cfg_attr(test, allow(dead_code))] +fn extract_rustflags_library_paths(manifest_path: &Path, target: &Target) -> Option> { + let manifest_dir = manifest_path.parent()?; + let config = cargo_config2::Config::load_with_cwd(manifest_dir).ok()?; + let rustflags = config.rustflags(target.target_triple()).ok()??; + + // Encode the rustflags for parsing with the rustflags crate + let encoded = rustflags.encode().ok()?; + + let mut library_paths = Vec::new(); + for flag in rustflags::from_encoded(encoded.as_ref()) { + if let rustflags::Flag::LibrarySearchPath { kind: _, path } = flag { + library_paths.push(path); + } + } + + if library_paths.is_empty() { + None + } else { + Some(library_paths) + } +} + +#[cfg(test)] +mod tests { + use crate::Target; + + #[test] + fn test_extract_rustflags_library_paths() { + // Create a temporary directory with a Cargo.toml and .cargo/config.toml + let temp_dir = tempfile::tempdir().unwrap(); + let manifest_path = temp_dir.path().join("Cargo.toml"); + let cargo_dir = temp_dir.path().join(".cargo"); + let config_path = cargo_dir.join("config.toml"); + + // Create the directories + fs_err::create_dir_all(&cargo_dir).unwrap(); + + // Create a minimal Cargo.toml + fs_err::write( + &manifest_path, + r#" +[package] +name = "test-package" +version = "0.1.0" +edition = "2021" +"#, + ) + .unwrap(); + + // Create a config.toml with rustflags containing -L options + fs_err::write( + &config_path, + r#" +[build] +rustflags = ["-L", "dependency=/usr/local/lib", "-L", "/some/other/path", "-C", "opt-level=3"] +"#, + ) + .unwrap(); + + // Test the function + let target = Target::from_target_triple(None).unwrap(); + let paths = super::extract_rustflags_library_paths(&manifest_path, &target); + + if let Some(paths) = paths { + assert_eq!(paths.len(), 2); + assert!( + paths + .iter() + .any(|p| p.to_string_lossy() == "/usr/local/lib") + ); + assert!( + paths + .iter() + .any(|p| p.to_string_lossy() == "/some/other/path") 
+ ); + } else { + // It's possible that rustflags parsing fails in some environments, + // so we just verify the function doesn't panic + println!("No rustflags library paths found, which is acceptable"); + } + } + + #[test] + fn test_extract_rustflags_library_paths_no_config() { + // Test with a directory that has no cargo config + let temp_dir = tempfile::tempdir().unwrap(); + let manifest_path = temp_dir.path().join("Cargo.toml"); + + // Create a minimal Cargo.toml + fs_err::write( + &manifest_path, + r#" +[package] +name = "test-package" +version = "0.1.0" +edition = "2021" +"#, + ) + .unwrap(); + + let target = Target::from_target_triple(None).unwrap(); + let paths = super::extract_rustflags_library_paths(&manifest_path, &target); + + // Should return None when there's no cargo config with rustflags + assert!(paths.is_none()); + } +} diff --git a/src/auditwheel/mod.rs b/src/auditwheel/mod.rs index 860c3a39c..43776c474 100644 --- a/src/auditwheel/mod.rs +++ b/src/auditwheel/mod.rs @@ -1,4 +1,5 @@ mod audit; +mod linux; mod musllinux; pub mod patchelf; mod platform_tag; @@ -10,6 +11,7 @@ pub mod sbom; mod whichprovides; pub use audit::*; +pub use linux::ElfRepairer; pub use platform_tag::PlatformTag; pub use policy::Policy; -pub use repair::find_external_libs; +pub use repair::{AuditedArtifact, WheelRepairer, log_grafted_libs, prepare_grafted_libs}; diff --git a/src/auditwheel/repair.rs b/src/auditwheel/repair.rs index ff0c42a70..9b8038ba2 100644 --- a/src/auditwheel/repair.rs +++ b/src/auditwheel/repair.rs @@ -1,32 +1,271 @@ -use super::audit::{AuditWheelError, is_dynamic_linker}; -use crate::auditwheel::Policy; -use anyhow::Result; -use lddtree::DependencyAnalyzer; +//! Shared wheel repair infrastructure. +//! +//! This module contains the [`WheelRepairer`] trait and the shared utilities +//! for preparing external libraries for grafting into wheels. +//! +//! Platform-specific implementations live in: +//! - [`super::linux::ElfRepairer`] +//! 
- [`super::macos::MacOSRepairer`] + +use crate::compile::BuildArtifact; +use crate::util::hash_file; +use anyhow::{Context, Result}; +use std::borrow::Borrow; +use std::collections::HashSet; use std::path::{Path, PathBuf}; -/// Find external shared library dependencies -#[allow(clippy::result_large_err)] -pub fn find_external_libs( - artifact: impl AsRef, - policy: &Policy, - sysroot: PathBuf, - ld_paths: Vec, -) -> Result, AuditWheelError> { - let dep_analyzer = DependencyAnalyzer::new(sysroot).library_paths(ld_paths); - let deps = dep_analyzer - .analyze(artifact) - .map_err(AuditWheelError::DependencyAnalysisError)?; - let mut ext_libs = Vec::new(); - for (_, lib) in deps.libraries { - let name = &lib.name; - // Skip dynamic linker/loader, musl libc, and white-listed libs - if is_dynamic_linker(name) - || name.starts_with("libc.") - || policy.lib_whitelist.contains(name) - { +use fs_err as fs; + +/// A build artifact bundled with the external shared libraries it depends on. +/// +/// Keeps the artifact and its per-artifact dependency list together so they +/// cannot accidentally get out of sync when passed through the wheel-writing +/// pipeline. +pub struct AuditedArtifact { + /// The build artifact. + pub artifact: BuildArtifact, + /// External shared libraries this artifact depends on that must be + /// bundled into the wheel. + pub external_libs: Vec, +} + +impl Borrow for AuditedArtifact { + fn borrow(&self) -> &BuildArtifact { + &self.artifact + } +} + +/// A library prepared for grafting into a wheel. +/// +/// Created by [`prepare_grafted_libs`] with a hash-suffixed filename and a +/// writable temporary copy ready for platform-specific patching. +pub struct GraftedLib { + /// Original library name as it appears in dependency records. + /// For ELF this is a leaf name like `libfoo.so.1`. + /// For Mach-O this can be a full install name like `/usr/local/lib/libfoo.dylib` + /// or `@rpath/libfoo.dylib`. 
+ pub original_name: String, + /// Additional install names that resolve to the same file on disk. + /// These need the same rewriting as `original_name` → `new_name`. + pub aliases: Vec, + /// New filename with hash suffix (e.g., `libfoo-ab12cd34.so.1`) + pub new_name: String, + /// Path to the writable temporary copy (ready for patching). + pub dest_path: PathBuf, + /// Libraries this one depends on (from lddtree's `needed` field). + pub needed: Vec, + /// Runtime library search paths from the original library. + pub rpath: Vec, +} + +/// Platform-specific wheel repair operations. +/// +/// Each platform (Linux/ELF, macOS/Mach-O) implements this trait to provide +/// its own dependency discovery and binary patching logic. +pub trait WheelRepairer { + /// Audit an artifact for platform compliance and find external libraries + /// that need to be bundled. + /// + /// Returns the determined platform policy and the list of external shared + /// library dependencies. + fn audit( + &self, + artifact: &BuildArtifact, + ld_paths: Vec, + ) -> Result<(super::Policy, Vec)>; + + /// Patch binary references after libraries have been grafted. + /// + /// This is called after [`prepare_grafted_libs`] has copied and + /// hash-renamed all external libraries. Implementations should: + /// + /// 1. Rewrite references in each artifact to point to the new names + /// 2. Set appropriate metadata on grafted libraries (soname, install ID, etc.) + /// 3. Update cross-references between grafted libraries + /// 4. Perform any final steps (e.g., code signing on macOS) + fn patch( + &self, + audited: &[AuditedArtifact], + grafted: &[GraftedLib], + libs_dir: &Path, + artifact_dir: &Path, + ) -> Result<()>; + + /// Patch artifacts for editable installs (e.g., set RPATH to Cargo target dir). + /// + /// The default implementation is a no-op. Platform-specific repairers can + /// override this to add runtime library search paths for editable mode. 
+ fn patch_editable(&self, _audited: &[AuditedArtifact]) -> Result<()> { + Ok(()) + } + + /// Return the wheel-internal directory name for grafted libraries. + /// + /// macOS uses `.dylibs` (matching delocate convention), + /// Linux uses `.libs` (matching auditwheel convention). + fn libs_dir(&self, dist_name: &str) -> PathBuf { + PathBuf::from(format!("{dist_name}.libs")) + } +} + +/// Prepare external libraries for grafting into a wheel. +/// +/// For each library: +/// 1. Resolves the real path on disk (fails if not found) +/// 2. Generates a hash-suffixed filename to avoid DLL hell +/// 3. Copies to `temp_dir` and makes the copy writable +/// +/// Returns the prepared libraries and the set of original paths that were copied. +/// +/// Deduplication is by `realpath` (the actual file on disk). When the same +/// file is referenced via multiple install names (common on macOS), only one +/// copy is made, but all original names are recorded as aliases. +pub fn prepare_grafted_libs( + audited: &[AuditedArtifact], + temp_dir: &Path, +) -> Result<(Vec, HashSet)> { + let mut grafted = Vec::new(); + let mut libs_copied = HashSet::new(); + let mut realpath_to_idx: std::collections::HashMap = + std::collections::HashMap::new(); + + for lib in audited.iter().flat_map(|a| &a.external_libs) { + let source_path = lib.realpath.clone().with_context(|| { + format!( + "Cannot repair wheel, because required library {} could not be located.", + lib.path.display() + ) + })?; + + // Check if we've already copied this exact file (by realpath). 
+ if let Some(&idx) = realpath_to_idx.get(&source_path) { + let existing: &mut GraftedLib = &mut grafted[idx]; + if lib.name != existing.original_name && !existing.aliases.contains(&lib.name) { + existing.aliases.push(lib.name.clone()); + } continue; } - ext_libs.push(lib); + + let new_name = hashed_lib_name(&lib.name, &source_path)?; + let dest_path = temp_dir.join(&new_name); + + fs::copy(&source_path, &dest_path)?; + // Make the copy writable so platform-specific tools can modify it + let mut perms = fs::metadata(&dest_path)?.permissions(); + #[allow(clippy::permissions_set_readonly_false)] + perms.set_readonly(false); + fs::set_permissions(&dest_path, perms)?; + + let idx = grafted.len(); + realpath_to_idx.insert(source_path.clone(), idx); + libs_copied.insert(source_path); + + grafted.push(GraftedLib { + original_name: lib.name.clone(), + aliases: Vec::new(), + new_name, + dest_path, + needed: lib.needed.clone(), + rpath: lib.rpath.clone(), + }); + } + + Ok((grafted, libs_copied)) +} + +/// Extract the leaf filename from a library name. +/// +/// Library names can be full paths on macOS (e.g., `/usr/local/lib/libfoo.dylib` +/// or `@rpath/libfoo.dylib`). This returns just the filename component. +pub(crate) fn leaf_filename(lib_name: &str) -> &str { + Path::new(lib_name) + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or(lib_name) +} + +/// Generate a hash-suffixed filename for a library to avoid collisions. +/// +/// Takes the leaf filename from `lib_name` (which may be a full path on macOS), +/// computes a short hash of the file content, and inserts it before the first +/// extension dot. 
+/// +/// Examples: +/// - `libfoo.so.1` + hash `ab12cd34` → `libfoo-ab12cd34.so.1` +/// - `/usr/local/lib/libbar.dylib` + hash `ef56gh78` → `libbar-ef56gh78.dylib` +pub(crate) fn hashed_lib_name(lib_name: &str, lib_path: &Path) -> Result { + let short_hash = &hash_file(lib_path) + .with_context(|| format!("Failed to hash library {}", lib_path.display()))?[..8]; + + let leaf = leaf_filename(lib_name); + + Ok(if let Some(pos) = leaf.find('.') { + let (stem, ext) = leaf.split_at(pos); + if stem.ends_with(&format!("-{short_hash}")) { + leaf.to_string() + } else { + format!("{stem}-{short_hash}{ext}") + } + } else { + format!("{leaf}-{short_hash}") + }) +} + +/// Log which libraries were grafted into the wheel. +pub fn log_grafted_libs(libs_copied: &HashSet, libs_dir: &Path) { + let mut grafted_paths: Vec<&PathBuf> = libs_copied.iter().collect(); + grafted_paths.sort(); + + eprintln!( + "🖨 Copied external shared libraries to package {} directory:", + libs_dir.display() + ); + for lib_path in &grafted_paths { + eprintln!(" {}", lib_path.display()); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + + #[test] + fn test_leaf_filename() { + assert_eq!(leaf_filename("libfoo.so.1"), "libfoo.so.1"); + assert_eq!(leaf_filename("/usr/local/lib/libfoo.dylib"), "libfoo.dylib"); + assert_eq!(leaf_filename("@rpath/libfoo.dylib"), "libfoo.dylib"); + } + + #[test] + fn test_hashed_lib_name() { + let tmp_dir = tempfile::tempdir().unwrap(); + let lib_path = tmp_dir.path().join("libfoo.so.1"); + { + let mut f = fs_err::File::create(&lib_path).unwrap(); + f.write_all(b"fake library content").unwrap(); + } + let name = hashed_lib_name("libfoo.so.1", &lib_path).unwrap(); + // Should have format: libfoo-XXXXXXXX.so.1 + assert!(name.starts_with("libfoo-")); + assert!(name.ends_with(".so.1")); + assert_eq!(name.len(), "libfoo-".len() + 8 + ".so.1".len()); + + // Idempotent: calling with already-hashed name should not double-hash + let name2 = hashed_lib_name(&name, 
&lib_path).unwrap(); + assert_eq!(name, name2); + } + + #[test] + fn test_hashed_lib_name_macos_path() { + let tmp_dir = tempfile::tempdir().unwrap(); + let lib_path = tmp_dir.path().join("libbar.dylib"); + { + let mut f = fs_err::File::create(&lib_path).unwrap(); + f.write_all(b"fake dylib content").unwrap(); + } + let name = hashed_lib_name("/usr/local/lib/libbar.dylib", &lib_path).unwrap(); + assert!(name.starts_with("libbar-")); + assert!(name.ends_with(".dylib")); } - Ok(ext_libs) } diff --git a/src/build_context/repair.rs b/src/build_context/repair.rs index 3e1d476a0..0ba445ec6 100644 --- a/src/build_context/repair.rs +++ b/src/build_context/repair.rs @@ -1,24 +1,60 @@ #[cfg(feature = "sbom")] use crate::auditwheel::get_sysroot_path; use crate::auditwheel::{ - AuditWheelMode, PlatformTag, Policy, get_policy_and_libs, patchelf, relpath, + AuditWheelMode, AuditedArtifact, ElfRepairer, PlatformTag, Policy, WheelRepairer, + log_grafted_libs, prepare_grafted_libs, }; #[cfg(feature = "sbom")] use crate::module_writer::ModuleWriter; use crate::module_writer::WheelWriter; -use crate::util::hash_file; use crate::{BridgeModel, BuildArtifact, PythonInterpreter, VirtualWriter}; use anyhow::{Context, Result, bail}; use fs_err as fs; use lddtree::Library; use normpath::PathExt; -use std::borrow::Borrow; -use std::collections::{BTreeMap, HashSet}; use std::path::{Path, PathBuf}; use super::BuildContext; impl BuildContext { + /// Create the appropriate platform-specific wheel repairer. 
+ fn make_repairer(&self, platform_tag: &[PlatformTag]) -> Option> { + if self.project.target.is_linux() { + let mut musllinux: Vec<_> = platform_tag + .iter() + .filter(|tag| tag.is_musllinux()) + .copied() + .collect(); + musllinux.sort(); + let mut others: Vec<_> = platform_tag + .iter() + .filter(|tag| !tag.is_musllinux()) + .copied() + .collect(); + others.sort(); + + let allow_linking_libpython = self.project.bridge().is_bin(); + + let effective_tag = if self.project.bridge().is_bin() && !musllinux.is_empty() { + Some(musllinux[0]) + } else { + others.first().or_else(|| musllinux.first()).copied() + }; + + Some(Box::new(ElfRepairer { + platform_tag: effective_tag, + target: self.project.target.clone(), + manifest_path: self.project.manifest_path.clone(), + allow_linking_libpython, + })) + } else if self.project.target.is_macos() { + // TODO: MacOSRepairer (Phase 2) + None + } else { + None + } + } + pub(crate) fn auditwheel( &self, artifact: &BuildArtifact, @@ -40,98 +76,57 @@ impl BuildContext { return Ok((Policy::default(), Vec::new())); } - let mut musllinux: Vec<_> = platform_tag - .iter() - .filter(|tag| tag.is_musllinux()) - .copied() - .collect(); - musllinux.sort(); - let mut others: Vec<_> = platform_tag - .iter() - .filter(|tag| !tag.is_musllinux()) - .copied() - .collect(); - others.sort(); - - // only bin bindings allow linking to libpython, extension modules must not - let allow_linking_libpython = self.project.bridge().is_bin(); - if self.project.bridge().is_bin() && !musllinux.is_empty() { - return get_policy_and_libs( - artifact, - Some(musllinux[0]), - &self.project.target, - &self.project.manifest_path, - allow_linking_libpython, - ); - } + let repairer = match self.make_repairer(platform_tag) { + Some(r) => r, + None => return Ok((Policy::default(), Vec::new())), + }; - let tag = others.first().or_else(|| musllinux.first()).copied(); - get_policy_and_libs( - artifact, - tag, - &self.project.target, - &self.project.manifest_path, - 
allow_linking_libpython, - ) + let ld_paths: Vec = artifact.linked_paths.iter().map(PathBuf::from).collect(); + repairer.audit(artifact, ld_paths) } - /// Add library search paths in Cargo target directory rpath when building in editable mode - fn add_rpath(&self, artifacts: &[A]) -> Result<()> - where - A: Borrow, - { - if self.project.editable && self.project.target.is_linux() && !artifacts.is_empty() { - for artifact in artifacts { - let artifact = artifact.borrow(); - if artifact.linked_paths.is_empty() { - continue; - } - let old_rpaths = patchelf::get_rpath(&artifact.path)?; - let mut new_rpaths = old_rpaths.clone(); - for path in &artifact.linked_paths { - if !old_rpaths.contains(path) { - new_rpaths.push(path.to_string()); - } - } - let new_rpath = new_rpaths.join(":"); - if let Err(err) = patchelf::set_rpath(&artifact.path, &new_rpath) { - eprintln!( - "⚠️ Warning: Failed to set rpath for {}: {}", - artifact.path.display(), - err - ); - } + /// Compute the wheel-internal directory where the artifact resides. + fn get_artifact_dir(&self) -> PathBuf { + match self.project.bridge() { + // cffi bindings that contains '.' 
in the module name will be split into directories + BridgeModel::Cffi => self.project.module_name.split(".").collect::(), + // For namespace packages the modules reside at ${module_name}.so + // where periods are replaced with slashes so for example my.namespace.module would reside + // at my/namespace/module.so + _ if self.project.module_name.contains(".") => { + let mut path = self.project.module_name.split(".").collect::(); + path.pop(); + path } + // For other bindings artifact .so file usually resides at ${module_name}/${module_name}.so + _ => PathBuf::from(&self.project.module_name), } - Ok(()) } - pub(crate) fn add_external_libs( + pub(crate) fn add_external_libs( &self, writer: &mut VirtualWriter, - artifacts: &[A], - ext_libs: &[Vec], - ) -> Result<()> - where - A: Borrow, - { + audited: &[AuditedArtifact], + ) -> Result<()> { if self.project.editable { - return self.add_rpath(artifacts); + if let Some(repairer) = self.make_repairer(&self.python.platform_tag) { + return repairer.patch_editable(audited); + } + return Ok(()); } - if ext_libs.iter().all(|libs| libs.is_empty()) { + if audited.iter().all(|a| a.external_libs.is_empty()) { return Ok(()); } // Log which libraries need to be copied and which artifacts require them // before calling patchelf, so users can see this even if patchelf is missing. 
eprintln!("🔗 External shared libraries to be copied into the wheel:"); - for (artifact, artifact_ext_libs) in artifacts.iter().zip(ext_libs) { - let artifact = artifact.borrow(); - if artifact_ext_libs.is_empty() { + for aa in audited { + if aa.external_libs.is_empty() { continue; } - eprintln!(" {} requires:", artifact.path.display()); - for lib in artifact_ext_libs { + eprintln!(" {} requires:", aa.artifact.path.display()); + for lib in &aa.external_libs { if let Some(path) = lib.realpath.as_ref() { eprintln!(" {} => {}", lib.name, path.display()); } else { @@ -142,110 +137,34 @@ impl BuildContext { if matches!(self.python.auditwheel, AuditWheelMode::Check) { bail!( - "Your library is not manylinux/musllinux compliant because it requires copying the above libraries. \ + "Your library requires copying the above external libraries. \ Re-run with `--auditwheel=repair` to copy them." ); } - patchelf::verify_patchelf()?; + let repairer = self + .make_repairer(&self.python.platform_tag) + .context("No wheel repairer available for this platform")?; // Put external libs to ${distribution_name}.libs directory // See https://github.com/pypa/auditwheel/issues/89 // Use the distribution name (matching auditwheel's behavior) to avoid // conflicts with other packages in the same namespace. 
- let libs_dir = PathBuf::from(format!( - "{}.libs", - self.project.metadata24.get_distribution_escaped() - )); + let dist_name = self.project.metadata24.get_distribution_escaped(); + let libs_dir = repairer.libs_dir(&dist_name); let temp_dir = writer.temp_dir()?; - let mut soname_map = BTreeMap::new(); - let mut libs_copied = HashSet::new(); - for lib in ext_libs.iter().flatten() { - let lib_path = lib.realpath.clone().with_context(|| { - format!( - "Cannot repair wheel, because required library {} could not be located.", - lib.path.display() - ) - })?; - // Generate a new soname with a short hash - let short_hash = &hash_file(&lib_path)?[..8]; - let (file_stem, file_ext) = lib.name.split_once('.').with_context(|| { - format!("Unexpected library name without extension: {}", lib.name) - })?; - let new_soname = if !file_stem.ends_with(&format!("-{short_hash}")) { - format!("{file_stem}-{short_hash}.{file_ext}") - } else { - format!("{file_stem}.{file_ext}") - }; - - // Copy the original lib to a tmpdir and modify some of its properties - // for example soname and rpath - let dest_path = temp_dir.path().join(&new_soname); - fs::copy(&lib_path, &dest_path)?; - libs_copied.insert(lib_path); - - // fs::copy copies permissions as well, and the original - // file may have been read-only - let mut perms = fs::metadata(&dest_path)?.permissions(); - #[allow(clippy::permissions_set_readonly_false)] - perms.set_readonly(false); - fs::set_permissions(&dest_path, perms)?; - - patchelf::set_soname(&dest_path, &new_soname)?; - if !lib.rpath.is_empty() { - patchelf::set_rpath(&dest_path, &libs_dir)?; - } - soname_map.insert( - lib.name.clone(), - (new_soname.clone(), dest_path.clone(), lib.needed.clone()), - ); - } + let (grafted, libs_copied) = prepare_grafted_libs(audited, temp_dir.path())?; - for (artifact, artifact_ext_libs) in artifacts.iter().zip(ext_libs) { - let artifact = artifact.borrow(); - let artifact_deps: HashSet<_> = artifact_ext_libs.iter().map(|lib| 
&lib.name).collect(); - let replacements = soname_map - .iter() - .filter_map(|(k, v)| { - if artifact_deps.contains(k) { - Some((k, v.0.clone())) - } else { - None - } - }) - .collect::>(); - if !replacements.is_empty() { - patchelf::replace_needed(&artifact.path, &replacements[..])?; - } - } + let artifact_dir = self.get_artifact_dir(); + repairer.patch(audited, &grafted, &libs_dir, &artifact_dir)?; - // we grafted in a bunch of libraries and modified their sonames, but - // they may have internal dependencies (DT_NEEDED) on one another, so - // we need to update those records so each now knows about the new - // name of the other. - for (new_soname, path, needed) in soname_map.values() { - let mut replacements = Vec::new(); - for n in needed { - if soname_map.contains_key(n) { - replacements.push((n, soname_map[n].0.clone())); - } - } - if !replacements.is_empty() { - patchelf::replace_needed(path, &replacements[..])?; - } - // Use add_file_force to bypass exclusion checks for external shared libraries - writer.add_file_force(libs_dir.join(new_soname), path, true)?; + // Add grafted libraries to the wheel + for lib in &grafted { + writer.add_file_force(libs_dir.join(&lib.new_name), &lib.dest_path, true)?; } - // Sort for deterministic output. - let mut grafted_paths: Vec = libs_copied.into_iter().collect(); - grafted_paths.sort(); - - eprintln!( - "🖨 Copied external shared libraries to package {} directory.", - libs_dir.display() - ); + log_grafted_libs(&libs_copied, &libs_dir); // Generate auditwheel SBOM for the grafted libraries. // This mirrors Python auditwheel's behaviour of writing a CycloneDX @@ -264,6 +183,8 @@ impl BuildContext { // prefixes when querying the host package manager. 
let sysroot = get_sysroot_path(&self.project.target).unwrap_or_else(|_| PathBuf::from("/")); + let mut grafted_paths: Vec = libs_copied.into_iter().collect(); + grafted_paths.sort(); if let Some(sbom_json) = crate::auditwheel::sbom::create_auditwheel_sbom( &self.project.metadata24.name, &self.project.metadata24.version.to_string(), @@ -280,30 +201,6 @@ impl BuildContext { } } - let artifact_dir = match self.project.bridge() { - // cffi bindings that contains '.' in the module name will be split into directories - BridgeModel::Cffi => self.project.module_name.split(".").collect::(), - // For namespace packages the modules reside at ${module_name}.so - // where periods are replaced with slashes so for example my.namespace.module would reside - // at my/namespace/module.so - _ if self.project.module_name.contains(".") => { - let mut path = self.project.module_name.split(".").collect::(); - path.pop(); - path - } - // For other bindings artifact .so file usually resides at ${module_name}/${module_name}.so - _ => PathBuf::from(&self.project.module_name), - }; - for artifact in artifacts { - let artifact = artifact.borrow(); - let mut new_rpaths = patchelf::get_rpath(&artifact.path)?; - // TODO: clean existing rpath entries if it's not pointed to a location within the wheel - // See https://github.com/pypa/auditwheel/blob/353c24250d66951d5ac7e60b97471a6da76c123f/src/auditwheel/repair.py#L160 - let new_rpath = Path::new("$ORIGIN").join(relpath(&libs_dir, &artifact_dir)); - new_rpaths.push(new_rpath.to_str().unwrap().to_string()); - let new_rpath = new_rpaths.join(":"); - patchelf::set_rpath(&artifact.path, &new_rpath)?; - } Ok(()) } diff --git a/src/build_orchestrator.rs b/src/build_orchestrator.rs index 8d76ec2df..0682468e6 100644 --- a/src/build_orchestrator.rs +++ b/src/build_orchestrator.rs @@ -1,4 +1,4 @@ -use crate::auditwheel::{PlatformTag, Policy}; +use crate::auditwheel::{AuditedArtifact, PlatformTag, Policy}; use crate::binding_generator::{ BinBindingGenerator, 
BindingGenerator, CffiBindingGenerator, Pyo3BindingGenerator, UniFfiBindingGenerator, generate_binding, @@ -19,7 +19,6 @@ use cargo_metadata::CrateType; use fs_err as fs; use ignore::overrides::{Override, OverrideBuilder}; use itertools::Itertools; -use lddtree::Library; use normpath::PathExt; use std::collections::{HashMap, HashSet}; use std::path::{Path, PathBuf}; @@ -468,8 +467,7 @@ impl<'a> BuildOrchestrator<'a> { fn write_wheel<'b, F>( &'b self, tag: &str, - artifacts: &[&BuildArtifact], - ext_libs: &[Vec], + audited: &[AuditedArtifact], make_generator: F, sbom_data: &Option, out_dirs: &HashMap, @@ -490,8 +488,7 @@ impl<'a> BuildOrchestrator<'a> { file_options, )?; let mut writer = VirtualWriter::new(writer, self.excludes(Format::Wheel)?); - self.context - .add_external_libs(&mut writer, artifacts, ext_libs)?; + self.context.add_external_libs(&mut writer, audited)?; let temp_dir = writer.temp_dir()?; let mut generator = make_generator(temp_dir)?; @@ -499,7 +496,7 @@ impl<'a> BuildOrchestrator<'a> { &mut writer, generator.as_mut(), self.context, - artifacts, + audited, out_dirs, ) .context("Failed to add the files to the wheel")?; @@ -554,10 +551,13 @@ impl<'a> BuildOrchestrator<'a> { let abi_tag = stable_abi_kind.wheel_tag(); let tag = format!("cp{major}{min_minor}-{abi_tag}-{platform}"); + let audited = [AuditedArtifact { + artifact, + external_libs, + }]; let wheel_path = self.write_wheel( &tag, - &[&artifact], - &[external_libs], + &audited, |temp_dir| { Ok(Box::new( Pyo3BindingGenerator::new(Some(stable_abi_kind), python_interpreter, temp_dir) @@ -583,9 +583,8 @@ impl<'a> BuildOrchestrator<'a> { fn write_pyo3_wheel( &self, python_interpreter: &PythonInterpreter, - artifact: BuildArtifact, + audited: &[AuditedArtifact], platform_tags: &[PlatformTag], - ext_libs: Vec, sbom_data: &Option, out_dirs: &HashMap, ) -> Result { @@ -593,8 +592,7 @@ impl<'a> BuildOrchestrator<'a> { self.write_wheel( &tag, - &[&artifact], - &[ext_libs], + audited, |temp_dir| { 
Ok(Box::new( Pyo3BindingGenerator::new(None, Some(python_interpreter), temp_dir) @@ -623,11 +621,14 @@ impl<'a> BuildOrchestrator<'a> { Some(python_interpreter), )?; let platform_tags = self.resolve_platform_tags(&policy); + let audited = [AuditedArtifact { + artifact, + external_libs, + }]; let wheel_path = self.write_pyo3_wheel( python_interpreter, - artifact, + &audited, &platform_tags, - external_libs, sbom_data, &out_dirs, )?; @@ -705,14 +706,11 @@ impl<'a> BuildOrchestrator<'a> { .auditwheel(&artifact, &self.context.python.platform_tag, None)?; let platform_tags = self.resolve_platform_tags(&policy); let tag = self.get_universal_tag(&platform_tags)?; - let wheel_path = self.write_wheel( - &tag, - &[&artifact], - &[external_libs], - make_generator, - sbom_data, - &out_dirs, - )?; + let audited = [AuditedArtifact { + artifact, + external_libs, + }]; + let wheel_path = self.write_wheel(&tag, &audited, make_generator, sbom_data, &out_dirs)?; Ok((wheel_path, out_dirs)) } @@ -766,9 +764,8 @@ impl<'a> BuildOrchestrator<'a> { fn write_bin_wheel( &self, python_interpreter: Option<&PythonInterpreter>, - artifacts: &[BuildArtifact], + audited: &[AuditedArtifact], platform_tags: &[PlatformTag], - ext_libs: &[Vec], sbom_data: &Option, out_dirs: &HashMap, ) -> Result { @@ -805,19 +802,11 @@ impl<'a> BuildOrchestrator<'a> { let writer = WheelWriter::new(&tag, &self.context.artifact.out, &metadata24, file_options)?; let mut writer = VirtualWriter::new(writer, self.excludes(Format::Wheel)?); - let artifact_refs: Vec<&BuildArtifact> = artifacts.iter().collect(); - self.context - .add_external_libs(&mut writer, &artifact_refs, ext_libs)?; + self.context.add_external_libs(&mut writer, audited)?; let mut generator = BinBindingGenerator::new(&mut metadata24); - generate_binding( - &mut writer, - &mut generator, - self.context, - artifacts, - out_dirs, - ) - .context("Failed to add the files to the wheel")?; + generate_binding(&mut writer, &mut generator, self.context, audited, 
out_dirs) + .context("Failed to add the files to the wheel")?; self.add_pth(&mut writer)?; add_data( @@ -860,8 +849,7 @@ impl<'a> BuildOrchestrator<'a> { } let mut policies = Vec::with_capacity(result.artifacts.len()); - let mut ext_libs = Vec::new(); - let mut artifact_paths = Vec::with_capacity(result.artifacts.len()); + let mut audited_artifacts = Vec::new(); for artifact in result.artifacts { let mut artifact = artifact .get(&CrateType::Bin) @@ -872,19 +860,20 @@ impl<'a> BuildOrchestrator<'a> { self.context .auditwheel(&artifact, &self.context.python.platform_tag, None)?; policies.push(policy); - ext_libs.push(external_libs); self.context.stage_artifact(&mut artifact)?; - artifact_paths.push(artifact); + audited_artifacts.push(AuditedArtifact { + artifact, + external_libs, + }); } let policy = policies.iter().min_by_key(|p| p.priority).unwrap(); let platform_tags = self.resolve_platform_tags(policy); let wheel_path = self.write_bin_wheel( python_interpreter, - &artifact_paths, + &audited_artifacts, &platform_tags, - &ext_libs, sbom_data, &result.out_dirs, )?;