From 9ce4c5ef91b9f56553b15eab31f610b2ff00f472 Mon Sep 17 00:00:00 2001 From: messense Date: Tue, 31 Mar 2026 21:46:40 +0800 Subject: [PATCH 1/7] feat: add WheelRepairer trait and shared repair utilities Rewrite src/auditwheel/repair.rs with: - WheelRepairer trait with audit(), patch(), init_py_patch(), libs_dir() - GraftedLib struct for prepared libraries with alias tracking - prepare_grafted_libs() for hash-renaming and deduplication by realpath - hashed_lib_name() and leaf_filename() helpers - log_grafted_libs() for consistent logging Move find_external_libs() into audit.rs (where it's used) since it is Linux/ELF specific. The new repair.rs is platform-agnostic shared infra. --- src/auditwheel/audit.rs | 29 ++++- src/auditwheel/mod.rs | 3 +- src/auditwheel/repair.rs | 264 +++++++++++++++++++++++++++++++++++---- 3 files changed, 268 insertions(+), 28 deletions(-) diff --git a/src/auditwheel/audit.rs b/src/auditwheel/audit.rs index 1c34a68a3..7365c91de 100644 --- a/src/auditwheel/audit.rs +++ b/src/auditwheel/audit.rs @@ -1,6 +1,6 @@ use super::musllinux::{find_musl_libc, get_musl_version}; use super::policy::{MANYLINUX_POLICIES, MUSLLINUX_POLICIES, Policy}; -use crate::auditwheel::{PlatformTag, find_external_libs}; +use crate::auditwheel::PlatformTag; use crate::compile::BuildArtifact; use crate::target::{Arch, Target}; use anyhow::{Context, Result, bail}; @@ -501,6 +501,33 @@ pub fn get_sysroot_path(target: &Target) -> Result { Ok(PathBuf::from("/")) } +/// Find external shared library dependencies (Linux/ELF specific) +#[allow(clippy::result_large_err)] +pub fn find_external_libs( + artifact: impl AsRef, + policy: &Policy, + sysroot: PathBuf, + ld_paths: Vec, +) -> Result, AuditWheelError> { + let dep_analyzer = lddtree::DependencyAnalyzer::new(sysroot).library_paths(ld_paths); + let deps = dep_analyzer + .analyze(artifact) + .map_err(AuditWheelError::DependencyAnalysisError)?; + let mut ext_libs = Vec::new(); + for (_, lib) in deps.libraries { + let name = 
&lib.name; + // Skip dynamic linker/loader, musl libc, and white-listed libs + if is_dynamic_linker(name) + || name.starts_with("libc.") + || policy.lib_whitelist.contains(name) + { + continue; + } + ext_libs.push(lib); + } + Ok(ext_libs) +} + /// For the given compilation result, return the manylinux platform and the external libs /// we need to add to repair it pub fn get_policy_and_libs( diff --git a/src/auditwheel/mod.rs b/src/auditwheel/mod.rs index 860c3a39c..588ccbd55 100644 --- a/src/auditwheel/mod.rs +++ b/src/auditwheel/mod.rs @@ -9,7 +9,8 @@ pub mod sbom; #[cfg(feature = "sbom")] mod whichprovides; +pub use audit::find_external_libs; pub use audit::*; pub use platform_tag::PlatformTag; pub use policy::Policy; -pub use repair::find_external_libs; +pub use repair::{WheelRepairer, log_grafted_libs, prepare_grafted_libs}; diff --git a/src/auditwheel/repair.rs b/src/auditwheel/repair.rs index ff0c42a70..b47c7cb4e 100644 --- a/src/auditwheel/repair.rs +++ b/src/auditwheel/repair.rs @@ -1,32 +1,244 @@ -use super::audit::{AuditWheelError, is_dynamic_linker}; -use crate::auditwheel::Policy; -use anyhow::Result; -use lddtree::DependencyAnalyzer; +//! Shared wheel repair infrastructure. +//! +//! This module contains the [`WheelRepairer`] trait and the shared utilities +//! for preparing external libraries for grafting into wheels. +//! +//! Platform-specific implementations live in: +//! - [`super::linux::ElfRepairer`] +//! 
- [`super::macos::MacOSRepairer`] + +use crate::compile::BuildArtifact; +use crate::util::hash_file; +use anyhow::{Context, Result}; +use std::collections::HashSet; use std::path::{Path, PathBuf}; -/// Find external shared library dependencies -#[allow(clippy::result_large_err)] -pub fn find_external_libs( - artifact: impl AsRef, - policy: &Policy, - sysroot: PathBuf, - ld_paths: Vec, -) -> Result, AuditWheelError> { - let dep_analyzer = DependencyAnalyzer::new(sysroot).library_paths(ld_paths); - let deps = dep_analyzer - .analyze(artifact) - .map_err(AuditWheelError::DependencyAnalysisError)?; - let mut ext_libs = Vec::new(); - for (_, lib) in deps.libraries { - let name = &lib.name; - // Skip dynamic linker/loader, musl libc, and white-listed libs - if is_dynamic_linker(name) - || name.starts_with("libc.") - || policy.lib_whitelist.contains(name) - { +use fs_err as fs; + +/// A library prepared for grafting into a wheel. +/// +/// Created by [`prepare_grafted_libs`] with a hash-suffixed filename and a +/// writable temporary copy ready for platform-specific patching. +pub struct GraftedLib { + /// Original library name as it appears in dependency records. + /// For ELF this is a leaf name like `libfoo.so.1`. + /// For Mach-O this can be a full install name like `/usr/local/lib/libfoo.dylib` + /// or `@rpath/libfoo.dylib`. + pub original_name: String, + /// Additional install names that resolve to the same file on disk. + /// These need the same rewriting as `original_name` → `new_name`. + pub aliases: Vec, + /// New filename with hash suffix (e.g., `libfoo-ab12cd34.so.1`) + pub new_name: String, + /// Path to the writable temporary copy (ready for patching). + pub dest_path: PathBuf, + /// Libraries this one depends on (from lddtree's `needed` field). + pub needed: Vec, + /// Runtime library search paths from the original library. + pub rpath: Vec, +} + +/// Platform-specific wheel repair operations. 
+/// +/// Each platform (Linux/ELF, macOS/Mach-O) implements this trait to provide +/// its own dependency discovery and binary patching logic. +pub trait WheelRepairer { + /// Audit an artifact for platform compliance and find external libraries + /// that need to be bundled. + /// + /// Returns the determined platform policy and the list of external shared + /// library dependencies. + fn audit( + &self, + artifact: &BuildArtifact, + ld_paths: Vec, + ) -> Result<(super::Policy, Vec)>; + + /// Patch binary references after libraries have been grafted. + /// + /// This is called after [`prepare_grafted_libs`] has copied and + /// hash-renamed all external libraries. Implementations should: + /// + /// 1. Rewrite references in each artifact to point to the new names + /// 2. Set appropriate metadata on grafted libraries (soname, install ID, etc.) + /// 3. Update cross-references between grafted libraries + /// 4. Perform any final steps (e.g., code signing on macOS) + fn patch( + &self, + artifacts: &[&BuildArtifact], + grafted: &[GraftedLib], + libs_dir: &Path, + artifact_dir: &Path, + ) -> Result<()>; + + /// Return the wheel-internal directory name for grafted libraries. + /// + /// macOS uses `.dylibs` (matching delocate convention), + /// Linux uses `.libs` (matching auditwheel convention). + fn libs_dir(&self, dist_name: &str) -> PathBuf { + PathBuf::from(format!("{dist_name}.libs")) + } +} + +/// Prepare external libraries for grafting into a wheel. +/// +/// For each library: +/// 1. Resolves the real path on disk (fails if not found) +/// 2. Generates a hash-suffixed filename to avoid DLL hell +/// 3. Copies to `temp_dir` and makes the copy writable +/// +/// Returns the prepared libraries and the set of original paths that were copied. +/// +/// Deduplication is by `realpath` (the actual file on disk). 
When the same +/// file is referenced via multiple install names (common on macOS), only one +/// copy is made, but all original names are recorded as aliases. +pub fn prepare_grafted_libs( + ext_libs: &[Vec], + temp_dir: &Path, +) -> Result<(Vec, HashSet)> { + let mut grafted = Vec::new(); + let mut libs_copied = HashSet::new(); + let mut realpath_to_idx: std::collections::HashMap = + std::collections::HashMap::new(); + + for lib in ext_libs.iter().flatten() { + let source_path = lib.realpath.clone().with_context(|| { + format!( + "Cannot repair wheel, because required library {} could not be located.", + lib.path.display() + ) + })?; + + // Check if we've already copied this exact file (by realpath). + if let Some(&idx) = realpath_to_idx.get(&source_path) { + let existing: &mut GraftedLib = &mut grafted[idx]; + if lib.name != existing.original_name && !existing.aliases.contains(&lib.name) { + existing.aliases.push(lib.name.clone()); + } + libs_copied.insert(source_path); continue; } - ext_libs.push(lib); + + let new_name = hashed_lib_name(&lib.name, &source_path)?; + let dest_path = temp_dir.join(&new_name); + + fs::copy(&source_path, &dest_path)?; + // Make the copy writable so platform-specific tools can modify it + let mut perms = fs::metadata(&dest_path)?.permissions(); + #[allow(clippy::permissions_set_readonly_false)] + perms.set_readonly(false); + fs::set_permissions(&dest_path, perms)?; + + let idx = grafted.len(); + realpath_to_idx.insert(source_path.clone(), idx); + libs_copied.insert(source_path); + + grafted.push(GraftedLib { + original_name: lib.name.clone(), + aliases: Vec::new(), + new_name, + dest_path, + needed: lib.needed.clone(), + rpath: lib.rpath.clone(), + }); + } + + Ok((grafted, libs_copied)) +} + +/// Extract the leaf filename from a library name. +/// +/// Library names can be full paths on macOS (e.g., `/usr/local/lib/libfoo.dylib` +/// or `@rpath/libfoo.dylib`). This returns just the filename component. 
+pub(crate) fn leaf_filename(lib_name: &str) -> &str { + Path::new(lib_name) + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or(lib_name) +} + +/// Generate a hash-suffixed filename for a library to avoid collisions. +/// +/// Takes the leaf filename from `lib_name` (which may be a full path on macOS), +/// computes a short hash of the file content, and inserts it before the first +/// extension dot. +/// +/// Examples: +/// - `libfoo.so.1` + hash `ab12cd34` → `libfoo-ab12cd34.so.1` +/// - `/usr/local/lib/libbar.dylib` + hash `ef56gh78` → `libbar-ef56gh78.dylib` +pub(crate) fn hashed_lib_name(lib_name: &str, lib_path: &Path) -> Result { + let short_hash = &hash_file(lib_path) + .with_context(|| format!("Failed to hash library {}", lib_path.display()))?[..8]; + + let leaf = leaf_filename(lib_name); + + Ok(if let Some(pos) = leaf.find('.') { + let (stem, ext) = leaf.split_at(pos); + if stem.ends_with(&format!("-{short_hash}")) { + leaf.to_string() + } else { + format!("{stem}-{short_hash}{ext}") + } + } else { + format!("{leaf}-{short_hash}") + }) +} + +/// Log which libraries were grafted into the wheel. 
+pub fn log_grafted_libs(libs_copied: &HashSet, libs_dir: &Path) { + let mut grafted_paths: Vec<&PathBuf> = libs_copied.iter().collect(); + grafted_paths.sort(); + + eprintln!( + "🖨 Copied external shared libraries to package {} directory:", + libs_dir.display() + ); + for lib_path in &grafted_paths { + eprintln!(" {}", lib_path.display()); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + + #[test] + fn test_leaf_filename() { + assert_eq!(leaf_filename("libfoo.so.1"), "libfoo.so.1"); + assert_eq!(leaf_filename("/usr/local/lib/libfoo.dylib"), "libfoo.dylib"); + assert_eq!(leaf_filename("@rpath/libfoo.dylib"), "libfoo.dylib"); + } + + #[test] + fn test_hashed_lib_name() { + let tmp_dir = tempfile::tempdir().unwrap(); + let lib_path = tmp_dir.path().join("libfoo.so.1"); + { + let mut f = fs_err::File::create(&lib_path).unwrap(); + f.write_all(b"fake library content").unwrap(); + } + let name = hashed_lib_name("libfoo.so.1", &lib_path).unwrap(); + // Should have format: libfoo-XXXXXXXX.so.1 + assert!(name.starts_with("libfoo-")); + assert!(name.ends_with(".so.1")); + assert_eq!(name.len(), "libfoo-".len() + 8 + ".so.1".len()); + + // Idempotent: calling with already-hashed name should not double-hash + let name2 = hashed_lib_name(&name, &lib_path).unwrap(); + assert_eq!(name, name2); + } + + #[test] + fn test_hashed_lib_name_macos_path() { + let tmp_dir = tempfile::tempdir().unwrap(); + let lib_path = tmp_dir.path().join("libbar.dylib"); + { + let mut f = fs_err::File::create(&lib_path).unwrap(); + f.write_all(b"fake dylib content").unwrap(); + } + let name = hashed_lib_name("/usr/local/lib/libbar.dylib", &lib_path).unwrap(); + assert!(name.starts_with("libbar-")); + assert!(name.ends_with(".dylib")); } - Ok(ext_libs) } From ab099f4ab0c8000d7c60d8a921a60add054588da Mon Sep 17 00:00:00 2001 From: messense Date: Tue, 31 Mar 2026 21:47:37 +0800 Subject: [PATCH 2/7] feat: create ElfRepairer implementing WheelRepairer for Linux Add 
src/auditwheel/linux.rs with ElfRepairer that delegates to the existing audit logic in audit.rs and uses patchelf for binary patching (SONAME, DT_NEEDED, RPATH). Update mod.rs to export ElfRepairer. --- src/auditwheel/audit.rs | 319 +----------------------------- src/auditwheel/linux.rs | 426 ++++++++++++++++++++++++++++++++++++++++ src/auditwheel/mod.rs | 3 +- 3 files changed, 431 insertions(+), 317 deletions(-) create mode 100644 src/auditwheel/linux.rs diff --git a/src/auditwheel/audit.rs b/src/auditwheel/audit.rs index 7365c91de..623421f19 100644 --- a/src/auditwheel/audit.rs +++ b/src/auditwheel/audit.rs @@ -6,7 +6,6 @@ use crate::target::{Arch, Target}; use anyhow::{Context, Result, bail}; use fs_err::File; use goblin::elf::{Elf, sym::STB_WEAK, sym::STT_FUNC}; -use lddtree::Library; use once_cell::sync::Lazy; use regex::Regex; use serde::{Deserialize, Serialize}; @@ -15,9 +14,8 @@ use std::io::Read; use std::path::{Path, PathBuf}; use std::{fmt, io}; use thiserror::Error; -use tracing::debug; -static IS_LIBPYTHON: Lazy = +pub(crate) static IS_LIBPYTHON: Lazy = Lazy::new(|| Regex::new(r"^libpython3\.\d+m?u?t?\.so\.\d+\.\d+$").unwrap()); /// Returns `true` if the given shared-library name is a dynamic linker @@ -110,7 +108,7 @@ impl VersionedLibrary { /// Parse version strings (e.g. "GLIBC_2.17") into a map of name -> set of versions. /// e.g. 
{"GLIBC" -> {"2.17", "2.5"}, "GCC" -> {"3.0"}} /// - fn parsed_versions(&self) -> HashMap> { + pub(crate) fn parsed_versions(&self) -> HashMap> { let mut result: HashMap> = HashMap::new(); for v in &self.versions { if let Some((name, version)) = v.split_once('_') { @@ -291,7 +289,7 @@ fn policy_is_satisfied( } } -fn get_default_platform_policies() -> Vec { +pub(crate) fn get_default_platform_policies() -> Vec { if let Ok(Some(musl_libc)) = find_musl_libc() && let Ok(Some((major, minor))) = get_musl_version(musl_libc) { @@ -501,235 +499,6 @@ pub fn get_sysroot_path(target: &Target) -> Result { Ok(PathBuf::from("/")) } -/// Find external shared library dependencies (Linux/ELF specific) -#[allow(clippy::result_large_err)] -pub fn find_external_libs( - artifact: impl AsRef, - policy: &Policy, - sysroot: PathBuf, - ld_paths: Vec, -) -> Result, AuditWheelError> { - let dep_analyzer = lddtree::DependencyAnalyzer::new(sysroot).library_paths(ld_paths); - let deps = dep_analyzer - .analyze(artifact) - .map_err(AuditWheelError::DependencyAnalysisError)?; - let mut ext_libs = Vec::new(); - for (_, lib) in deps.libraries { - let name = &lib.name; - // Skip dynamic linker/loader, musl libc, and white-listed libs - if is_dynamic_linker(name) - || name.starts_with("libc.") - || policy.lib_whitelist.contains(name) - { - continue; - } - ext_libs.push(lib); - } - Ok(ext_libs) -} - -/// For the given compilation result, return the manylinux platform and the external libs -/// we need to add to repair it -pub fn get_policy_and_libs( - artifact: &BuildArtifact, - platform_tag: Option, - target: &Target, - manifest_path: &Path, - allow_linking_libpython: bool, -) -> Result<(Policy, Vec)> { - let (policy, should_repair) = - auditwheel_rs(artifact, target, platform_tag, allow_linking_libpython).with_context( - || { - if let Some(platform_tag) = platform_tag { - format!("Error ensuring {platform_tag} compliance") - } else { - "Error checking for manylinux/musllinux compliance".to_string() - 
} - }, - )?; - let external_libs = if should_repair { - let sysroot = get_sysroot_path(target).unwrap_or_else(|_| PathBuf::from("/")); - let mut ld_paths: Vec = artifact.linked_paths.iter().map(PathBuf::from).collect(); - - // Add library search paths from RUSTFLAGS - if let Some(rustflags_paths) = extract_rustflags_library_paths(manifest_path, target) { - ld_paths.extend(rustflags_paths); - } - - let external_libs = find_external_libs(&artifact.path, &policy, sysroot, ld_paths) - .with_context(|| { - if let Some(platform_tag) = platform_tag { - format!("Error repairing wheel for {platform_tag} compliance") - } else { - "Error repairing wheel for manylinux/musllinux compliance".to_string() - } - })?; - if allow_linking_libpython { - external_libs - .into_iter() - .filter(|lib| !IS_LIBPYTHON.is_match(&lib.name)) - .collect() - } else { - external_libs - } - } else { - Vec::new() - }; - - // Check external libraries for versioned symbol requirements that may - // require a stricter (less compatible, e.g. newer manylinux) policy than what - // the main artifact alone would need. See https://github.com/PyO3/maturin/issues/1490 - let policy = if !external_libs.is_empty() { - let (adjusted, offenders) = check_external_libs_policy(&policy, &external_libs, target)?; - if platform_tag.is_some() && !offenders.is_empty() { - let tag_kind = if policy.name.starts_with("musllinux") { - "musllinux" - } else { - "manylinux" - }; - bail!( - "External libraries {offenders:?} require newer symbol versions than {policy} allows. \ - Consider using --compatibility {adjusted} or a newer {tag_kind} tag" - ); - } - adjusted - } else { - policy - }; - - Ok((policy, external_libs)) -} - -/// Return the symbol versions required by external libraries that are not -/// allowed by the given policy, e.g. `["GLIBC_2.29", "GLIBC_2.33"]`. 
-fn unsatisfied_symbol_versions( - policy: &Policy, - arch: &str, - versioned_libraries: &[VersionedLibrary], -) -> Vec { - let arch_versions = match policy.symbol_versions.get(arch) { - Some(v) => v, - None => return vec!["(unsupported arch)".to_string()], - }; - let mut unsatisfied = Vec::new(); - for library in versioned_libraries { - if !policy.lib_whitelist.contains(&library.name) { - continue; - } - for (name, versions_needed) in library.parsed_versions() { - match arch_versions.get(&name) { - Some(versions_allowed) => { - for v in versions_needed.difference(versions_allowed) { - unsatisfied.push(format!("{name}_{v}")); - } - } - None => { - for v in &versions_needed { - unsatisfied.push(format!("{name}_{v}")); - } - } - } - } - } - unsatisfied.sort(); - unsatisfied -} - -/// Check if external libraries require a newer glibc than the current policy allows. -/// Returns the adjusted policy and a list of `"libfoo.so (GLIBC_2.29, GLIBC_2.33)"` -/// descriptions for libraries that caused a downgrade. -fn check_external_libs_policy( - policy: &Policy, - external_libs: &[Library], - target: &Target, -) -> Result<(Policy, Vec)> { - let arch = target.target_arch().to_string(); - let mut platform_policies = if policy.name.starts_with("musllinux") { - MUSLLINUX_POLICIES.clone() - } else if policy.name.starts_with("manylinux") { - MANYLINUX_POLICIES.clone() - } else { - get_default_platform_policies() - }; - for p in &mut platform_policies { - p.fixup_musl_libc_so_name(target.target_arch()); - } - // Policies must be sorted from highest to lowest priority so we find the - // best (most compatible) match first when iterating. 
- debug_assert!( - platform_policies - .windows(2) - .all(|w| w[0].priority >= w[1].priority) - ); - - let mut result = policy.clone(); - let mut offenders = Vec::new(); - for lib in external_libs { - let lib_path = match lib.realpath.as_ref() { - Some(path) => path, - None => continue, - }; - let buffer = fs_err::read(lib_path) - .with_context(|| format!("Failed to read external library {}", lib_path.display()))?; - let elf = match Elf::parse(&buffer) { - Ok(elf) => elf, - Err(_) => continue, - }; - let versioned_libraries = find_versioned_libraries(&elf); - if versioned_libraries.is_empty() { - continue; - } - - // Find the highest policy that this external library satisfies - let unsatisfied = unsatisfied_symbol_versions(&result, &arch, &versioned_libraries); - if unsatisfied.is_empty() { - continue; - } - for candidate in platform_policies.iter() { - if candidate.priority > result.priority { - continue; - } - if unsatisfied_symbol_versions(candidate, &arch, &versioned_libraries).is_empty() { - if candidate.priority < result.priority { - debug!( - "Downgrading tag to {candidate} because external library {} requires {}", - lib.name, - unsatisfied.join(", "), - ); - offenders.push(format!("{} ({})", lib.name, unsatisfied.join(", "))); - result = candidate.clone(); - } - break; - } - } - } - Ok((result, offenders)) -} - -/// Extract library search paths from RUSTFLAGS configuration -#[cfg_attr(test, allow(dead_code))] -fn extract_rustflags_library_paths(manifest_path: &Path, target: &Target) -> Option> { - let manifest_dir = manifest_path.parent()?; - let config = cargo_config2::Config::load_with_cwd(manifest_dir).ok()?; - let rustflags = config.rustflags(target.target_triple()).ok()??; - - // Encode the rustflags for parsing with the rustflags crate - let encoded = rustflags.encode().ok()?; - - let mut library_paths = Vec::new(); - for flag in rustflags::from_encoded(encoded.as_ref()) { - if let rustflags::Flag::LibrarySearchPath { kind: _, path } = flag { - 
library_paths.push(path); - } - } - - if library_paths.is_empty() { - None - } else { - Some(library_paths) - } -} - pub fn relpath(to: &Path, from: &Path) -> PathBuf { let mut suffix_pos = 0; for (f, t) in from.components().zip(to.components()) { @@ -753,7 +522,6 @@ pub fn relpath(to: &Path, from: &Path) -> PathBuf { #[cfg(test)] mod tests { - use crate::Target; use crate::auditwheel::audit::relpath; use pretty_assertions::assert_eq; use std::path::Path; @@ -772,85 +540,4 @@ mod tests { assert_eq!(result, Path::new(expected)); } } - - #[test] - fn test_extract_rustflags_library_paths() { - // Create a temporary directory with a Cargo.toml and .cargo/config.toml - let temp_dir = tempfile::tempdir().unwrap(); - let manifest_path = temp_dir.path().join("Cargo.toml"); - let cargo_dir = temp_dir.path().join(".cargo"); - let config_path = cargo_dir.join("config.toml"); - - // Create the directories - fs_err::create_dir_all(&cargo_dir).unwrap(); - - // Create a minimal Cargo.toml - fs_err::write( - &manifest_path, - r#" -[package] -name = "test-package" -version = "0.1.0" -edition = "2021" -"#, - ) - .unwrap(); - - // Create a config.toml with rustflags containing -L options - fs_err::write( - &config_path, - r#" -[build] -rustflags = ["-L", "dependency=/usr/local/lib", "-L", "/some/other/path", "-C", "opt-level=3"] -"#, - ) - .unwrap(); - - // Test the function - let target = Target::from_target_triple(None).unwrap(); - let paths = super::extract_rustflags_library_paths(&manifest_path, &target); - - if let Some(paths) = paths { - assert_eq!(paths.len(), 2); - assert!( - paths - .iter() - .any(|p| p.to_string_lossy() == "/usr/local/lib") - ); - assert!( - paths - .iter() - .any(|p| p.to_string_lossy() == "/some/other/path") - ); - } else { - // It's possible that rustflags parsing fails in some environments, - // so we just verify the function doesn't panic - println!("No rustflags library paths found, which is acceptable"); - } - } - - #[test] - fn 
test_extract_rustflags_library_paths_no_config() { - // Test with a directory that has no cargo config - let temp_dir = tempfile::tempdir().unwrap(); - let manifest_path = temp_dir.path().join("Cargo.toml"); - - // Create a minimal Cargo.toml - fs_err::write( - &manifest_path, - r#" -[package] -name = "test-package" -version = "0.1.0" -edition = "2021" -"#, - ) - .unwrap(); - - let target = Target::from_target_triple(None).unwrap(); - let paths = super::extract_rustflags_library_paths(&manifest_path, &target); - - // Should return None when there's no cargo config with rustflags - assert!(paths.is_none()); - } } diff --git a/src/auditwheel/linux.rs b/src/auditwheel/linux.rs new file mode 100644 index 000000000..20c65e3cc --- /dev/null +++ b/src/auditwheel/linux.rs @@ -0,0 +1,426 @@ +//! Linux/ELF wheel audit and repair. +//! +//! This module implements [`WheelRepairer`] for Linux ELF binaries, +//! providing the Rust equivalent of [auditwheel](https://github.com/pypa/auditwheel). +//! +//! Delegates to the ELF compliance audit in [`super::audit`] and uses +//! `patchelf` for binary patching (SONAME, DT_NEEDED, RPATH). + +use super::audit::{ + AuditWheelError, IS_LIBPYTHON, VersionedLibrary, auditwheel_rs, find_versioned_libraries, + get_default_platform_policies, get_sysroot_path, is_dynamic_linker, relpath, +}; +use super::policy::{MANYLINUX_POLICIES, MUSLLINUX_POLICIES}; +use super::repair::{GraftedLib, WheelRepairer}; +use super::{PlatformTag, Policy, patchelf}; +use crate::compile::BuildArtifact; +use crate::target::Target; +use anyhow::{Context, Result, bail}; +use goblin::elf::Elf; +use lddtree::Library; +use std::collections::BTreeMap; +use std::path::{Path, PathBuf}; +use tracing::debug; + +/// Linux/ELF wheel repairer (auditwheel equivalent). +/// +/// Bundles external `.so` files and rewrites ELF metadata (SONAME, DT_NEEDED, +/// RPATH) using `patchelf` so that `$ORIGIN`-relative references resolve to +/// the bundled copies in the `.libs/` directory. 
+/// +/// Unlike the macOS repairer, `audit()` performs full +/// manylinux/musllinux compliance checking — the returned [`Policy`] +/// determines which `manylinux_X_Y` / `musllinux_X_Y` platform tag the wheel +/// qualifies for. +pub struct ElfRepairer { + /// The requested platform tag (e.g., manylinux_2_17), if any. + pub platform_tag: Option, + /// The build target (architecture + OS). + pub target: Target, + /// Path to the project's Cargo.toml (used to extract RUSTFLAGS library paths). + pub manifest_path: PathBuf, + /// Whether the artifact is allowed to link libpython (bin bindings only). + pub allow_linking_libpython: bool, +} + +impl WheelRepairer for ElfRepairer { + fn audit( + &self, + artifact: &BuildArtifact, + _ld_paths: Vec, + ) -> Result<(Policy, Vec)> { + get_policy_and_libs( + artifact, + self.platform_tag, + &self.target, + &self.manifest_path, + self.allow_linking_libpython, + ) + } + + fn patch( + &self, + artifacts: &[&BuildArtifact], + grafted: &[GraftedLib], + libs_dir: &Path, + artifact_dir: &Path, + ) -> Result<()> { + patchelf::verify_patchelf()?; + + // Build a lookup from original name → new soname for rewriting references. + let mut name_map: BTreeMap<&str, &str> = BTreeMap::new(); + for l in grafted { + name_map.insert(l.original_name.as_str(), l.new_name.as_str()); + for alias in &l.aliases { + name_map.insert(alias.as_str(), l.new_name.as_str()); + } + } + + // Set soname and rpath on each grafted library. + for lib in grafted { + patchelf::set_soname(&lib.dest_path, &lib.new_name)?; + if !lib.rpath.is_empty() { + patchelf::set_rpath(&lib.dest_path, &"$ORIGIN".to_string())?; + } + } + + // Rewrite DT_NEEDED in each artifact to reference new sonames. 
+ let replacements: Vec<_> = name_map.iter().map(|(k, v)| (*k, v.to_string())).collect(); + for artifact in artifacts { + if !replacements.is_empty() { + patchelf::replace_needed(&artifact.path, &replacements)?; + } + } + + // Update cross-references between grafted libraries + for lib in grafted { + let lib_replacements: Vec<_> = lib + .needed + .iter() + .filter_map(|n| { + name_map + .get(n.as_str()) + .map(|new| (n.as_str(), new.to_string())) + }) + .collect(); + if !lib_replacements.is_empty() { + patchelf::replace_needed(&lib.dest_path, &lib_replacements)?; + } + } + + // Set RPATH on artifacts to find the libs directory + for artifact in artifacts { + let mut new_rpaths = patchelf::get_rpath(&artifact.path)?; + let new_rpath = Path::new("$ORIGIN").join(relpath(libs_dir, artifact_dir)); + new_rpaths.push(new_rpath.to_str().unwrap().to_string()); + let new_rpath = new_rpaths.join(":"); + patchelf::set_rpath(&artifact.path, &new_rpath)?; + } + + Ok(()) + } +} + +/// Find external shared library dependencies (Linux/ELF specific). +/// +/// Uses lddtree to resolve dependencies, then filters out the dynamic linker, +/// musl libc, and libraries on the policy whitelist. 
+#[allow(clippy::result_large_err)] +fn find_external_libs( + artifact: impl AsRef, + policy: &Policy, + sysroot: PathBuf, + ld_paths: Vec, +) -> Result, AuditWheelError> { + let dep_analyzer = lddtree::DependencyAnalyzer::new(sysroot).library_paths(ld_paths); + let deps = dep_analyzer + .analyze(artifact) + .map_err(AuditWheelError::DependencyAnalysisError)?; + let mut ext_libs = Vec::new(); + for (_, lib) in deps.libraries { + let name = &lib.name; + // Skip dynamic linker/loader, musl libc, and white-listed libs + if is_dynamic_linker(name) + || name.starts_with("libc.") + || policy.lib_whitelist.contains(name) + { + continue; + } + ext_libs.push(lib); + } + Ok(ext_libs) +} + +/// For the given compilation result, return the manylinux/musllinux policy and +/// the external libs we need to add to repair it. +fn get_policy_and_libs( + artifact: &BuildArtifact, + platform_tag: Option, + target: &Target, + manifest_path: &Path, + allow_linking_libpython: bool, +) -> Result<(Policy, Vec)> { + let (policy, should_repair) = + auditwheel_rs(artifact, target, platform_tag, allow_linking_libpython).with_context( + || { + if let Some(platform_tag) = platform_tag { + format!("Error ensuring {platform_tag} compliance") + } else { + "Error checking for manylinux/musllinux compliance".to_string() + } + }, + )?; + let external_libs = if should_repair { + let sysroot = get_sysroot_path(target).unwrap_or_else(|_| PathBuf::from("/")); + let mut ld_paths: Vec = artifact.linked_paths.iter().map(PathBuf::from).collect(); + + // Add library search paths from RUSTFLAGS + if let Some(rustflags_paths) = extract_rustflags_library_paths(manifest_path, target) { + ld_paths.extend(rustflags_paths); + } + + let external_libs = find_external_libs(&artifact.path, &policy, sysroot, ld_paths) + .with_context(|| { + if let Some(platform_tag) = platform_tag { + format!("Error repairing wheel for {platform_tag} compliance") + } else { + "Error repairing wheel for manylinux/musllinux 
compliance".to_string() + } + })?; + if allow_linking_libpython { + external_libs + .into_iter() + .filter(|lib| !IS_LIBPYTHON.is_match(&lib.name)) + .collect() + } else { + external_libs + } + } else { + Vec::new() + }; + + // Check external libraries for versioned symbol requirements that may + // require a stricter (less compatible, e.g. newer manylinux) policy than what + // the main artifact alone would need. See https://github.com/PyO3/maturin/issues/1490 + let policy = if !external_libs.is_empty() { + let (adjusted, offenders) = check_external_libs_policy(&policy, &external_libs, target)?; + if platform_tag.is_some() && !offenders.is_empty() { + let tag_kind = if policy.name.starts_with("musllinux") { + "musllinux" + } else { + "manylinux" + }; + bail!( + "External libraries {offenders:?} require newer symbol versions than {policy} allows. \ + Consider using --compatibility {adjusted} or a newer {tag_kind} tag" + ); + } + adjusted + } else { + policy + }; + + Ok((policy, external_libs)) +} + +/// Return the symbol versions required by external libraries that are not +/// allowed by the given policy. +fn unsatisfied_symbol_versions( + policy: &Policy, + arch: &str, + versioned_libraries: &[VersionedLibrary], +) -> Vec { + let arch_versions = match policy.symbol_versions.get(arch) { + Some(v) => v, + None => return vec!["(unsupported arch)".to_string()], + }; + let mut unsatisfied = Vec::new(); + for library in versioned_libraries { + if !policy.lib_whitelist.contains(&library.name) { + continue; + } + for (name, versions_needed) in library.parsed_versions() { + match arch_versions.get(&name) { + Some(versions_allowed) => { + for v in versions_needed.difference(versions_allowed) { + unsatisfied.push(format!("{name}_{v}")); + } + } + None => { + for v in &versions_needed { + unsatisfied.push(format!("{name}_{v}")); + } + } + } + } + } + unsatisfied.sort(); + unsatisfied +} + +/// Check if external libraries require a newer glibc than the current policy allows. 
+/// Returns the adjusted policy and a list of descriptions for libraries that caused +/// a downgrade. +fn check_external_libs_policy( + policy: &Policy, + external_libs: &[Library], + target: &Target, +) -> Result<(Policy, Vec)> { + let arch = target.target_arch().to_string(); + let mut platform_policies = if policy.name.starts_with("musllinux") { + MUSLLINUX_POLICIES.clone() + } else if policy.name.starts_with("manylinux") { + MANYLINUX_POLICIES.clone() + } else { + get_default_platform_policies() + }; + for p in &mut platform_policies { + p.fixup_musl_libc_so_name(target.target_arch()); + } + debug_assert!( + platform_policies + .windows(2) + .all(|w| w[0].priority >= w[1].priority) + ); + + let mut result = policy.clone(); + let mut offenders = Vec::new(); + for lib in external_libs { + let lib_path = match lib.realpath.as_ref() { + Some(path) => path, + None => continue, + }; + let buffer = fs_err::read(lib_path) + .with_context(|| format!("Failed to read external library {}", lib_path.display()))?; + let elf = match Elf::parse(&buffer) { + Ok(elf) => elf, + Err(_) => continue, + }; + let versioned_libraries = find_versioned_libraries(&elf); + if versioned_libraries.is_empty() { + continue; + } + + let unsatisfied = unsatisfied_symbol_versions(&result, &arch, &versioned_libraries); + if unsatisfied.is_empty() { + continue; + } + for candidate in platform_policies.iter() { + if candidate.priority > result.priority { + continue; + } + if unsatisfied_symbol_versions(candidate, &arch, &versioned_libraries).is_empty() { + if candidate.priority < result.priority { + debug!( + "Downgrading tag to {candidate} because external library {} requires {}", + lib.name, + unsatisfied.join(", "), + ); + offenders.push(format!("{} ({})", lib.name, unsatisfied.join(", "))); + result = candidate.clone(); + } + break; + } + } + } + Ok((result, offenders)) +} + +/// Extract library search paths from RUSTFLAGS configuration. 
+#[cfg_attr(test, allow(dead_code))] +fn extract_rustflags_library_paths(manifest_path: &Path, target: &Target) -> Option> { + let manifest_dir = manifest_path.parent()?; + let config = cargo_config2::Config::load_with_cwd(manifest_dir).ok()?; + let rustflags = config.rustflags(target.target_triple()).ok()??; + + let encoded = rustflags.encode().ok()?; + + let mut library_paths = Vec::new(); + for flag in rustflags::from_encoded(encoded.as_ref()) { + if let rustflags::Flag::LibrarySearchPath { kind: _, path } = flag { + library_paths.push(path); + } + } + + if library_paths.is_empty() { + None + } else { + Some(library_paths) + } +} + +#[cfg(test)] +mod tests { + use crate::Target; + + #[test] + fn test_extract_rustflags_library_paths() { + let temp_dir = tempfile::tempdir().unwrap(); + let manifest_path = temp_dir.path().join("Cargo.toml"); + let cargo_dir = temp_dir.path().join(".cargo"); + let config_path = cargo_dir.join("config.toml"); + + fs_err::create_dir_all(&cargo_dir).unwrap(); + + fs_err::write( + &manifest_path, + r#" +[package] +name = "test-package" +version = "0.1.0" +edition = "2021" +"#, + ) + .unwrap(); + + fs_err::write( + &config_path, + r#" +[build] +rustflags = ["-L", "dependency=/usr/local/lib", "-L", "/some/other/path", "-C", "opt-level=3"] +"#, + ) + .unwrap(); + + let target = Target::from_target_triple(None).unwrap(); + let paths = super::extract_rustflags_library_paths(&manifest_path, &target); + + if let Some(paths) = paths { + assert_eq!(paths.len(), 2); + assert!( + paths + .iter() + .any(|p| p.to_string_lossy() == "/usr/local/lib") + ); + assert!( + paths + .iter() + .any(|p| p.to_string_lossy() == "/some/other/path") + ); + } else { + println!("No rustflags library paths found, which is acceptable"); + } + } + + #[test] + fn test_extract_rustflags_library_paths_no_config() { + let temp_dir = tempfile::tempdir().unwrap(); + let manifest_path = temp_dir.path().join("Cargo.toml"); + + fs_err::write( + &manifest_path, + r#" +[package] 
+name = "test-package" +version = "0.1.0" +edition = "2021" +"#, + ) + .unwrap(); + + let target = Target::from_target_triple(None).unwrap(); + let paths = super::extract_rustflags_library_paths(&manifest_path, &target); + + assert!(paths.is_none()); + } +} diff --git a/src/auditwheel/mod.rs b/src/auditwheel/mod.rs index 588ccbd55..bda1d49a0 100644 --- a/src/auditwheel/mod.rs +++ b/src/auditwheel/mod.rs @@ -1,4 +1,5 @@ mod audit; +mod linux; mod musllinux; pub mod patchelf; mod platform_tag; @@ -9,8 +10,8 @@ pub mod sbom; #[cfg(feature = "sbom")] mod whichprovides; -pub use audit::find_external_libs; pub use audit::*; +pub use linux::ElfRepairer; pub use platform_tag::PlatformTag; pub use policy::Policy; pub use repair::{WheelRepairer, log_grafted_libs, prepare_grafted_libs}; From 6c971ee5a3724f91d8f188a0ac71089d0ff10745 Mon Sep 17 00:00:00 2001 From: messense Date: Tue, 31 Mar 2026 21:49:14 +0800 Subject: [PATCH 3/7] refactor: use WheelRepairer trait in build_context/repair.rs Refactor add_external_libs() to use prepare_grafted_libs() for shared library preparation and WheelRepairer::patch() for platform-specific binary patching. Add make_repairer() that creates the appropriate repairer based on target platform. Extract get_artifact_dir() helper. The auditwheel() method now delegates to WheelRepairer::audit() via the ElfRepairer on Linux. macOS and Windows stubs return defaults (to be implemented in later commits). No behavior change for Linux builds. 
--- src/build_context/repair.rs | 226 +++++++++++++----------------------- 1 file changed, 79 insertions(+), 147 deletions(-) diff --git a/src/build_context/repair.rs b/src/build_context/repair.rs index 3e1d476a0..8d8dcb8d8 100644 --- a/src/build_context/repair.rs +++ b/src/build_context/repair.rs @@ -1,24 +1,61 @@ #[cfg(feature = "sbom")] use crate::auditwheel::get_sysroot_path; use crate::auditwheel::{ - AuditWheelMode, PlatformTag, Policy, get_policy_and_libs, patchelf, relpath, + AuditWheelMode, ElfRepairer, PlatformTag, Policy, WheelRepairer, log_grafted_libs, patchelf, + prepare_grafted_libs, }; #[cfg(feature = "sbom")] use crate::module_writer::ModuleWriter; use crate::module_writer::WheelWriter; -use crate::util::hash_file; use crate::{BridgeModel, BuildArtifact, PythonInterpreter, VirtualWriter}; use anyhow::{Context, Result, bail}; use fs_err as fs; use lddtree::Library; use normpath::PathExt; use std::borrow::Borrow; -use std::collections::{BTreeMap, HashSet}; use std::path::{Path, PathBuf}; use super::BuildContext; impl BuildContext { + /// Create the appropriate platform-specific wheel repairer. 
+ fn make_repairer(&self, platform_tag: &[PlatformTag]) -> Option> { + if self.project.target.is_linux() { + let mut musllinux: Vec<_> = platform_tag + .iter() + .filter(|tag| tag.is_musllinux()) + .copied() + .collect(); + musllinux.sort(); + let mut others: Vec<_> = platform_tag + .iter() + .filter(|tag| !tag.is_musllinux()) + .copied() + .collect(); + others.sort(); + + let allow_linking_libpython = self.project.bridge().is_bin(); + + let effective_tag = if self.project.bridge().is_bin() && !musllinux.is_empty() { + Some(musllinux[0]) + } else { + others.first().or_else(|| musllinux.first()).copied() + }; + + Some(Box::new(ElfRepairer { + platform_tag: effective_tag, + target: self.project.target.clone(), + manifest_path: self.project.manifest_path.clone(), + allow_linking_libpython, + })) + } else if self.project.target.is_macos() { + // TODO: MacOSRepairer (Phase 2) + None + } else { + None + } + } + pub(crate) fn auditwheel( &self, artifact: &BuildArtifact, @@ -40,39 +77,31 @@ impl BuildContext { return Ok((Policy::default(), Vec::new())); } - let mut musllinux: Vec<_> = platform_tag - .iter() - .filter(|tag| tag.is_musllinux()) - .copied() - .collect(); - musllinux.sort(); - let mut others: Vec<_> = platform_tag - .iter() - .filter(|tag| !tag.is_musllinux()) - .copied() - .collect(); - others.sort(); + let repairer = match self.make_repairer(platform_tag) { + Some(r) => r, + None => return Ok((Policy::default(), Vec::new())), + }; - // only bin bindings allow linking to libpython, extension modules must not - let allow_linking_libpython = self.project.bridge().is_bin(); - if self.project.bridge().is_bin() && !musllinux.is_empty() { - return get_policy_and_libs( - artifact, - Some(musllinux[0]), - &self.project.target, - &self.project.manifest_path, - allow_linking_libpython, - ); - } + let ld_paths: Vec = artifact.linked_paths.iter().map(PathBuf::from).collect(); + repairer.audit(artifact, ld_paths) + } - let tag = others.first().or_else(|| 
musllinux.first()).copied(); - get_policy_and_libs( - artifact, - tag, - &self.project.target, - &self.project.manifest_path, - allow_linking_libpython, - ) + /// Compute the wheel-internal directory where the artifact resides. + fn get_artifact_dir(&self) -> PathBuf { + match self.project.bridge() { + // cffi bindings that contains '.' in the module name will be split into directories + BridgeModel::Cffi => self.project.module_name.split(".").collect::(), + // For namespace packages the modules reside at ${module_name}.so + // where periods are replaced with slashes so for example my.namespace.module would reside + // at my/namespace/module.so + _ if self.project.module_name.contains(".") => { + let mut path = self.project.module_name.split(".").collect::(); + path.pop(); + path + } + // For other bindings artifact .so file usually resides at ${module_name}/${module_name}.so + _ => PathBuf::from(&self.project.module_name), + } } /// Add library search paths in Cargo target directory rpath when building in editable mode @@ -142,110 +171,35 @@ impl BuildContext { if matches!(self.python.auditwheel, AuditWheelMode::Check) { bail!( - "Your library is not manylinux/musllinux compliant because it requires copying the above libraries. \ + "Your library requires copying the above external libraries. \ Re-run with `--auditwheel=repair` to copy them." ); } - patchelf::verify_patchelf()?; + let repairer = self + .make_repairer(&self.python.platform_tag) + .context("No wheel repairer available for this platform")?; // Put external libs to ${distribution_name}.libs directory // See https://github.com/pypa/auditwheel/issues/89 // Use the distribution name (matching auditwheel's behavior) to avoid // conflicts with other packages in the same namespace. 
- let libs_dir = PathBuf::from(format!( - "{}.libs", - self.project.metadata24.get_distribution_escaped() - )); + let dist_name = self.project.metadata24.get_distribution_escaped(); + let libs_dir = repairer.libs_dir(&dist_name); let temp_dir = writer.temp_dir()?; - let mut soname_map = BTreeMap::new(); - let mut libs_copied = HashSet::new(); - for lib in ext_libs.iter().flatten() { - let lib_path = lib.realpath.clone().with_context(|| { - format!( - "Cannot repair wheel, because required library {} could not be located.", - lib.path.display() - ) - })?; - // Generate a new soname with a short hash - let short_hash = &hash_file(&lib_path)?[..8]; - let (file_stem, file_ext) = lib.name.split_once('.').with_context(|| { - format!("Unexpected library name without extension: {}", lib.name) - })?; - let new_soname = if !file_stem.ends_with(&format!("-{short_hash}")) { - format!("{file_stem}-{short_hash}.{file_ext}") - } else { - format!("{file_stem}.{file_ext}") - }; + let (grafted, libs_copied) = prepare_grafted_libs(ext_libs, temp_dir.path())?; - // Copy the original lib to a tmpdir and modify some of its properties - // for example soname and rpath - let dest_path = temp_dir.path().join(&new_soname); - fs::copy(&lib_path, &dest_path)?; - libs_copied.insert(lib_path); + let artifact_dir = self.get_artifact_dir(); + let artifact_refs: Vec<&BuildArtifact> = artifacts.iter().map(|a| a.borrow()).collect(); + repairer.patch(&artifact_refs, &grafted, &libs_dir, &artifact_dir)?; - // fs::copy copies permissions as well, and the original - // file may have been read-only - let mut perms = fs::metadata(&dest_path)?.permissions(); - #[allow(clippy::permissions_set_readonly_false)] - perms.set_readonly(false); - fs::set_permissions(&dest_path, perms)?; - - patchelf::set_soname(&dest_path, &new_soname)?; - if !lib.rpath.is_empty() { - patchelf::set_rpath(&dest_path, &libs_dir)?; - } - soname_map.insert( - lib.name.clone(), - (new_soname.clone(), dest_path.clone(), 
lib.needed.clone()), - ); - } - - for (artifact, artifact_ext_libs) in artifacts.iter().zip(ext_libs) { - let artifact = artifact.borrow(); - let artifact_deps: HashSet<_> = artifact_ext_libs.iter().map(|lib| &lib.name).collect(); - let replacements = soname_map - .iter() - .filter_map(|(k, v)| { - if artifact_deps.contains(k) { - Some((k, v.0.clone())) - } else { - None - } - }) - .collect::>(); - if !replacements.is_empty() { - patchelf::replace_needed(&artifact.path, &replacements[..])?; - } - } - - // we grafted in a bunch of libraries and modified their sonames, but - // they may have internal dependencies (DT_NEEDED) on one another, so - // we need to update those records so each now knows about the new - // name of the other. - for (new_soname, path, needed) in soname_map.values() { - let mut replacements = Vec::new(); - for n in needed { - if soname_map.contains_key(n) { - replacements.push((n, soname_map[n].0.clone())); - } - } - if !replacements.is_empty() { - patchelf::replace_needed(path, &replacements[..])?; - } - // Use add_file_force to bypass exclusion checks for external shared libraries - writer.add_file_force(libs_dir.join(new_soname), path, true)?; + // Add grafted libraries to the wheel + for lib in &grafted { + writer.add_file_force(libs_dir.join(&lib.new_name), &lib.dest_path, true)?; } - // Sort for deterministic output. - let mut grafted_paths: Vec = libs_copied.into_iter().collect(); - grafted_paths.sort(); - - eprintln!( - "🖨 Copied external shared libraries to package {} directory.", - libs_dir.display() - ); + log_grafted_libs(&libs_copied, &libs_dir); // Generate auditwheel SBOM for the grafted libraries. // This mirrors Python auditwheel's behaviour of writing a CycloneDX @@ -264,6 +218,8 @@ impl BuildContext { // prefixes when querying the host package manager. 
let sysroot = get_sysroot_path(&self.project.target).unwrap_or_else(|_| PathBuf::from("/")); + let mut grafted_paths: Vec = libs_copied.into_iter().collect(); + grafted_paths.sort(); if let Some(sbom_json) = crate::auditwheel::sbom::create_auditwheel_sbom( &self.project.metadata24.name, &self.project.metadata24.version.to_string(), @@ -280,30 +236,6 @@ impl BuildContext { } } - let artifact_dir = match self.project.bridge() { - // cffi bindings that contains '.' in the module name will be split into directories - BridgeModel::Cffi => self.project.module_name.split(".").collect::(), - // For namespace packages the modules reside at ${module_name}.so - // where periods are replaced with slashes so for example my.namespace.module would reside - // at my/namespace/module.so - _ if self.project.module_name.contains(".") => { - let mut path = self.project.module_name.split(".").collect::(); - path.pop(); - path - } - // For other bindings artifact .so file usually resides at ${module_name}/${module_name}.so - _ => PathBuf::from(&self.project.module_name), - }; - for artifact in artifacts { - let artifact = artifact.borrow(); - let mut new_rpaths = patchelf::get_rpath(&artifact.path)?; - // TODO: clean existing rpath entries if it's not pointed to a location within the wheel - // See https://github.com/pypa/auditwheel/blob/353c24250d66951d5ac7e60b97471a6da76c123f/src/auditwheel/repair.py#L160 - let new_rpath = Path::new("$ORIGIN").join(relpath(&libs_dir, &artifact_dir)); - new_rpaths.push(new_rpath.to_str().unwrap().to_string()); - let new_rpath = new_rpaths.join(":"); - patchelf::set_rpath(&artifact.path, &new_rpath)?; - } Ok(()) } From 82a63b49e12a895088aa1eb1aaded17bb468d583 Mon Sep 17 00:00:00 2001 From: messense Date: Tue, 31 Mar 2026 23:03:24 +0800 Subject: [PATCH 4/7] refactor: move Linux/ELF specific code from audit.rs to linux.rs Move all ELF-specific types and functions into the linux module where they belong, leaving only cross-platform utilities in audit.rs: 
Moved to linux.rs: - IS_LIBPYTHON, is_dynamic_linker() - AuditWheelError - VersionedLibrary, find_versioned_libraries() - find_incompliant_symbols(), policy_is_satisfied() - get_default_platform_policies() - auditwheel_rs() Remaining in audit.rs: - AuditWheelMode (general config enum) - get_sysroot_path() (used by SBOM + linux) - relpath() (generic path utility) Items that were previously pub(crate) are now private to linux.rs since they have no external consumers. --- src/auditwheel/audit.rs | 420 +----------------------------------- src/auditwheel/linux.rs | 456 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 446 insertions(+), 430 deletions(-) diff --git a/src/auditwheel/audit.rs b/src/auditwheel/audit.rs index 623421f19..1f7fc3cbe 100644 --- a/src/auditwheel/audit.rs +++ b/src/auditwheel/audit.rs @@ -1,76 +1,8 @@ -use super::musllinux::{find_musl_libc, get_musl_version}; -use super::policy::{MANYLINUX_POLICIES, MUSLLINUX_POLICIES, Policy}; -use crate::auditwheel::PlatformTag; -use crate::compile::BuildArtifact; -use crate::target::{Arch, Target}; +use crate::target::Target; use anyhow::{Context, Result, bail}; -use fs_err::File; -use goblin::elf::{Elf, sym::STB_WEAK, sym::STT_FUNC}; -use once_cell::sync::Lazy; -use regex::Regex; use serde::{Deserialize, Serialize}; -use std::collections::{HashMap, HashSet}; -use std::io::Read; +use std::fmt; use std::path::{Path, PathBuf}; -use std::{fmt, io}; -use thiserror::Error; - -pub(crate) static IS_LIBPYTHON: Lazy = - Lazy::new(|| Regex::new(r"^libpython3\.\d+m?u?t?\.so\.\d+\.\d+$").unwrap()); - -/// Returns `true` if the given shared-library name is a dynamic linker -/// (e.g. `ld-linux-x86-64.so.2`, `ld64.so.2`, `ld-musl-*.so.1`). 
-pub(crate) fn is_dynamic_linker(name: &str) -> bool { - name.starts_with("ld-linux") - || name == "ld64.so.2" - || name == "ld64.so.1" - || name.starts_with("ld-musl") -} - -/// Error raised during auditing an elf file for manylinux/musllinux compatibility -#[derive(Error, Debug)] -#[error("Ensuring manylinux/musllinux compliance failed")] -pub enum AuditWheelError { - /// The wheel couldn't be read - #[error("Failed to read the wheel")] - IoError(#[source] io::Error), - /// Reexports goblin parsing errors - #[error("Goblin failed to parse the elf file")] - GoblinError(#[source] goblin::error::Error), - /// The elf file isn't manylinux/musllinux compatible. Contains the list of offending - /// libraries. - #[error( - "Your library links libpython ({0}), which libraries must not do. Have you forgotten to activate the extension-module feature?" - )] - LinksLibPythonError(String), - /// The elf file isn't manylinux/musllinux compatible. Contains the list of offending - /// libraries. - #[error( - "Your library is not {0} compliant because it links the following forbidden libraries: {1:?}" - )] - LinksForbiddenLibrariesError(Policy, Vec), - /// The elf file isn't manylinux/musllinux compatible. Contains the list of offending - /// libraries. - #[error( - "Your library is not {0} compliant because of the presence of too-recent versioned symbols: {1:?}. Consider building in a manylinux docker container" - )] - VersionedSymbolTooNewError(Policy, Vec), - /// The elf file isn't manylinux/musllinux compatible. Contains the list of offending - /// libraries with blacked-list symbols. - #[error("Your library is not {0} compliant because it depends on black-listed symbols: {1:?}")] - BlackListedSymbolsError(Policy, Vec), - /// The elf file isn't manylinux/musllinux compatible. 
Contains unsupported architecture - #[error("Your library is not {0} compliant because it has unsupported architecture: {1}")] - UnsupportedArchitecture(Policy, String), - /// This platform tag isn't defined by auditwheel yet - #[error( - "{0} compatibility policy is not defined by auditwheel yet, pass `--auditwheel=skip` to proceed anyway" - )] - UndefinedPolicy(PlatformTag), - /// Failed to analyze external shared library dependencies of the wheel - #[error("Failed to analyze external shared library dependencies of the wheel")] - DependencyAnalysisError(#[source] lddtree::Error), -} /// Auditwheel mode #[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize, clap::ValueEnum)] @@ -96,354 +28,6 @@ impl fmt::Display for AuditWheelMode { } } -#[derive(Clone, Debug)] -pub struct VersionedLibrary { - /// library name - pub name: String, - /// versions needed - versions: HashSet, -} - -impl VersionedLibrary { - /// Parse version strings (e.g. "GLIBC_2.17") into a map of name -> set of versions. - /// e.g. 
{"GLIBC" -> {"2.17", "2.5"}, "GCC" -> {"3.0"}} - /// - pub(crate) fn parsed_versions(&self) -> HashMap> { - let mut result: HashMap> = HashMap::new(); - for v in &self.versions { - if let Some((name, version)) = v.split_once('_') { - result - .entry(name.to_string()) - .or_default() - .insert(version.to_string()); - } - } - result - } -} - -/// Find required dynamic linked libraries with version information -pub fn find_versioned_libraries(elf: &Elf) -> Vec { - let mut symbols = Vec::new(); - if let Some(verneed) = &elf.verneed { - for need_file in verneed.iter() { - if let Some(name) = elf.dynstrtab.get_at(need_file.vn_file) { - // Skip dynamic linker/loader - if is_dynamic_linker(name) { - continue; - } - let mut versions = HashSet::new(); - for need_ver in need_file.iter() { - if let Some(aux_name) = elf.dynstrtab.get_at(need_ver.vna_name) { - versions.insert(aux_name.to_string()); - } - } - symbols.push(VersionedLibrary { - name: name.to_string(), - versions, - }); - } - } - } - symbols -} - -/// Find incompliant symbols from symbol versions -#[allow(clippy::result_large_err)] -fn find_incompliant_symbols( - elf: &Elf, - symbol_versions: &[String], -) -> Result, AuditWheelError> { - let mut symbols = Vec::new(); - let strtab = &elf.strtab; - for sym in &elf.syms { - if sym.st_type() == STT_FUNC { - let name = strtab.get_at(sym.st_name).unwrap_or("BAD NAME"); - for symbol_version in symbol_versions { - if name.ends_with(&format!("@{symbol_version}")) { - symbols.push(name.to_string()); - } - } - } - } - Ok(symbols) -} - -#[allow(clippy::result_large_err)] -fn policy_is_satisfied( - policy: &Policy, - elf: &Elf, - arch: &str, - deps: &[String], - versioned_libraries: &[VersionedLibrary], - allow_linking_libpython: bool, -) -> Result<(), AuditWheelError> { - let arch_versions = &policy.symbol_versions.get(arch).ok_or_else(|| { - AuditWheelError::UnsupportedArchitecture(policy.clone(), arch.to_string()) - })?; - let mut offending_libs = HashSet::new(); - let mut 
offending_versioned_syms = HashSet::new(); - let mut offending_blacklist_syms = HashMap::new(); - let undef_symbols: HashSet = elf - .dynsyms - .iter() - .filter_map(|sym| { - // Do not consider weak symbols as undefined, they are optional at runtime. - if sym.st_shndx == goblin::elf::section_header::SHN_UNDEF as usize - && sym.st_bind() != STB_WEAK - { - elf.dynstrtab.get_at(sym.st_name).map(ToString::to_string) - } else { - None - } - }) - .collect(); - - for dep in deps { - if is_dynamic_linker(dep) { - continue; - } - if !policy.lib_whitelist.contains(dep) { - if allow_linking_libpython && IS_LIBPYTHON.is_match(dep) { - continue; - } - offending_libs.insert(dep.clone()); - } - if let Some(sym_list) = policy.blacklist.get(dep) { - let mut intersection: Vec<_> = sym_list.intersection(&undef_symbols).cloned().collect(); - if !intersection.is_empty() { - intersection.sort(); - offending_blacklist_syms.insert(dep, intersection); - } - } - } - for library in versioned_libraries { - if !policy.lib_whitelist.contains(&library.name) { - offending_libs.insert(library.name.clone()); - continue; - } - for (name, versions_needed) in library.parsed_versions() { - let Some(versions_allowed) = arch_versions.get(&name) else { - offending_versioned_syms.insert(format!( - "{} offending versions: unknown symbol namespace {name}", - library.name, - )); - continue; - }; - if !versions_needed.is_subset(versions_allowed) { - let offending_versions: Vec<&str> = versions_needed - .difference(versions_allowed) - .map(|v| v.as_ref()) - .collect(); - let offending_symbol_versions: Vec = offending_versions - .iter() - .map(|v| format!("{name}_{v}")) - .collect(); - let offending_symbols = find_incompliant_symbols(elf, &offending_symbol_versions)?; - let offender = if offending_symbols.is_empty() { - format!( - "{} offending versions: {}", - library.name, - offending_symbol_versions.join(", ") - ) - } else { - format!( - "{} offending symbols: {}", - library.name, - offending_symbols.join(", 
") - ) - }; - offending_versioned_syms.insert(offender); - } - } - } - // Check for black-listed symbols - if !offending_blacklist_syms.is_empty() { - let offenders = offending_blacklist_syms - .into_iter() - .map(|(lib, syms)| format!("{}: {}", lib, syms.join(", "))) - .collect(); - return Err(AuditWheelError::BlackListedSymbolsError( - policy.clone(), - offenders, - )); - } - // Check for too-recent versioned symbols - if !offending_versioned_syms.is_empty() { - return Err(AuditWheelError::VersionedSymbolTooNewError( - policy.clone(), - offending_versioned_syms.into_iter().collect(), - )); - } - // Check for libpython and forbidden libraries - let offenders: Vec = offending_libs.into_iter().collect(); - match offenders.as_slice() { - [] => Ok(()), - [lib] if IS_LIBPYTHON.is_match(lib) => { - Err(AuditWheelError::LinksLibPythonError(lib.clone())) - } - offenders => Err(AuditWheelError::LinksForbiddenLibrariesError( - policy.clone(), - offenders.to_vec(), - )), - } -} - -pub(crate) fn get_default_platform_policies() -> Vec { - if let Ok(Some(musl_libc)) = find_musl_libc() - && let Ok(Some((major, minor))) = get_musl_version(musl_libc) - { - return MUSLLINUX_POLICIES - .iter() - .filter(|policy| { - policy.name == "linux" || policy.name == format!("musllinux_{major}_{minor}") - }) - .cloned() - .collect(); - } - MANYLINUX_POLICIES.clone() -} - -/// An reimplementation of auditwheel, which checks elf files for -/// manylinux/musllinux compliance. -/// -/// If `platform_tag`, is None, it returns the the highest matching manylinux/musllinux policy -/// and whether we need to repair with patchelf,, or `linux` if nothing else matches. -/// It will error for bogus cases, e.g. if libpython is linked. -/// -/// If a specific manylinux/musllinux version is given, compliance is checked and a warning printed if -/// a higher version would be possible. -/// -/// Does nothing for `platform_tag` set to `Off`/`Linux` or non-linux platforms. 
-#[allow(clippy::result_large_err)] -pub fn auditwheel_rs( - artifact: &BuildArtifact, - target: &Target, - platform_tag: Option, - allow_linking_libpython: bool, -) -> Result<(Policy, bool), AuditWheelError> { - if !target.is_linux() || platform_tag == Some(PlatformTag::Linux) { - return Ok((Policy::default(), false)); - } - let path = &artifact.path; - let arch = target.target_arch().to_string(); - let mut file = File::open(path).map_err(AuditWheelError::IoError)?; - let mut buffer = Vec::new(); - file.read_to_end(&mut buffer) - .map_err(AuditWheelError::IoError)?; - let elf = Elf::parse(&buffer).map_err(AuditWheelError::GoblinError)?; - // This returns essentially the same as ldd - let deps: Vec = elf.libraries.iter().map(ToString::to_string).collect(); - let versioned_libraries = find_versioned_libraries(&elf); - - // Find the highest possible policy, if any - let platform_policies = match platform_tag { - Some(PlatformTag::Manylinux { .. }) => MANYLINUX_POLICIES.clone(), - Some(PlatformTag::Musllinux { major, minor }) => MUSLLINUX_POLICIES - .clone() - .into_iter() - .filter(|policy| { - policy.name == "linux" || policy.name == format!("musllinux_{major}_{minor}") - }) - .map(|mut policy| { - policy.fixup_musl_libc_so_name(target.target_arch()); - policy - }) - .collect(), - None | Some(PlatformTag::Pypi) => { - // Using the default for the `pypi` tag means we're correctly using manylinux where - // possible. 
- let mut policies = get_default_platform_policies(); - for policy in &mut policies { - policy.fixup_musl_libc_so_name(target.target_arch()); - } - policies - } - Some(PlatformTag::Linux) => unreachable!(), - }; - let mut highest_policy = None; - let mut should_repair = false; - for policy in platform_policies.iter() { - let result = policy_is_satisfied( - policy, - &elf, - &arch, - &deps, - &versioned_libraries, - allow_linking_libpython, - ); - match result { - Ok(_) => { - highest_policy = Some(policy.clone()); - should_repair = false; - break; - } - Err(AuditWheelError::LinksForbiddenLibrariesError(..)) => { - highest_policy = Some(policy.clone()); - should_repair = true; - break; - } - Err(AuditWheelError::VersionedSymbolTooNewError(..)) - | Err(AuditWheelError::BlackListedSymbolsError(..)) - // UnsupportedArchitecture happens when trying 2010 with aarch64 - | Err(AuditWheelError::UnsupportedArchitecture(..)) => continue, - // If there was an error parsing the symbols or libpython was linked, - // we error no matter what the requested policy was - Err(err) => return Err(err), - } - } - - let policy = if let Some(platform_tag) = platform_tag { - let mut policy = Policy::from_tag(&platform_tag) - .ok_or(AuditWheelError::UndefinedPolicy(platform_tag))?; - policy.fixup_musl_libc_so_name(target.target_arch()); - - if let Some(highest_policy) = highest_policy { - // Don't recommend manylinux1 because rust doesn't support it anymore - if policy.priority < highest_policy.priority && highest_policy.name != "manylinux_2_5" { - eprintln!( - "📦 Wheel is eligible for a higher priority tag. 
\ - You requested {policy} but this wheel is eligible for {highest_policy}", - ); - } - } - - match policy_is_satisfied( - &policy, - &elf, - &arch, - &deps, - &versioned_libraries, - allow_linking_libpython, - ) { - Ok(_) => { - should_repair = false; - Ok(policy) - } - Err(AuditWheelError::LinksForbiddenLibrariesError(..)) => { - should_repair = true; - Ok(policy) - } - Err(err) => Err(err), - } - } else if let Some(policy) = highest_policy { - Ok(policy) - } else if target.target_arch() == Arch::Armv6L || target.target_arch() == Arch::Armv7L { - // Old arm versions - // https://github.com/pypi/warehouse/blob/556e1e3390999381c382873b003a779a1363cb4d/warehouse/forklift/legacy.py#L122-L123 - Ok(Policy::default()) - } else { - eprintln!( - "⚠️ Warning: No compatible platform tag found, using the linux tag instead. \ - You won't be able to upload those wheels to PyPI." - ); - - // Fallback to linux - Ok(Policy::default()) - }?; - Ok((policy, should_repair)) -} - /// Get sysroot path from target C compiler /// /// Currently only gcc is supported, clang doesn't have a `--print-sysroot` option diff --git a/src/auditwheel/linux.rs b/src/auditwheel/linux.rs index 20c65e3cc..85c067409 100644 --- a/src/auditwheel/linux.rs +++ b/src/auditwheel/linux.rs @@ -3,25 +3,439 @@ //! This module implements [`WheelRepairer`] for Linux ELF binaries, //! providing the Rust equivalent of [auditwheel](https://github.com/pypa/auditwheel). //! -//! Delegates to the ELF compliance audit in [`super::audit`] and uses -//! `patchelf` for binary patching (SONAME, DT_NEEDED, RPATH). - -use super::audit::{ - AuditWheelError, IS_LIBPYTHON, VersionedLibrary, auditwheel_rs, find_versioned_libraries, - get_default_platform_policies, get_sysroot_path, is_dynamic_linker, relpath, -}; -use super::policy::{MANYLINUX_POLICIES, MUSLLINUX_POLICIES}; +//! It contains all ELF-specific logic: manylinux/musllinux compliance +//! auditing, external dependency discovery via lddtree, versioned symbol +//! 
checking, and binary patching via `patchelf` (SONAME, DT_NEEDED, RPATH). + +use super::audit::{get_sysroot_path, relpath}; +use super::musllinux::{find_musl_libc, get_musl_version}; +use super::policy::{MANYLINUX_POLICIES, MUSLLINUX_POLICIES, Policy}; use super::repair::{GraftedLib, WheelRepairer}; -use super::{PlatformTag, Policy, patchelf}; +use super::{PlatformTag, patchelf}; use crate::compile::BuildArtifact; -use crate::target::Target; +use crate::target::{Arch, Target}; use anyhow::{Context, Result, bail}; -use goblin::elf::Elf; +use fs_err::File; +use goblin::elf::{Elf, sym::STB_WEAK, sym::STT_FUNC}; use lddtree::Library; -use std::collections::BTreeMap; +use once_cell::sync::Lazy; +use regex::Regex; +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::io; +use std::io::Read; use std::path::{Path, PathBuf}; +use thiserror::Error; use tracing::debug; +pub(crate) static IS_LIBPYTHON: Lazy = + Lazy::new(|| Regex::new(r"^libpython3\.\d+m?u?t?\.so\.\d+\.\d+$").unwrap()); + +/// Returns `true` if the given shared-library name is a dynamic linker +/// (e.g. `ld-linux-x86-64.so.2`, `ld64.so.2`, `ld-musl-*.so.1`). +fn is_dynamic_linker(name: &str) -> bool { + name.starts_with("ld-linux") + || name == "ld64.so.2" + || name == "ld64.so.1" + || name.starts_with("ld-musl") +} + +/// Error raised during auditing an elf file for manylinux/musllinux compatibility +#[derive(Error, Debug)] +#[error("Ensuring manylinux/musllinux compliance failed")] +pub enum AuditWheelError { + /// The wheel couldn't be read + #[error("Failed to read the wheel")] + IoError(#[source] io::Error), + /// Reexports goblin parsing errors + #[error("Goblin failed to parse the elf file")] + GoblinError(#[source] goblin::error::Error), + /// The elf file isn't manylinux/musllinux compatible. Contains the list of offending + /// libraries. + #[error( + "Your library links libpython ({0}), which libraries must not do. Have you forgotten to activate the extension-module feature?" 
+ )] + LinksLibPythonError(String), + /// The elf file isn't manylinux/musllinux compatible. Contains the list of offending + /// libraries. + #[error( + "Your library is not {0} compliant because it links the following forbidden libraries: {1:?}" + )] + LinksForbiddenLibrariesError(Policy, Vec), + /// The elf file isn't manylinux/musllinux compatible. Contains the list of offending + /// libraries. + #[error( + "Your library is not {0} compliant because of the presence of too-recent versioned symbols: {1:?}. Consider building in a manylinux docker container" + )] + VersionedSymbolTooNewError(Policy, Vec), + /// The elf file isn't manylinux/musllinux compatible. Contains the list of offending + /// libraries with black-listed symbols. + #[error("Your library is not {0} compliant because it depends on black-listed symbols: {1:?}")] + BlackListedSymbolsError(Policy, Vec), + /// The elf file isn't manylinux/musllinux compatible. Contains unsupported architecture + #[error("Your library is not {0} compliant because it has unsupported architecture: {1}")] + UnsupportedArchitecture(Policy, String), + /// This platform tag isn't defined by auditwheel yet + #[error( + "{0} compatibility policy is not defined by auditwheel yet, pass `--auditwheel=skip` to proceed anyway" + )] + UndefinedPolicy(PlatformTag), + /// Failed to analyze external shared library dependencies of the wheel + #[error("Failed to analyze external shared library dependencies of the wheel")] + DependencyAnalysisError(#[source] lddtree::Error), +} + +#[derive(Clone, Debug)] +struct VersionedLibrary { + /// library name + name: String, + /// versions needed + versions: HashSet, +} + +impl VersionedLibrary { + /// Parse version strings (e.g. "GLIBC_2.17") into a map of name -> set of versions. + /// e.g. 
{"GLIBC" -> {"2.17", "2.5"}, "GCC" -> {"3.0"}} + /// + fn parsed_versions(&self) -> HashMap> { + let mut result: HashMap> = HashMap::new(); + for v in &self.versions { + if let Some((name, version)) = v.split_once('_') { + result + .entry(name.to_string()) + .or_default() + .insert(version.to_string()); + } + } + result + } +} + +/// Find required dynamic linked libraries with version information +fn find_versioned_libraries(elf: &Elf) -> Vec { + let mut symbols = Vec::new(); + if let Some(verneed) = &elf.verneed { + for need_file in verneed.iter() { + if let Some(name) = elf.dynstrtab.get_at(need_file.vn_file) { + // Skip dynamic linker/loader + if is_dynamic_linker(name) { + continue; + } + let mut versions = HashSet::new(); + for need_ver in need_file.iter() { + if let Some(aux_name) = elf.dynstrtab.get_at(need_ver.vna_name) { + versions.insert(aux_name.to_string()); + } + } + symbols.push(VersionedLibrary { + name: name.to_string(), + versions, + }); + } + } + } + symbols +} + +/// Find incompliant symbols from symbol versions +#[allow(clippy::result_large_err)] +fn find_incompliant_symbols( + elf: &Elf, + symbol_versions: &[String], +) -> Result, AuditWheelError> { + let mut symbols = Vec::new(); + let strtab = &elf.strtab; + for sym in &elf.syms { + if sym.st_type() == STT_FUNC { + let name = strtab.get_at(sym.st_name).unwrap_or("BAD NAME"); + for symbol_version in symbol_versions { + if name.ends_with(&format!("@{symbol_version}")) { + symbols.push(name.to_string()); + } + } + } + } + Ok(symbols) +} + +#[allow(clippy::result_large_err)] +fn policy_is_satisfied( + policy: &Policy, + elf: &Elf, + arch: &str, + deps: &[String], + versioned_libraries: &[VersionedLibrary], + allow_linking_libpython: bool, +) -> Result<(), AuditWheelError> { + let arch_versions = &policy.symbol_versions.get(arch).ok_or_else(|| { + AuditWheelError::UnsupportedArchitecture(policy.clone(), arch.to_string()) + })?; + let mut offending_libs = HashSet::new(); + let mut 
offending_versioned_syms = HashSet::new(); + let mut offending_blacklist_syms = HashMap::new(); + let undef_symbols: HashSet = elf + .dynsyms + .iter() + .filter_map(|sym| { + // Do not consider weak symbols as undefined, they are optional at runtime. + if sym.st_shndx == goblin::elf::section_header::SHN_UNDEF as usize + && sym.st_bind() != STB_WEAK + { + elf.dynstrtab.get_at(sym.st_name).map(ToString::to_string) + } else { + None + } + }) + .collect(); + + for dep in deps { + if is_dynamic_linker(dep) { + continue; + } + if !policy.lib_whitelist.contains(dep) { + if allow_linking_libpython && IS_LIBPYTHON.is_match(dep) { + continue; + } + offending_libs.insert(dep.clone()); + } + if let Some(sym_list) = policy.blacklist.get(dep) { + let mut intersection: Vec<_> = sym_list.intersection(&undef_symbols).cloned().collect(); + if !intersection.is_empty() { + intersection.sort(); + offending_blacklist_syms.insert(dep, intersection); + } + } + } + for library in versioned_libraries { + if !policy.lib_whitelist.contains(&library.name) { + offending_libs.insert(library.name.clone()); + continue; + } + for (name, versions_needed) in library.parsed_versions() { + let Some(versions_allowed) = arch_versions.get(&name) else { + offending_versioned_syms.insert(format!( + "{} offending versions: unknown symbol namespace {name}", + library.name, + )); + continue; + }; + if !versions_needed.is_subset(versions_allowed) { + let offending_versions: Vec<&str> = versions_needed + .difference(versions_allowed) + .map(|v| v.as_ref()) + .collect(); + let offending_symbol_versions: Vec = offending_versions + .iter() + .map(|v| format!("{name}_{v}")) + .collect(); + let offending_symbols = find_incompliant_symbols(elf, &offending_symbol_versions)?; + let offender = if offending_symbols.is_empty() { + format!( + "{} offending versions: {}", + library.name, + offending_symbol_versions.join(", ") + ) + } else { + format!( + "{} offending symbols: {}", + library.name, + offending_symbols.join(", 
") + ) + }; + offending_versioned_syms.insert(offender); + } + } + } + // Check for black-listed symbols + if !offending_blacklist_syms.is_empty() { + let offenders = offending_blacklist_syms + .into_iter() + .map(|(lib, syms)| format!("{}: {}", lib, syms.join(", "))) + .collect(); + return Err(AuditWheelError::BlackListedSymbolsError( + policy.clone(), + offenders, + )); + } + // Check for too-recent versioned symbols + if !offending_versioned_syms.is_empty() { + return Err(AuditWheelError::VersionedSymbolTooNewError( + policy.clone(), + offending_versioned_syms.into_iter().collect(), + )); + } + // Check for libpython and forbidden libraries + let offenders: Vec = offending_libs.into_iter().collect(); + match offenders.as_slice() { + [] => Ok(()), + [lib] if IS_LIBPYTHON.is_match(lib) => { + Err(AuditWheelError::LinksLibPythonError(lib.clone())) + } + offenders => Err(AuditWheelError::LinksForbiddenLibrariesError( + policy.clone(), + offenders.to_vec(), + )), + } +} + +fn get_default_platform_policies() -> Vec { + if let Ok(Some(musl_libc)) = find_musl_libc() + && let Ok(Some((major, minor))) = get_musl_version(musl_libc) + { + return MUSLLINUX_POLICIES + .iter() + .filter(|policy| { + policy.name == "linux" || policy.name == format!("musllinux_{major}_{minor}") + }) + .cloned() + .collect(); + } + MANYLINUX_POLICIES.clone() +} + +/// A reimplementation of auditwheel, which checks elf files for +/// manylinux/musllinux compliance. +/// +/// If `platform_tag` is None, it returns the highest matching manylinux/musllinux policy +/// and whether we need to repair with patchelf, or `linux` if nothing else matches. +/// It will error for bogus cases, e.g. if libpython is linked. +/// +/// If a specific manylinux/musllinux version is given, compliance is checked and a warning printed if +/// a higher version would be possible. +/// +/// Does nothing for `platform_tag` set to `Off`/`Linux` or non-linux platforms. 
+#[allow(clippy::result_large_err)] +fn auditwheel_rs( + artifact: &BuildArtifact, + target: &Target, + platform_tag: Option, + allow_linking_libpython: bool, +) -> Result<(Policy, bool), AuditWheelError> { + if !target.is_linux() || platform_tag == Some(PlatformTag::Linux) { + return Ok((Policy::default(), false)); + } + let path = &artifact.path; + let arch = target.target_arch().to_string(); + let mut file = File::open(path).map_err(AuditWheelError::IoError)?; + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer) + .map_err(AuditWheelError::IoError)?; + let elf = Elf::parse(&buffer).map_err(AuditWheelError::GoblinError)?; + // This returns essentially the same as ldd + let deps: Vec = elf.libraries.iter().map(ToString::to_string).collect(); + let versioned_libraries = find_versioned_libraries(&elf); + + // Find the highest possible policy, if any + let platform_policies = match platform_tag { + Some(PlatformTag::Manylinux { .. }) => MANYLINUX_POLICIES.clone(), + Some(PlatformTag::Musllinux { major, minor }) => MUSLLINUX_POLICIES + .clone() + .into_iter() + .filter(|policy| { + policy.name == "linux" || policy.name == format!("musllinux_{major}_{minor}") + }) + .map(|mut policy| { + policy.fixup_musl_libc_so_name(target.target_arch()); + policy + }) + .collect(), + None | Some(PlatformTag::Pypi) => { + // Using the default for the `pypi` tag means we're correctly using manylinux where + // possible. 
+ let mut policies = get_default_platform_policies(); + for policy in &mut policies { + policy.fixup_musl_libc_so_name(target.target_arch()); + } + policies + } + Some(PlatformTag::Linux) => unreachable!(), + }; + let mut highest_policy = None; + let mut should_repair = false; + for policy in platform_policies.iter() { + let result = policy_is_satisfied( + policy, + &elf, + &arch, + &deps, + &versioned_libraries, + allow_linking_libpython, + ); + match result { + Ok(_) => { + highest_policy = Some(policy.clone()); + should_repair = false; + break; + } + Err(AuditWheelError::LinksForbiddenLibrariesError(..)) => { + highest_policy = Some(policy.clone()); + should_repair = true; + break; + } + Err(AuditWheelError::VersionedSymbolTooNewError(..)) + | Err(AuditWheelError::BlackListedSymbolsError(..)) + // UnsupportedArchitecture happens when trying 2010 with aarch64 + | Err(AuditWheelError::UnsupportedArchitecture(..)) => continue, + // If there was an error parsing the symbols or libpython was linked, + // we error no matter what the requested policy was + Err(err) => return Err(err), + } + } + + let policy = if let Some(platform_tag) = platform_tag { + let mut policy = Policy::from_tag(&platform_tag) + .ok_or(AuditWheelError::UndefinedPolicy(platform_tag))?; + policy.fixup_musl_libc_so_name(target.target_arch()); + + if let Some(highest_policy) = highest_policy { + // Don't recommend manylinux1 because rust doesn't support it anymore + if policy.priority < highest_policy.priority && highest_policy.name != "manylinux_2_5" { + eprintln!( + "📦 Wheel is eligible for a higher priority tag. 
\ + You requested {policy} but this wheel is eligible for {highest_policy}", + ); + } + } + + match policy_is_satisfied( + &policy, + &elf, + &arch, + &deps, + &versioned_libraries, + allow_linking_libpython, + ) { + Ok(_) => { + should_repair = false; + Ok(policy) + } + Err(AuditWheelError::LinksForbiddenLibrariesError(..)) => { + should_repair = true; + Ok(policy) + } + Err(err) => Err(err), + } + } else if let Some(policy) = highest_policy { + Ok(policy) + } else if target.target_arch() == Arch::Armv6L || target.target_arch() == Arch::Armv7L { + // Old arm versions + // https://github.com/pypi/warehouse/blob/556e1e3390999381c382873b003a779a1363cb4d/warehouse/forklift/legacy.py#L122-L123 + Ok(Policy::default()) + } else { + eprintln!( + "⚠️ Warning: No compatible platform tag found, using the linux tag instead. \ + You won't be able to upload those wheels to PyPI." + ); + + // Fallback to linux + Ok(Policy::default()) + }?; + Ok((policy, should_repair)) +} + +// --------------------------------------------------------------------------- +// ElfRepairer – WheelRepairer implementation +// --------------------------------------------------------------------------- + /// Linux/ELF wheel repairer (auditwheel equivalent). /// /// Bundles external `.so` files and rewrites ELF metadata (SONAME, DT_NEEDED, @@ -121,6 +535,10 @@ impl WheelRepairer for ElfRepairer { } } +// --------------------------------------------------------------------------- +// Dependency discovery & policy adjustment +// --------------------------------------------------------------------------- + /// Find external shared library dependencies (Linux/ELF specific). 
/// /// Uses lddtree to resolve dependencies, then filters out the dynamic linker, @@ -277,6 +695,8 @@ fn check_external_libs_policy( for p in &mut platform_policies { p.fixup_musl_libc_so_name(target.target_arch()); } + // Policies must be sorted from highest to lowest priority so we find the + // best (most compatible) match first when iterating. debug_assert!( platform_policies .windows(2) @@ -301,6 +721,7 @@ fn check_external_libs_policy( continue; } + // Find the highest policy that this external library satisfies let unsatisfied = unsatisfied_symbol_versions(&result, &arch, &versioned_libraries); if unsatisfied.is_empty() { continue; @@ -333,6 +754,7 @@ fn extract_rustflags_library_paths(manifest_path: &Path, target: &Target) -> Opt let config = cargo_config2::Config::load_with_cwd(manifest_dir).ok()?; let rustflags = config.rustflags(target.target_triple()).ok()??; + // Encode the rustflags for parsing with the rustflags crate let encoded = rustflags.encode().ok()?; let mut library_paths = Vec::new(); @@ -355,13 +777,16 @@ mod tests { #[test] fn test_extract_rustflags_library_paths() { + // Create a temporary directory with a Cargo.toml and .cargo/config.toml let temp_dir = tempfile::tempdir().unwrap(); let manifest_path = temp_dir.path().join("Cargo.toml"); let cargo_dir = temp_dir.path().join(".cargo"); let config_path = cargo_dir.join("config.toml"); + // Create the directories fs_err::create_dir_all(&cargo_dir).unwrap(); + // Create a minimal Cargo.toml fs_err::write( &manifest_path, r#" @@ -373,6 +798,7 @@ edition = "2021" ) .unwrap(); + // Create a config.toml with rustflags containing -L options fs_err::write( &config_path, r#" @@ -382,6 +808,7 @@ rustflags = ["-L", "dependency=/usr/local/lib", "-L", "/some/other/path", "-C", ) .unwrap(); + // Test the function let target = Target::from_target_triple(None).unwrap(); let paths = super::extract_rustflags_library_paths(&manifest_path, &target); @@ -398,15 +825,19 @@ rustflags = ["-L", 
"dependency=/usr/local/lib", "-L", "/some/other/path", "-C", .any(|p| p.to_string_lossy() == "/some/other/path") ); } else { + // It's possible that rustflags parsing fails in some environments, + // so we just verify the function doesn't panic println!("No rustflags library paths found, which is acceptable"); } } #[test] fn test_extract_rustflags_library_paths_no_config() { + // Test with a directory that has no cargo config let temp_dir = tempfile::tempdir().unwrap(); let manifest_path = temp_dir.path().join("Cargo.toml"); + // Create a minimal Cargo.toml fs_err::write( &manifest_path, r#" @@ -421,6 +852,7 @@ edition = "2021" let target = Target::from_target_triple(None).unwrap(); let paths = super::extract_rustflags_library_paths(&manifest_path, &target); + // Should return None when there's no cargo config with rustflags assert!(paths.is_none()); } } From e9e1f2bf25fae70a581b05c3ff4b6761f3d8ca2e Mon Sep 17 00:00:00 2001 From: messense Date: Wed, 1 Apr 2026 07:32:22 +0800 Subject: [PATCH 5/7] fix: filter replace_needed per artifact by its actual DT_NEEDED The refactored patch() was applying a single global replacement list to every artifact, which could cause unnecessary patchelf work and may fail if patchelf --replace-needed errors on entries absent from a binary's DT_NEEDED. Restore the original per-artifact filtering by reading each artifact's DT_NEEDED via goblin before calling patchelf, matching the previous behavior where replacements were intersected with each artifact's own dependency set. 
--- src/auditwheel/linux.rs | 24 ++++++++++++++---------- src/auditwheel/repair.rs | 4 ++++ src/build_context/repair.rs | 2 +- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/src/auditwheel/linux.rs b/src/auditwheel/linux.rs index 85c067409..be0e15a39 100644 --- a/src/auditwheel/linux.rs +++ b/src/auditwheel/linux.rs @@ -432,10 +432,6 @@ fn auditwheel_rs( Ok((policy, should_repair)) } -// --------------------------------------------------------------------------- -// ElfRepairer – WheelRepairer implementation -// --------------------------------------------------------------------------- - /// Linux/ELF wheel repairer (auditwheel equivalent). /// /// Bundles external `.so` files and rewrites ELF metadata (SONAME, DT_NEEDED, @@ -475,6 +471,7 @@ impl WheelRepairer for ElfRepairer { fn patch( &self, artifacts: &[&BuildArtifact], + ext_libs: &[Vec], grafted: &[GraftedLib], libs_dir: &Path, artifact_dir: &Path, @@ -499,8 +496,19 @@ impl WheelRepairer for ElfRepairer { } // Rewrite DT_NEEDED in each artifact to reference new sonames. - let replacements: Vec<_> = name_map.iter().map(|(k, v)| (*k, v.to_string())).collect(); - for artifact in artifacts { + // Only replace entries that the artifact actually depends on to avoid + // unnecessary patchelf invocations and errors when an old name is + // absent from a given binary. 
+ for (artifact, artifact_ext_libs) in artifacts.iter().zip(ext_libs) { + let artifact_deps: HashSet<&str> = artifact_ext_libs + .iter() + .map(|lib| lib.name.as_str()) + .collect(); + let replacements: Vec<_> = name_map + .iter() + .filter(|(old, _)| artifact_deps.contains(**old)) + .map(|(k, v)| (*k, v.to_string())) + .collect(); if !replacements.is_empty() { patchelf::replace_needed(&artifact.path, &replacements)?; } @@ -535,10 +543,6 @@ impl WheelRepairer for ElfRepairer { } } -// --------------------------------------------------------------------------- -// Dependency discovery & policy adjustment -// --------------------------------------------------------------------------- - /// Find external shared library dependencies (Linux/ELF specific). /// /// Uses lddtree to resolve dependencies, then filters out the dynamic linker, diff --git a/src/auditwheel/repair.rs b/src/auditwheel/repair.rs index b47c7cb4e..6845204d1 100644 --- a/src/auditwheel/repair.rs +++ b/src/auditwheel/repair.rs @@ -63,9 +63,13 @@ pub trait WheelRepairer { /// 2. Set appropriate metadata on grafted libraries (soname, install ID, etc.) /// 3. Update cross-references between grafted libraries /// 4. Perform any final steps (e.g., code signing on macOS) + /// + /// `ext_libs` is parallel to `artifacts`: `ext_libs[i]` lists the external + /// libraries that `artifacts[i]` depends on. 
fn patch( &self, artifacts: &[&BuildArtifact], + ext_libs: &[Vec], grafted: &[GraftedLib], libs_dir: &Path, artifact_dir: &Path, diff --git a/src/build_context/repair.rs b/src/build_context/repair.rs index 8d8dcb8d8..84ca878d1 100644 --- a/src/build_context/repair.rs +++ b/src/build_context/repair.rs @@ -192,7 +192,7 @@ impl BuildContext { let artifact_dir = self.get_artifact_dir(); let artifact_refs: Vec<&BuildArtifact> = artifacts.iter().map(|a| a.borrow()).collect(); - repairer.patch(&artifact_refs, &grafted, &libs_dir, &artifact_dir)?; + repairer.patch(&artifact_refs, ext_libs, &grafted, &libs_dir, &artifact_dir)?; // Add grafted libraries to the wheel for lib in &grafted { From e888693ac93ede5b3798357baf4a0811e8647eb9 Mon Sep 17 00:00:00 2001 From: messense Date: Wed, 1 Apr 2026 07:41:13 +0800 Subject: [PATCH 6/7] refactor: introduce AuditedArtifact to bundle artifact with its external libs Replace the parallel-slices pattern (artifacts + ext_libs zipped at every call site) with AuditedArtifact, a struct that keeps a BuildArtifact together with its Vec external dependencies. This makes it impossible for the two to get out of sync, removes the need for parallel slice indexing, and allows patch() to filter replace_needed per artifact using the already-known dependency list rather than re-parsing ELF files. AuditedArtifact implements Borrow so it can be passed directly to generate_binding() and other functions that only need the artifact. 
--- src/auditwheel/linux.rs | 18 ++++----- src/auditwheel/mod.rs | 2 +- src/auditwheel/repair.rs | 30 +++++++++++---- src/build_context/repair.rs | 53 +++++++++++---------------- src/build_orchestrator.rs | 73 ++++++++++++++++--------------------- 5 files changed, 85 insertions(+), 91 deletions(-) diff --git a/src/auditwheel/linux.rs b/src/auditwheel/linux.rs index be0e15a39..7f40fa21c 100644 --- a/src/auditwheel/linux.rs +++ b/src/auditwheel/linux.rs @@ -10,7 +10,7 @@ use super::audit::{get_sysroot_path, relpath}; use super::musllinux::{find_musl_libc, get_musl_version}; use super::policy::{MANYLINUX_POLICIES, MUSLLINUX_POLICIES, Policy}; -use super::repair::{GraftedLib, WheelRepairer}; +use super::repair::{AuditedArtifact, GraftedLib, WheelRepairer}; use super::{PlatformTag, patchelf}; use crate::compile::BuildArtifact; use crate::target::{Arch, Target}; @@ -470,8 +470,7 @@ impl WheelRepairer for ElfRepairer { fn patch( &self, - artifacts: &[&BuildArtifact], - ext_libs: &[Vec], + audited: &[AuditedArtifact], grafted: &[GraftedLib], libs_dir: &Path, artifact_dir: &Path, @@ -499,8 +498,9 @@ impl WheelRepairer for ElfRepairer { // Only replace entries that the artifact actually depends on to avoid // unnecessary patchelf invocations and errors when an old name is // absent from a given binary. 
- for (artifact, artifact_ext_libs) in artifacts.iter().zip(ext_libs) { - let artifact_deps: HashSet<&str> = artifact_ext_libs + for aa in audited { + let artifact_deps: HashSet<&str> = aa + .external_libs .iter() .map(|lib| lib.name.as_str()) .collect(); @@ -510,7 +510,7 @@ impl WheelRepairer for ElfRepairer { .map(|(k, v)| (*k, v.to_string())) .collect(); if !replacements.is_empty() { - patchelf::replace_needed(&artifact.path, &replacements)?; + patchelf::replace_needed(&aa.artifact.path, &replacements)?; } } @@ -531,12 +531,12 @@ impl WheelRepairer for ElfRepairer { } // Set RPATH on artifacts to find the libs directory - for artifact in artifacts { - let mut new_rpaths = patchelf::get_rpath(&artifact.path)?; + for aa in audited { + let mut new_rpaths = patchelf::get_rpath(&aa.artifact.path)?; let new_rpath = Path::new("$ORIGIN").join(relpath(libs_dir, artifact_dir)); new_rpaths.push(new_rpath.to_str().unwrap().to_string()); let new_rpath = new_rpaths.join(":"); - patchelf::set_rpath(&artifact.path, &new_rpath)?; + patchelf::set_rpath(&aa.artifact.path, &new_rpath)?; } Ok(()) diff --git a/src/auditwheel/mod.rs b/src/auditwheel/mod.rs index bda1d49a0..43776c474 100644 --- a/src/auditwheel/mod.rs +++ b/src/auditwheel/mod.rs @@ -14,4 +14,4 @@ pub use audit::*; pub use linux::ElfRepairer; pub use platform_tag::PlatformTag; pub use policy::Policy; -pub use repair::{WheelRepairer, log_grafted_libs, prepare_grafted_libs}; +pub use repair::{AuditedArtifact, WheelRepairer, log_grafted_libs, prepare_grafted_libs}; diff --git a/src/auditwheel/repair.rs b/src/auditwheel/repair.rs index 6845204d1..0895fa3a5 100644 --- a/src/auditwheel/repair.rs +++ b/src/auditwheel/repair.rs @@ -10,11 +10,31 @@ use crate::compile::BuildArtifact; use crate::util::hash_file; use anyhow::{Context, Result}; +use std::borrow::Borrow; use std::collections::HashSet; use std::path::{Path, PathBuf}; use fs_err as fs; +/// A build artifact bundled with the external shared libraries it depends on. 
+/// +/// Keeps the artifact and its per-artifact dependency list together so they +/// cannot accidentally get out of sync when passed through the wheel-writing +/// pipeline. +pub struct AuditedArtifact { + /// The build artifact. + pub artifact: BuildArtifact, + /// External shared libraries this artifact depends on that must be + /// bundled into the wheel. + pub external_libs: Vec, +} + +impl Borrow for AuditedArtifact { + fn borrow(&self) -> &BuildArtifact { + &self.artifact + } +} + /// A library prepared for grafting into a wheel. /// /// Created by [`prepare_grafted_libs`] with a hash-suffixed filename and a @@ -63,13 +83,9 @@ pub trait WheelRepairer { /// 2. Set appropriate metadata on grafted libraries (soname, install ID, etc.) /// 3. Update cross-references between grafted libraries /// 4. Perform any final steps (e.g., code signing on macOS) - /// - /// `ext_libs` is parallel to `artifacts`: `ext_libs[i]` lists the external - /// libraries that `artifacts[i]` depends on. fn patch( &self, - artifacts: &[&BuildArtifact], - ext_libs: &[Vec], + audited: &[AuditedArtifact], grafted: &[GraftedLib], libs_dir: &Path, artifact_dir: &Path, @@ -97,7 +113,7 @@ pub trait WheelRepairer { /// file is referenced via multiple install names (common on macOS), only one /// copy is made, but all original names are recorded as aliases. 
pub fn prepare_grafted_libs( - ext_libs: &[Vec], + audited: &[AuditedArtifact], temp_dir: &Path, ) -> Result<(Vec, HashSet)> { let mut grafted = Vec::new(); @@ -105,7 +121,7 @@ pub fn prepare_grafted_libs( let mut realpath_to_idx: std::collections::HashMap = std::collections::HashMap::new(); - for lib in ext_libs.iter().flatten() { + for lib in audited.iter().flat_map(|a| &a.external_libs) { let source_path = lib.realpath.clone().with_context(|| { format!( "Cannot repair wheel, because required library {} could not be located.", diff --git a/src/build_context/repair.rs b/src/build_context/repair.rs index 84ca878d1..686ff6147 100644 --- a/src/build_context/repair.rs +++ b/src/build_context/repair.rs @@ -1,8 +1,8 @@ #[cfg(feature = "sbom")] use crate::auditwheel::get_sysroot_path; use crate::auditwheel::{ - AuditWheelMode, ElfRepairer, PlatformTag, Policy, WheelRepairer, log_grafted_libs, patchelf, - prepare_grafted_libs, + AuditWheelMode, AuditedArtifact, ElfRepairer, PlatformTag, Policy, WheelRepairer, + log_grafted_libs, patchelf, prepare_grafted_libs, }; #[cfg(feature = "sbom")] use crate::module_writer::ModuleWriter; @@ -12,7 +12,6 @@ use anyhow::{Context, Result, bail}; use fs_err as fs; use lddtree::Library; use normpath::PathExt; -use std::borrow::Borrow; use std::path::{Path, PathBuf}; use super::BuildContext; @@ -105,28 +104,24 @@ impl BuildContext { } /// Add library search paths in Cargo target directory rpath when building in editable mode - fn add_rpath(&self, artifacts: &[A]) -> Result<()> - where - A: Borrow, - { - if self.project.editable && self.project.target.is_linux() && !artifacts.is_empty() { - for artifact in artifacts { - let artifact = artifact.borrow(); - if artifact.linked_paths.is_empty() { + fn add_rpath(&self, audited: &[AuditedArtifact]) -> Result<()> { + if self.project.editable && self.project.target.is_linux() && !audited.is_empty() { + for aa in audited { + if aa.artifact.linked_paths.is_empty() { continue; } - let old_rpaths = 
patchelf::get_rpath(&artifact.path)?; + let old_rpaths = patchelf::get_rpath(&aa.artifact.path)?; let mut new_rpaths = old_rpaths.clone(); - for path in &artifact.linked_paths { + for path in &aa.artifact.linked_paths { if !old_rpaths.contains(path) { new_rpaths.push(path.to_string()); } } let new_rpath = new_rpaths.join(":"); - if let Err(err) = patchelf::set_rpath(&artifact.path, &new_rpath) { + if let Err(err) = patchelf::set_rpath(&aa.artifact.path, &new_rpath) { eprintln!( "⚠️ Warning: Failed to set rpath for {}: {}", - artifact.path.display(), + aa.artifact.path.display(), err ); } @@ -135,32 +130,27 @@ impl BuildContext { Ok(()) } - pub(crate) fn add_external_libs( + pub(crate) fn add_external_libs( &self, writer: &mut VirtualWriter, - artifacts: &[A], - ext_libs: &[Vec], - ) -> Result<()> - where - A: Borrow, - { + audited: &[AuditedArtifact], + ) -> Result<()> { if self.project.editable { - return self.add_rpath(artifacts); + return self.add_rpath(audited); } - if ext_libs.iter().all(|libs| libs.is_empty()) { + if audited.iter().all(|a| a.external_libs.is_empty()) { return Ok(()); } // Log which libraries need to be copied and which artifacts require them // before calling patchelf, so users can see this even if patchelf is missing. 
eprintln!("🔗 External shared libraries to be copied into the wheel:"); - for (artifact, artifact_ext_libs) in artifacts.iter().zip(ext_libs) { - let artifact = artifact.borrow(); - if artifact_ext_libs.is_empty() { + for aa in audited { + if aa.external_libs.is_empty() { continue; } - eprintln!(" {} requires:", artifact.path.display()); - for lib in artifact_ext_libs { + eprintln!(" {} requires:", aa.artifact.path.display()); + for lib in &aa.external_libs { if let Some(path) = lib.realpath.as_ref() { eprintln!(" {} => {}", lib.name, path.display()); } else { @@ -188,11 +178,10 @@ impl BuildContext { let libs_dir = repairer.libs_dir(&dist_name); let temp_dir = writer.temp_dir()?; - let (grafted, libs_copied) = prepare_grafted_libs(ext_libs, temp_dir.path())?; + let (grafted, libs_copied) = prepare_grafted_libs(audited, temp_dir.path())?; let artifact_dir = self.get_artifact_dir(); - let artifact_refs: Vec<&BuildArtifact> = artifacts.iter().map(|a| a.borrow()).collect(); - repairer.patch(&artifact_refs, ext_libs, &grafted, &libs_dir, &artifact_dir)?; + repairer.patch(audited, &grafted, &libs_dir, &artifact_dir)?; // Add grafted libraries to the wheel for lib in &grafted { diff --git a/src/build_orchestrator.rs b/src/build_orchestrator.rs index 8d76ec2df..0682468e6 100644 --- a/src/build_orchestrator.rs +++ b/src/build_orchestrator.rs @@ -1,4 +1,4 @@ -use crate::auditwheel::{PlatformTag, Policy}; +use crate::auditwheel::{AuditedArtifact, PlatformTag, Policy}; use crate::binding_generator::{ BinBindingGenerator, BindingGenerator, CffiBindingGenerator, Pyo3BindingGenerator, UniFfiBindingGenerator, generate_binding, @@ -19,7 +19,6 @@ use cargo_metadata::CrateType; use fs_err as fs; use ignore::overrides::{Override, OverrideBuilder}; use itertools::Itertools; -use lddtree::Library; use normpath::PathExt; use std::collections::{HashMap, HashSet}; use std::path::{Path, PathBuf}; @@ -468,8 +467,7 @@ impl<'a> BuildOrchestrator<'a> { fn write_wheel<'b, F>( &'b self, tag: 
&str, - artifacts: &[&BuildArtifact], - ext_libs: &[Vec], + audited: &[AuditedArtifact], make_generator: F, sbom_data: &Option, out_dirs: &HashMap, @@ -490,8 +488,7 @@ impl<'a> BuildOrchestrator<'a> { file_options, )?; let mut writer = VirtualWriter::new(writer, self.excludes(Format::Wheel)?); - self.context - .add_external_libs(&mut writer, artifacts, ext_libs)?; + self.context.add_external_libs(&mut writer, audited)?; let temp_dir = writer.temp_dir()?; let mut generator = make_generator(temp_dir)?; @@ -499,7 +496,7 @@ impl<'a> BuildOrchestrator<'a> { &mut writer, generator.as_mut(), self.context, - artifacts, + audited, out_dirs, ) .context("Failed to add the files to the wheel")?; @@ -554,10 +551,13 @@ impl<'a> BuildOrchestrator<'a> { let abi_tag = stable_abi_kind.wheel_tag(); let tag = format!("cp{major}{min_minor}-{abi_tag}-{platform}"); + let audited = [AuditedArtifact { + artifact, + external_libs, + }]; let wheel_path = self.write_wheel( &tag, - &[&artifact], - &[external_libs], + &audited, |temp_dir| { Ok(Box::new( Pyo3BindingGenerator::new(Some(stable_abi_kind), python_interpreter, temp_dir) @@ -583,9 +583,8 @@ impl<'a> BuildOrchestrator<'a> { fn write_pyo3_wheel( &self, python_interpreter: &PythonInterpreter, - artifact: BuildArtifact, + audited: &[AuditedArtifact], platform_tags: &[PlatformTag], - ext_libs: Vec, sbom_data: &Option, out_dirs: &HashMap, ) -> Result { @@ -593,8 +592,7 @@ impl<'a> BuildOrchestrator<'a> { self.write_wheel( &tag, - &[&artifact], - &[ext_libs], + audited, |temp_dir| { Ok(Box::new( Pyo3BindingGenerator::new(None, Some(python_interpreter), temp_dir) @@ -623,11 +621,14 @@ impl<'a> BuildOrchestrator<'a> { Some(python_interpreter), )?; let platform_tags = self.resolve_platform_tags(&policy); + let audited = [AuditedArtifact { + artifact, + external_libs, + }]; let wheel_path = self.write_pyo3_wheel( python_interpreter, - artifact, + &audited, &platform_tags, - external_libs, sbom_data, &out_dirs, )?; @@ -705,14 +706,11 @@ impl<'a> 
BuildOrchestrator<'a> { .auditwheel(&artifact, &self.context.python.platform_tag, None)?; let platform_tags = self.resolve_platform_tags(&policy); let tag = self.get_universal_tag(&platform_tags)?; - let wheel_path = self.write_wheel( - &tag, - &[&artifact], - &[external_libs], - make_generator, - sbom_data, - &out_dirs, - )?; + let audited = [AuditedArtifact { + artifact, + external_libs, + }]; + let wheel_path = self.write_wheel(&tag, &audited, make_generator, sbom_data, &out_dirs)?; Ok((wheel_path, out_dirs)) } @@ -766,9 +764,8 @@ impl<'a> BuildOrchestrator<'a> { fn write_bin_wheel( &self, python_interpreter: Option<&PythonInterpreter>, - artifacts: &[BuildArtifact], + audited: &[AuditedArtifact], platform_tags: &[PlatformTag], - ext_libs: &[Vec], sbom_data: &Option, out_dirs: &HashMap, ) -> Result { @@ -805,19 +802,11 @@ impl<'a> BuildOrchestrator<'a> { let writer = WheelWriter::new(&tag, &self.context.artifact.out, &metadata24, file_options)?; let mut writer = VirtualWriter::new(writer, self.excludes(Format::Wheel)?); - let artifact_refs: Vec<&BuildArtifact> = artifacts.iter().collect(); - self.context - .add_external_libs(&mut writer, &artifact_refs, ext_libs)?; + self.context.add_external_libs(&mut writer, audited)?; let mut generator = BinBindingGenerator::new(&mut metadata24); - generate_binding( - &mut writer, - &mut generator, - self.context, - artifacts, - out_dirs, - ) - .context("Failed to add the files to the wheel")?; + generate_binding(&mut writer, &mut generator, self.context, audited, out_dirs) + .context("Failed to add the files to the wheel")?; self.add_pth(&mut writer)?; add_data( @@ -860,8 +849,7 @@ impl<'a> BuildOrchestrator<'a> { } let mut policies = Vec::with_capacity(result.artifacts.len()); - let mut ext_libs = Vec::new(); - let mut artifact_paths = Vec::with_capacity(result.artifacts.len()); + let mut audited_artifacts = Vec::new(); for artifact in result.artifacts { let mut artifact = artifact .get(&CrateType::Bin) @@ -872,19 +860,20 
@@ impl<'a> BuildOrchestrator<'a> { self.context .auditwheel(&artifact, &self.context.python.platform_tag, None)?; policies.push(policy); - ext_libs.push(external_libs); self.context.stage_artifact(&mut artifact)?; - artifact_paths.push(artifact); + audited_artifacts.push(AuditedArtifact { + artifact, + external_libs, + }); } let policy = policies.iter().min_by_key(|p| p.priority).unwrap(); let platform_tags = self.resolve_platform_tags(policy); let wheel_path = self.write_bin_wheel( python_interpreter, - &artifact_paths, + &audited_artifacts, &platform_tags, - &ext_libs, sbom_data, &result.out_dirs, )?; From f5affd87e535c964d18ddcbdc9fe94c2cd98e2f8 Mon Sep 17 00:00:00 2001 From: messense Date: Wed, 1 Apr 2026 20:09:39 +0800 Subject: [PATCH 7/7] refactor: address review feedback for auditwheel refactor - Remove redundant libs_copied.insert in prepare_grafted_libs dedup branch - Make ElfRepairer::audit consume caller-provided ld_paths instead of ignoring them; get_policy_and_libs now accepts pre-built paths - Move editable-install patchelf logic from BuildContext::add_rpath into ElfRepairer::patch_editable via the WheelRepairer trait --- src/auditwheel/linux.rs | 42 +++++++++++++++++++++++++++++-------- src/auditwheel/repair.rs | 9 +++++++- src/build_context/repair.rs | 34 +++++------------------------- 3 files changed, 46 insertions(+), 39 deletions(-) diff --git a/src/auditwheel/linux.rs b/src/auditwheel/linux.rs index 7f40fa21c..7866c0254 100644 --- a/src/auditwheel/linux.rs +++ b/src/auditwheel/linux.rs @@ -457,13 +457,19 @@ impl WheelRepairer for ElfRepairer { fn audit( &self, artifact: &BuildArtifact, - _ld_paths: Vec, + mut ld_paths: Vec, ) -> Result<(Policy, Vec)> { + // Extend caller-provided paths with RUSTFLAGS library search paths + if let Some(rustflags_paths) = + extract_rustflags_library_paths(&self.manifest_path, &self.target) + { + ld_paths.extend(rustflags_paths); + } get_policy_and_libs( artifact, self.platform_tag, &self.target, - 
&self.manifest_path, + ld_paths, self.allow_linking_libpython, ) } @@ -541,6 +547,30 @@ impl WheelRepairer for ElfRepairer { Ok(()) } + + fn patch_editable(&self, audited: &[AuditedArtifact]) -> Result<()> { + for aa in audited { + if aa.artifact.linked_paths.is_empty() { + continue; + } + let old_rpaths = patchelf::get_rpath(&aa.artifact.path)?; + let mut new_rpaths = old_rpaths.clone(); + for path in &aa.artifact.linked_paths { + if !old_rpaths.contains(path) { + new_rpaths.push(path.to_string()); + } + } + let new_rpath = new_rpaths.join(":"); + if let Err(err) = patchelf::set_rpath(&aa.artifact.path, &new_rpath) { + eprintln!( + "⚠️ Warning: Failed to set rpath for {}: {}", + aa.artifact.path.display(), + err + ); + } + } + Ok(()) + } } /// Find external shared library dependencies (Linux/ELF specific). @@ -579,7 +609,7 @@ fn get_policy_and_libs( artifact: &BuildArtifact, platform_tag: Option, target: &Target, - manifest_path: &Path, + ld_paths: Vec, allow_linking_libpython: bool, ) -> Result<(Policy, Vec)> { let (policy, should_repair) = @@ -594,12 +624,6 @@ fn get_policy_and_libs( )?; let external_libs = if should_repair { let sysroot = get_sysroot_path(target).unwrap_or_else(|_| PathBuf::from("/")); - let mut ld_paths: Vec = artifact.linked_paths.iter().map(PathBuf::from).collect(); - - // Add library search paths from RUSTFLAGS - if let Some(rustflags_paths) = extract_rustflags_library_paths(manifest_path, target) { - ld_paths.extend(rustflags_paths); - } let external_libs = find_external_libs(&artifact.path, &policy, sysroot, ld_paths) .with_context(|| { diff --git a/src/auditwheel/repair.rs b/src/auditwheel/repair.rs index 0895fa3a5..9b8038ba2 100644 --- a/src/auditwheel/repair.rs +++ b/src/auditwheel/repair.rs @@ -91,6 +91,14 @@ pub trait WheelRepairer { artifact_dir: &Path, ) -> Result<()>; + /// Patch artifacts for editable installs (e.g., set RPATH to Cargo target dir). + /// + /// The default implementation is a no-op. 
Platform-specific repairers can + /// override this to add runtime library search paths for editable mode. + fn patch_editable(&self, _audited: &[AuditedArtifact]) -> Result<()> { + Ok(()) + } + /// Return the wheel-internal directory name for grafted libraries. /// /// macOS uses `.dylibs` (matching delocate convention), @@ -135,7 +143,6 @@ pub fn prepare_grafted_libs( if lib.name != existing.original_name && !existing.aliases.contains(&lib.name) { existing.aliases.push(lib.name.clone()); } - libs_copied.insert(source_path); continue; } diff --git a/src/build_context/repair.rs b/src/build_context/repair.rs index 686ff6147..0ba445ec6 100644 --- a/src/build_context/repair.rs +++ b/src/build_context/repair.rs @@ -2,7 +2,7 @@ use crate::auditwheel::get_sysroot_path; use crate::auditwheel::{ AuditWheelMode, AuditedArtifact, ElfRepairer, PlatformTag, Policy, WheelRepairer, - log_grafted_libs, patchelf, prepare_grafted_libs, + log_grafted_libs, prepare_grafted_libs, }; #[cfg(feature = "sbom")] use crate::module_writer::ModuleWriter; @@ -103,40 +103,16 @@ impl BuildContext { } } - /// Add library search paths in Cargo target directory rpath when building in editable mode - fn add_rpath(&self, audited: &[AuditedArtifact]) -> Result<()> { - if self.project.editable && self.project.target.is_linux() && !audited.is_empty() { - for aa in audited { - if aa.artifact.linked_paths.is_empty() { - continue; - } - let old_rpaths = patchelf::get_rpath(&aa.artifact.path)?; - let mut new_rpaths = old_rpaths.clone(); - for path in &aa.artifact.linked_paths { - if !old_rpaths.contains(path) { - new_rpaths.push(path.to_string()); - } - } - let new_rpath = new_rpaths.join(":"); - if let Err(err) = patchelf::set_rpath(&aa.artifact.path, &new_rpath) { - eprintln!( - "⚠️ Warning: Failed to set rpath for {}: {}", - aa.artifact.path.display(), - err - ); - } - } - } - Ok(()) - } - pub(crate) fn add_external_libs( &self, writer: &mut VirtualWriter, audited: &[AuditedArtifact], ) -> Result<()> { 
if self.project.editable { - return self.add_rpath(audited); + if let Some(repairer) = self.make_repairer(&self.python.platform_tag) { + return repairer.patch_editable(audited); + } + return Ok(()); } if audited.iter().all(|a| a.external_libs.is_empty()) { return Ok(());