Skip to content

Commit 93ab48d

Browse files
notriddleweihanglo
andcommitted
feat: support for rustdoc mergeable cross-crate info
This is an unstable feature that we designed to fix several performance problems with the old system: 1. You couldn't easily build crate docs in hermetic environments. This doesn't matter for Cargo, but it was one of the original reasons to implement the feature. 2. All the doc resources had to be built in their final form at every step, instead of delaying the slow parts (mostly the search index) until the end and doing them only once. 3. The old system required rustdoc to take a lock at the end, which reduces the concurrency available for generating docs. A nightly-only flag, `-Zrustdoc-mergeable-info`, is added to enable the new behavior. Co-authored-by: Michael Howell <michael@notriddle.com> Co-authored-by: Weihang Lo <me@weihanglo.tw>
1 parent b4192c0 commit 93ab48d

File tree

10 files changed

+923
-12
lines changed

10 files changed

+923
-12
lines changed

crates/cargo-test-support/src/compare.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ static E2E_LITERAL_REDACTIONS: &[(&str, &str)] = &[
338338
("[BLOCKING]", " Blocking"),
339339
("[GENERATED]", " Generated"),
340340
("[OPENING]", " Opening"),
341+
("[MERGING]", " Merging"),
341342
];
342343

343344
/// Checks that the given string contains the given contiguous lines

src/cargo/core/compiler/build_context/target_info.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ pub enum FileFlavor {
7979
DebugInfo,
8080
/// SBOM (Software Bill of Materials precursor) file (e.g. cargo-sbom.json).
8181
Sbom,
82+
/// Cross-crate info JSON files generated by rustdoc.
83+
DocParts,
8284
}
8385

8486
/// Type of each file generated by a Unit.

src/cargo/core/compiler/build_runner/compilation_files.rs

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -265,14 +265,18 @@ impl<'a, 'gctx: 'a> CompilationFiles<'a, 'gctx> {
265265
/// Returns the host `deps` directory path.
266266
pub fn host_deps(&self, unit: &Unit) -> PathBuf {
267267
let dir = self.pkg_dir(unit);
268-
self.host.build_dir().deps(&dir)
268+
// `-Zrustdoc-mergeable-info` always uses new layout.
269+
let new_layout = unit.mode.is_doc() && self.ws.gctx().cli_unstable().rustdoc_mergeable_info;
270+
self.host.build_dir().deps(&dir, new_layout)
269271
}
270272

271273
/// Returns the directories where Rust crate dependencies are found for the
272274
/// specified unit.
273275
pub fn deps_dir(&self, unit: &Unit) -> PathBuf {
274276
let dir = self.pkg_dir(unit);
275-
self.layout(unit.kind).build_dir().deps(&dir)
277+
// `-Zrustdoc-mergeable-info` always uses new layout.
278+
let new_layout = unit.mode.is_doc() && self.ws.gctx().cli_unstable().rustdoc_mergeable_info;
279+
self.layout(unit.kind).build_dir().deps(&dir, new_layout)
276280
}
277281

278282
/// Directory where the fingerprint for the given unit should go.
@@ -495,12 +499,26 @@ impl<'a, 'gctx: 'a> CompilationFiles<'a, 'gctx> {
495499
.join("index.html")
496500
};
497501

498-
vec![OutputFile {
502+
let mut outputs = vec![OutputFile {
499503
path,
500504
hardlink: None,
501505
export_path: None,
502506
flavor: FileFlavor::Normal,
503-
}]
507+
}];
508+
509+
if bcx.gctx.cli_unstable().rustdoc_mergeable_info {
510+
outputs.push(OutputFile {
511+
path: self
512+
.deps_dir(unit)
513+
.join(unit.target.crate_name())
514+
.with_extension("json"),
515+
hardlink: None,
516+
export_path: None,
517+
flavor: FileFlavor::DocParts,
518+
})
519+
}
520+
521+
outputs
504522
}
505523
CompileMode::RunCustomBuild => {
506524
// At this time, this code path does not handle build script

src/cargo/core/compiler/build_runner/mod.rs

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
//! [`BuildRunner`] is the mutable state used during the build process.
22
3-
use std::collections::{HashMap, HashSet};
3+
use std::collections::HashMap;
4+
use std::collections::HashSet;
45
use std::path::{Path, PathBuf};
56
use std::sync::{Arc, Mutex};
67

@@ -224,6 +225,8 @@ impl<'a, 'gctx> BuildRunner<'a, 'gctx> {
224225
}
225226
}
226227

228+
self.collect_doc_merge_info()?;
229+
227230
// Collect the result of the build into `self.compilation`.
228231
for unit in &self.bcx.roots {
229232
self.collect_tests_and_executables(unit)?;
@@ -329,6 +332,77 @@ impl<'a, 'gctx> BuildRunner<'a, 'gctx> {
329332
Ok(())
330333
}
331334

335+
fn collect_doc_merge_info(&mut self) -> CargoResult<()> {
336+
if !self.bcx.gctx.cli_unstable().rustdoc_mergeable_info {
337+
return Ok(());
338+
}
339+
340+
if !self.bcx.build_config.intent.is_doc() {
341+
return Ok(());
342+
}
343+
344+
if self.bcx.build_config.intent.wants_doc_json_output() {
345+
// rustdoc JSON output doesn't support merge (yet?)
346+
return Ok(());
347+
}
348+
349+
let mut doc_parts_map: HashMap<_, Vec<_>> = HashMap::new();
350+
351+
let unit_iter = if self.bcx.build_config.intent.wants_deps_docs() {
352+
itertools::Either::Left(self.bcx.unit_graph.keys())
353+
} else {
354+
itertools::Either::Right(self.bcx.roots.iter())
355+
};
356+
357+
for unit in unit_iter {
358+
if !unit.mode.is_doc() {
359+
continue;
360+
}
361+
// Assumption: one `rustdoc` call generates only one cross-crate info JSON.
362+
let outputs = self.outputs(unit)?;
363+
364+
let Some(doc_parts) = outputs
365+
.iter()
366+
.find(|o| matches!(o.flavor, FileFlavor::DocParts))
367+
else {
368+
continue;
369+
};
370+
371+
doc_parts_map
372+
.entry(unit.kind)
373+
.or_default()
374+
.push(doc_parts.path.to_owned());
375+
}
376+
377+
self.compilation.doc_merge = Some(HashMap::from_iter(doc_parts_map.into_iter().map(
378+
|(kind, doc_parts)| {
379+
let out_dir = self
380+
.files()
381+
.layout(kind)
382+
.artifact_dir()
383+
.expect("artifact-dir was not locked")
384+
.doc()
385+
.to_path_buf();
386+
387+
let Some(fingerprint) = RustdocFingerprint::load(self, kind) else {
388+
let info = compiler::DocMergeInfo::new(doc_parts, out_dir, None);
389+
return (kind, compiler::DocMerge::Merge(info));
390+
};
391+
392+
let doc_merge = if fingerprint.is_outdated(self, &doc_parts) {
393+
let info = compiler::DocMergeInfo::new(doc_parts, out_dir, Some(fingerprint));
394+
compiler::DocMerge::Merge(info)
395+
} else {
396+
compiler::DocMerge::Fresh
397+
};
398+
399+
(kind, doc_merge)
400+
},
401+
)));
402+
403+
Ok(())
404+
}
405+
332406
/// Returns the executable for the specified unit (if any).
333407
pub fn get_executable(&mut self, unit: &Unit) -> CargoResult<Option<PathBuf>> {
334408
let is_binary = unit.target.is_executable();

src/cargo/core/compiler/compilation.rs

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@
22
33
use std::collections::{BTreeSet, HashMap};
44
use std::ffi::{OsStr, OsString};
5+
use std::path::Path;
56
use std::path::PathBuf;
67

78
use cargo_platform::CfgExpr;
89
use cargo_util::{ProcessBuilder, paths};
910

1011
use crate::core::Package;
1112
use crate::core::compiler::BuildContext;
13+
use crate::core::compiler::RustdocFingerprint;
1214
use crate::core::compiler::apply_env_config;
1315
use crate::core::compiler::{CompileKind, Unit, UnitHash};
1416
use crate::util::{CargoResult, GlobalContext, context};
@@ -106,6 +108,11 @@ pub struct Compilation<'gctx> {
106108
/// Libraries to test with rustdoc.
107109
pub to_doc_test: Vec<Doctest>,
108110

111+
/// Compilation information for running `rustdoc --merge=finalize`.
112+
///
113+
/// See `-Zrustdoc-mergeable-info` for more.
114+
pub doc_merge: Option<HashMap<CompileKind, DocMerge>>,
115+
109116
/// The target host triple.
110117
pub host: String,
111118

@@ -143,6 +150,7 @@ impl<'gctx> Compilation<'gctx> {
143150
root_crate_names: Vec::new(),
144151
extra_env: HashMap::new(),
145152
to_doc_test: Vec::new(),
153+
doc_merge: None,
146154
gctx: bcx.gctx,
147155
host: bcx.host_triple().to_string(),
148156
rustc_process,
@@ -383,6 +391,67 @@ impl<'gctx> Compilation<'gctx> {
383391
}
384392
}
385393

394+
/// Whether `rustdoc --merge=finalize` output is stale or fresh.
395+
pub enum DocMerge {
396+
/// Nothing is stale.
397+
Fresh,
398+
/// Doc merge is required.
399+
Merge(DocMergeInfo),
400+
}
401+
402+
/// Compilation information for running `rustdoc --merge=finalize`.
403+
pub struct DocMergeInfo {
404+
/// Cross-crate info JSON files for each rustdoc invocation during this `cargo doc` call.
405+
doc_parts: Vec<PathBuf>,
406+
/// Output directory for rustdoc final artifacts.
407+
out_dir: PathBuf,
408+
/// Rustdoc fingerprint file information, if existing.
409+
fingerprint: Option<RustdocFingerprint>,
410+
}
411+
412+
impl DocMergeInfo {
413+
pub fn new(
414+
doc_parts: Vec<PathBuf>,
415+
out_dir: PathBuf,
416+
fingerprint: Option<RustdocFingerprint>,
417+
) -> Self {
418+
Self {
419+
doc_parts,
420+
out_dir,
421+
fingerprint,
422+
}
423+
}
424+
425+
/// Provides arguments for rustdoc cross-crate info finalization.
426+
pub fn finalize<F>(&self, exec: F) -> CargoResult<()>
427+
where
428+
// 1. paths for `--include-parts-dir`
429+
// 2. path for `--out-dir`
430+
F: Fn(&[&Path], &Path) -> CargoResult<()>,
431+
{
432+
let mut doc_parts: Vec<_> = self
433+
.doc_parts
434+
.iter()
435+
.chain(self.fingerprint.iter().flat_map(|f| f.doc_parts().iter()))
436+
.cloned()
437+
.collect();
438+
439+
doc_parts.sort_unstable();
440+
doc_parts.dedup();
441+
442+
// rustdoc needs the directory holding doc parts files.
443+
let parts_dirs: Vec<_> = doc_parts.iter().map(|p| p.parent().unwrap()).collect();
444+
445+
exec(&parts_dirs, &self.out_dir)?;
446+
447+
if let Some(fingerprint) = &self.fingerprint {
448+
fingerprint.persist(doc_parts)?;
449+
}
450+
451+
Ok(())
452+
}
453+
}
454+
386455
/// Prepares a `rustc_tool` process with additional environment variables
387456
/// that are only relevant in a context that has a unit
388457
fn fill_rustc_tool_env(mut cmd: ProcessBuilder, unit: &Unit) -> ProcessBuilder {

src/cargo/core/compiler/fingerprint/rustdoc.rs

Lines changed: 81 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use std::path::PathBuf;
33

44
use anyhow::Context as _;
55
use cargo_util::paths;
6+
use filetime::FileTime;
67
use serde::Deserialize;
78
use serde::Serialize;
89

@@ -15,6 +16,10 @@ use crate::core::compiler::CompileKind;
1516
struct RustdocFingerprintJson {
1617
/// `rustc -vV` verbose version output.
1718
pub rustc_vv: String,
19+
20+
/// Paths to cross-crate info JSON files from previous `cargo doc` invocations.
21+
#[serde(default, skip_serializing_if = "Vec::is_empty")]
22+
pub doc_parts: Vec<PathBuf>,
1823
}
1924

2025
/// Structure used to deal with Rustdoc fingerprinting
@@ -29,7 +34,13 @@ struct RustdocFingerprintJson {
2934
/// they were compiled with the same Rustc version that we're currently using.
3035
/// Otherwise we must remove the `doc/` folder and compile again forcing a rebuild.
3136
#[derive(Debug)]
32-
pub struct RustdocFingerprint {}
37+
pub struct RustdocFingerprint {
38+
/// File modified time when loading this fingerprint.
39+
mtime: Option<FileTime>,
40+
/// Path to this fingerprint file.
41+
path: PathBuf,
42+
fingerprint: RustdocFingerprintJson,
43+
}
3344

3445
impl RustdocFingerprint {
3546
/// Checks whether the latest version of rustc used to compile this workspace's docs
@@ -58,6 +69,7 @@ impl RustdocFingerprint {
5869
}
5970
let new_fingerprint = RustdocFingerprintJson {
6071
rustc_vv: build_runner.bcx.rustc().verbose_version.clone(),
72+
doc_parts: Vec::new(),
6173
};
6274

6375
for kind in &build_runner.bcx.build_config.requested_kinds {
@@ -66,6 +78,74 @@ impl RustdocFingerprint {
6678

6779
Ok(())
6880
}
81+
82+
/// Loads the rustdoc fingerprint for a given [`CompileKind`], or returns `None` if the file cannot be read or deserialized.
83+
pub fn load(build_runner: &BuildRunner<'_, '_>, kind: CompileKind) -> Option<Self> {
84+
let path = fingerprint_path(build_runner, kind);
85+
let fingerprint = match paths::read(&path) {
86+
Ok(data) => data,
87+
Err(e) => {
88+
tracing::debug!("failed to read rustdoc fingerprint at {path:?}: {e}");
89+
return None;
90+
}
91+
};
92+
93+
match serde_json::from_str::<RustdocFingerprintJson>(&fingerprint) {
94+
Ok(mut fingerprint) => {
95+
// Doc parts may be selectively cleaned via `cargo clean -p <doc>`.
96+
// We should stop caching those.
97+
fingerprint.doc_parts.retain(|p| p.exists());
98+
Some(Self {
99+
mtime: paths::mtime(&path).ok(),
100+
path,
101+
fingerprint,
102+
})
103+
}
104+
Err(e) => {
105+
tracing::debug!("could not deserialize {:?}: {}", path, e);
106+
None
107+
}
108+
}
109+
}
110+
111+
/// Checks if the fingerprint is outdated comparing against given doc parts file paths.
112+
pub fn is_outdated(&self, build_runner: &BuildRunner<'_, '_>, doc_parts: &[PathBuf]) -> bool {
113+
let Some(fingerprint_mtime) = self.mtime.as_ref() else {
114+
return true;
115+
};
116+
117+
if self.fingerprint.rustc_vv != build_runner.bcx.rustc().verbose_version {
118+
return true;
119+
}
120+
121+
for path in doc_parts {
122+
let parts_mtime = match paths::mtime(&path) {
123+
Ok(mtime) => mtime,
124+
Err(e) => {
125+
tracing::debug!("failed to read mtime of {}: {e}", path.display());
126+
return true;
127+
}
128+
};
129+
130+
if &parts_mtime > fingerprint_mtime {
131+
return true;
132+
}
133+
}
134+
135+
false
136+
}
137+
138+
pub fn persist(&self, doc_parts: Vec<PathBuf>) -> CargoResult<()> {
139+
let new_fingerprint = RustdocFingerprintJson {
140+
rustc_vv: self.fingerprint.rustc_vv.clone(),
141+
doc_parts,
142+
};
143+
paths::write(&self.path, serde_json::to_string(&new_fingerprint)?)
144+
}
145+
146+
pub fn doc_parts(&self) -> &[PathBuf] {
147+
&self.fingerprint.doc_parts
148+
}
69149
}
70150

71151
/// Returns the path to rustdoc fingerprint file for a given [`CompileKind`].

src/cargo/core/compiler/layout.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -307,8 +307,8 @@ impl BuildDirLayout {
307307
Ok(())
308308
}
309309
/// Fetch the deps path.
310-
pub fn deps(&self, pkg_dir: &str) -> PathBuf {
311-
if self.is_new_layout {
310+
pub fn deps(&self, pkg_dir: &str, force_new_layout: bool) -> PathBuf {
311+
if self.is_new_layout || force_new_layout {
312312
self.build_unit(pkg_dir).join("deps")
313313
} else {
314314
self.legacy_deps().to_path_buf()

0 commit comments

Comments
 (0)