diff --git a/packages/cli/tests/checkin/cyclic_dependencies/object.snapshot b/packages/cli/tests/checkin/cyclic_dependencies/object.snapshot index 20b36fecc..208c5e226 100644 --- a/packages/cli/tests/checkin/cyclic_dependencies/object.snapshot +++ b/packages/cli/tests/checkin/cyclic_dependencies/object.snapshot @@ -17,27 +17,11 @@ tg.directory({ }, "module": "ts", }, - { - "kind": "file", - "contents": tg.blob("import * as foo from \"../foo\";"), - "dependencies": { - "../foo": { - "item": { - "index": 3, - "kind": "directory", - }, - "options": { - "path": "../foo", - }, - }, - }, - "module": "ts", - }, { "kind": "directory", "entries": { "tangram.ts": { - "index": 1, + "index": 0, "kind": "file", }, }, @@ -46,13 +30,29 @@ tg.directory({ "kind": "directory", "entries": { "tangram.ts": { - "index": 0, + "index": 3, "kind": "file", }, }, }, + { + "kind": "file", + "contents": tg.blob("import * as foo from \"../foo\";"), + "dependencies": { + "../foo": { + "item": { + "index": 1, + "kind": "directory", + }, + "options": { + "path": "../foo", + }, + }, + }, + "module": "ts", + }, ], }), - "index": 3, + "index": 1, "kind": "directory", }) \ No newline at end of file diff --git a/packages/cli/tests/checkin/import_package_from_current/object.snapshot b/packages/cli/tests/checkin/import_package_from_current/object.snapshot index 4e758369b..dd387772d 100644 --- a/packages/cli/tests/checkin/import_package_from_current/object.snapshot +++ b/packages/cli/tests/checkin/import_package_from_current/object.snapshot @@ -2,26 +2,13 @@ tg.directory({ "a": { "graph": tg.graph({ "nodes": [ - { - "kind": "directory", - "entries": { - "mod.tg.ts": { - "index": 1, - "kind": "file", - }, - "tangram.ts": tg.file({ - "contents": tg.blob(""), - "module": "ts", - }), - }, - }, { "kind": "file", "contents": tg.blob("import * as a from \".\";"), "dependencies": { ".": { "item": { - "index": 0, + "index": 1, "kind": "directory", }, "options": { @@ -31,9 +18,22 @@ tg.directory({ }, "module": "ts", }, + { + "kind": "directory", + "entries": { + "mod.tg.ts": { + "index": 0, + "kind": "file", + }, + "tangram.ts": tg.file({ + "contents": tg.blob(""), + "module": "ts", + }), + }, + }, ], }), - "index": 0, + "index": 1, "kind": "directory", }, }) \ No newline at end of file diff --git a/packages/cli/tests/checkin/package_with_cyclic_modules/object.snapshot b/packages/cli/tests/checkin/package_with_cyclic_modules/object.snapshot index 8e438fa7a..1221dbb9a 100644 --- a/packages/cli/tests/checkin/package_with_cyclic_modules/object.snapshot +++ b/packages/cli/tests/checkin/package_with_cyclic_modules/object.snapshot @@ -4,15 +4,15 @@ tg.directory({ "nodes": [ { "kind": "file", - "contents": tg.blob("import * as root from \"./tangram.ts\";"), + "contents": tg.blob("import * as foo from \"./foo.tg.ts\";"), "dependencies": { - "./tangram.ts": { + "./foo.tg.ts": { "item": { "index": 1, "kind": "file", }, "options": { - "path": "tangram.ts", + "path": "foo.tg.ts", }, }, }, @@ -20,15 +20,15 @@ tg.directory({ }, { "kind": "file", - "contents": tg.blob("import * as foo from \"./foo.tg.ts\";"), + "contents": tg.blob("import * as root from \"./tangram.ts\";"), "dependencies": { - "./foo.tg.ts": { + "./tangram.ts": { "item": { "index": 0, "kind": "file", }, "options": { - "path": "foo.tg.ts", + "path": "tangram.ts", }, }, }, @@ -36,7 +36,7 @@ tg.directory({ }, ], }), - "index": 0, + "index": 1, "kind": "file", }, "tangram.ts": { @@ -44,15 +44,15 @@ tg.directory({ "nodes": [ { "kind": "file", - "contents": tg.blob("import * as root from \"./tangram.ts\";"), + "contents": tg.blob("import * as foo from \"./foo.tg.ts\";"), "dependencies": { - "./tangram.ts": { + "./foo.tg.ts": { "item": { "index": 1, "kind": "file", }, "options": { - "path": "tangram.ts", + "path": "foo.tg.ts", }, }, }, @@ -60,15 +60,15 @@ tg.directory({ }, { "kind": "file", - "contents": tg.blob("import * as foo from \"./foo.tg.ts\";"), + "contents": tg.blob("import * as root from \"./tangram.ts\";"), "dependencies": { - "./foo.tg.ts": { + "./tangram.ts": { "item": { "index": 0, "kind": "file", }, "options": { - "path": "foo.tg.ts", + "path": "tangram.ts", }, }, }, @@ -76,7 +76,7 @@ tg.directory({ }, ], }), - "index": 1, + "index": 0, "kind": "file", }, }) \ No newline at end of file diff --git a/packages/cli/tests/checkin/repeated_checkin_with_cycles.nu b/packages/cli/tests/checkin/repeated_checkin_with_cycles.nu new file mode 100644 index 000000000..beaee8542 --- /dev/null +++ b/packages/cli/tests/checkin/repeated_checkin_with_cycles.nu @@ -0,0 +1,35 @@ +use ../../test.nu * + +# Graph IDs must be identical regardless of entry point for cyclic imports. +# +# Two issues were identified: +# +# 1. External pointers (references to nodes outside the SCC) used ephemeral +# checkin graph indices, causing initial labels to differ by entry point. +# Fix: Resolve external pointers to object IDs in checkin_graph_node_initial_label. +# +# 2. Symmetric nodes (identical content and structure) produce identical WL labels. +# Fix: Use node paths as a tiebreaker in the sort. + +let server = spawn + +# Minimal 3-node star cycle: hub imports a and b, both a and b import hub. +# Nodes a and b have identical content and structure, producing identical WL labels. +let path = artifact { + a.tg.ts: 'import "./hub.tg.ts"; export default {};' + b.tg.ts: 'import "./hub.tg.ts"; export default {};' + hub.tg.ts: 'import "./a.tg.ts"; import "./b.tg.ts"; export default {};' +} + +# Check in from hub first. +let hub_id = tg checkin ($path | path join 'hub.tg.ts') +let hub_obj = tg get $hub_id +let graph_from_hub = $hub_obj | parse --regex '"graph":(gph_[a-z0-9]+)' | get capture0 | first + +# Check in from a. +let a_id = tg checkin ($path | path join 'a.tg.ts') +let a_obj = tg get $a_id +let graph_from_a = $a_obj | parse --regex '"graph":(gph_[a-z0-9]+)' | get capture0 | first + +# The graph IDs should be identical regardless of entry point. +assert equal $graph_from_hub $graph_from_a diff --git a/packages/cli/tests/checkin/tag_dependency_cycles/lock.snapshot b/packages/cli/tests/checkin/tag_dependency_cycles/lock.snapshot index 3b4e6421f..9958523d0 100644 --- a/packages/cli/tests/checkin/tag_dependency_cycles/lock.snapshot +++ b/packages/cli/tests/checkin/tag_dependency_cycles/lock.snapshot @@ -18,7 +18,7 @@ "kind": "directory" }, "options": { - "id": "dir_01p23dxncarc4nhkjpyy3tjbj7fmekx9fwty3gyzkt1as9t4vphg60", + "id": "dir_01bh601syfs5twxg1h0d8ar0avybnk2k91ggcqh7ptq01n78av2p90", "tag": "a/1.1.0" } }, @@ -28,7 +28,7 @@ "kind": "directory" }, "options": { - "id": "dir_01zp51yz7v92dwny4m5jvr3vkf33086z9vpjmhc0sk7rkt35pp8qg0", + "id": "dir_01jrxzb298n5pwk7q89bp3wv5q3yawmbwm1ynt2rgs8x3aedbfw0h0", "tag": "b/1.0.0" } } @@ -66,7 +66,7 @@ "kind": "directory" }, "options": { - "id": "dir_01zp51yz7v92dwny4m5jvr3vkf33086z9vpjmhc0sk7rkt35pp8qg0", + "id": "dir_01jrxzb298n5pwk7q89bp3wv5q3yawmbwm1ynt2rgs8x3aedbfw0h0", "tag": "b/1.0.0" } } @@ -106,7 +106,7 @@ "kind": "directory" }, "options": { - "id": "dir_01p23dxncarc4nhkjpyy3tjbj7fmekx9fwty3gyzkt1as9t4vphg60", + "id": "dir_01bh601syfs5twxg1h0d8ar0avybnk2k91ggcqh7ptq01n78av2p90", "tag": "a/1.1.0" } } diff --git a/packages/cli/tests/checkin/tag_dependency_cycles/object.snapshot b/packages/cli/tests/checkin/tag_dependency_cycles/object.snapshot index 6e0161579..92aca0fc2 100644 --- a/packages/cli/tests/checkin/tag_dependency_cycles/object.snapshot +++ b/packages/cli/tests/checkin/tag_dependency_cycles/object.snapshot @@ -6,15 +6,6 @@ tg.directory({ "item": { "graph": tg.graph({ "nodes": [ - { - "kind": "directory", - "entries": { - "tangram.ts": { - "index": 2, - "kind": "file", - }, - }, - }, { "kind": "directory", "entries": { @@ -23,35 +14,53 @@ tg.directory({ "kind": "file", }, "tangram.ts": { - "index": 4, + "index": 1, "kind": "file", }, }, }, { "kind": "file", - "contents": tg.blob("import * as b from \"b/*\";"), + "contents": tg.blob("import * as a from \"a/*\";\nimport * as foo from \"./foo.tg.ts\";"), "dependencies": { - "b/*": { + "./foo.tg.ts": { "item": { - "index": 1, + "index": 3, + "kind": "file", + }, + "options": { + "path": "foo.tg.ts", + }, + }, + "a/*": { + "item": { + "index": 2, "kind": "directory", }, "options": { - "id": "dir_01zp51yz7v92dwny4m5jvr3vkf33086z9vpjmhc0sk7rkt35pp8qg0", - "tag": "b/1.0.0", + "id": "dir_01bh601syfs5twxg1h0d8ar0avybnk2k91ggcqh7ptq01n78av2p90", + "tag": "a/1.1.0", }, }, }, "module": "ts", }, + { + "kind": "directory", + "entries": { + "tangram.ts": { + "index": 4, + "kind": "file", + }, + }, + }, { "kind": "file", "contents": tg.blob("import * as b from \"./tangram.ts\";"), "dependencies": { "./tangram.ts": { "item": { - "index": 4, + "index": 1, "kind": "file", }, "options": { @@ -63,25 +72,16 @@ tg.directory({ }, { "kind": "file", - "contents": tg.blob("import * as a from \"a/*\";\nimport * as foo from \"./foo.tg.ts\";"), + "contents": tg.blob("import * as b from \"b/*\";"), "dependencies": { - "./foo.tg.ts": { - "item": { - "index": 3, - "kind": "file", - }, - "options": { - "path": "foo.tg.ts", - }, - }, - "a/*": { + "b/*": { "item": { "index": 0, "kind": "directory", }, "options": { - "id": "dir_01p23dxncarc4nhkjpyy3tjbj7fmekx9fwty3gyzkt1as9t4vphg60", - "tag": "a/1.1.0", + "id": "dir_01jrxzb298n5pwk7q89bp3wv5q3yawmbwm1ynt2rgs8x3aedbfw0h0", + "tag": "b/1.0.0", }, }, }, @@ -89,11 +89,11 @@ tg.directory({ }, ], }), - "index": 0, + "index": 2, "kind": "directory", }, "options": { - "id": "dir_01p23dxncarc4nhkjpyy3tjbj7fmekx9fwty3gyzkt1as9t4vphg60", + "id": "dir_01bh601syfs5twxg1h0d8ar0avybnk2k91ggcqh7ptq01n78av2p90", "tag": "a/1.1.0", }, }, @@ -101,15 +101,6 @@ tg.directory({ "item": { "graph": tg.graph({ "nodes": [ - { - "kind": "directory", - "entries": { - "tangram.ts": { - "index": 2, - "kind": "file", - }, - }, - }, { "kind": "directory", "entries": { @@ -118,35 +109,53 @@ tg.directory({ "kind": "file", }, "tangram.ts": { - "index": 4, + "index": 1, "kind": "file", }, }, }, { "kind": "file", - "contents": tg.blob("import * as b from \"b/*\";"), + "contents": tg.blob("import * as a from \"a/*\";\nimport * as foo from \"./foo.tg.ts\";"), "dependencies": { - "b/*": { + "./foo.tg.ts": { "item": { - "index": 1, + "index": 3, + "kind": "file", + }, + "options": { + "path": "foo.tg.ts", + }, + }, + "a/*": { + "item": { + "index": 2, "kind": "directory", }, "options": { - "id": "dir_01zp51yz7v92dwny4m5jvr3vkf33086z9vpjmhc0sk7rkt35pp8qg0", - "tag": "b/1.0.0", + "id": "dir_01bh601syfs5twxg1h0d8ar0avybnk2k91ggcqh7ptq01n78av2p90", + "tag": "a/1.1.0", }, }, }, "module": "ts", }, + { + "kind": "directory", + "entries": { + "tangram.ts": { + "index": 4, + "kind": "file", + }, + }, + }, { "kind": "file", "contents": tg.blob("import * as b from \"./tangram.ts\";"), "dependencies": { "./tangram.ts": { "item": { - "index": 4, + "index": 1, "kind": "file", }, "options": { @@ -158,25 +167,16 @@ tg.directory({ }, { "kind": "file", - "contents": tg.blob("import * as a from \"a/*\";\nimport * as foo from \"./foo.tg.ts\";"), + "contents": tg.blob("import * as b from \"b/*\";"), "dependencies": { - "./foo.tg.ts": { - "item": { - "index": 3, - "kind": "file", - }, - "options": { - "path": "foo.tg.ts", - }, - }, - "a/*": { + "b/*": { "item": { "index": 0, "kind": "directory", }, "options": { - "id": "dir_01p23dxncarc4nhkjpyy3tjbj7fmekx9fwty3gyzkt1as9t4vphg60", - "tag": "a/1.1.0", + "id": "dir_01jrxzb298n5pwk7q89bp3wv5q3yawmbwm1ynt2rgs8x3aedbfw0h0", + "tag": "b/1.0.0", }, }, }, @@ -184,11 +184,11 @@ tg.directory({ }, ], }), - "index": 1, + "index": 0, "kind": "directory", }, "options": { - "id": "dir_01zp51yz7v92dwny4m5jvr3vkf33086z9vpjmhc0sk7rkt35pp8qg0", + "id": "dir_01jrxzb298n5pwk7q89bp3wv5q3yawmbwm1ynt2rgs8x3aedbfw0h0", "tag": "b/1.0.0", }, }, diff --git a/packages/cli/tests/checkin/tagged_package_with_cyclic_dependency/lock.snapshot b/packages/cli/tests/checkin/tagged_package_with_cyclic_dependency/lock.snapshot index ee6edb95b..cb34034f1 100644 --- a/packages/cli/tests/checkin/tagged_package_with_cyclic_dependency/lock.snapshot +++ b/packages/cli/tests/checkin/tagged_package_with_cyclic_dependency/lock.snapshot @@ -15,7 +15,7 @@ "a": { "item": null, "options": { - "id": "dir_01dtdtrz5tqh3s7c19sfdbhh3mmhgmnte751wz7g7spfqq1y9re340", + "id": "dir_01zbr8sgp4jttp8z078yxbnqtabk07avfamzafr9ka2g22ezxdnp10", "tag": "a" } } diff --git a/packages/cli/tests/checkin/tagged_package_with_cyclic_dependency/object.snapshot b/packages/cli/tests/checkin/tagged_package_with_cyclic_dependency/object.snapshot index 15dbda11a..f49b830a9 100644 --- a/packages/cli/tests/checkin/tagged_package_with_cyclic_dependency/object.snapshot +++ b/packages/cli/tests/checkin/tagged_package_with_cyclic_dependency/object.snapshot @@ -9,15 +9,15 @@ tg.directory({ "nodes": [ { "kind": "file", - "contents": tg.blob("import * as a from \"./tangram.ts\";"), + "contents": tg.blob("import foo from \"./foo.tg.ts\";"), "dependencies": { - "./tangram.ts": { + "./foo.tg.ts": { "item": { "index": 1, "kind": "file", }, "options": { - "path": "tangram.ts", + "path": "foo.tg.ts", }, }, }, @@ -25,15 +25,15 @@ tg.directory({ }, { "kind": "file", - "contents": tg.blob("import foo from \"./foo.tg.ts\";"), + "contents": tg.blob("import * as a from \"./tangram.ts\";"), "dependencies": { - "./foo.tg.ts": { + "./tangram.ts": { "item": { "index": 0, "kind": "file", }, "options": { - "path": "foo.tg.ts", + "path": "tangram.ts", }, }, }, @@ -41,7 +41,7 @@ tg.directory({ }, ], }), - "index": 0, + "index": 1, "kind": "file", }, "tangram.ts": { @@ -49,15 +49,15 @@ tg.directory({ "nodes": [ { "kind": "file", - "contents": tg.blob("import * as a from \"./tangram.ts\";"), + "contents": tg.blob("import foo from \"./foo.tg.ts\";"), "dependencies": { - "./tangram.ts": { + "./foo.tg.ts": { "item": { "index": 1, "kind": "file", }, "options": { - "path": "tangram.ts", + "path": "foo.tg.ts", }, }, }, @@ -65,15 +65,15 @@ tg.directory({ }, { "kind": "file", - "contents": tg.blob("import foo from \"./foo.tg.ts\";"), + "contents": tg.blob("import * as a from \"./tangram.ts\";"), "dependencies": { - "./foo.tg.ts": { + "./tangram.ts": { "item": { "index": 0, "kind": "file", }, "options": { - "path": "foo.tg.ts", + "path": "tangram.ts", }, }, }, @@ -81,12 +81,12 @@ tg.directory({ }, ], }), - "index": 1, + "index": 0, "kind": "file", }, }), "options": { - "id": "dir_01dtdtrz5tqh3s7c19sfdbhh3mmhgmnte751wz7g7spfqq1y9re340", + "id": "dir_01zbr8sgp4jttp8z078yxbnqtabk07avfamzafr9ka2g22ezxdnp10", "tag": "a", }, }, diff --git a/packages/cli/tests/object/get_by_id_with_path_cycle.nu b/packages/cli/tests/object/get_by_id_with_path_cycle.nu index d507d862e..b59806194 100644 --- a/packages/cli/tests/object/get_by_id_with_path_cycle.nu +++ b/packages/cli/tests/object/get_by_id_with_path_cycle.nu @@ -17,15 +17,15 @@ snapshot $output.stdout ' "nodes": [ { "kind": "file", - "contents": blb_010pwqd32ehjhaj9eswh61x95cgqby7x5w0fybj56a34cmbehs3mhg, + "contents": blb_01b7ka1dzz1k7n5fh52av0vxtkycf3z2kntyvnvv549x2xdy36mm9g, "dependencies": { - "./tangram.ts": { + "./file.tg.ts": { "item": { "index": 1, "kind": "file", }, "options": { - "path": "tangram.ts", + "path": "file.tg.ts", }, }, }, @@ -33,15 +33,15 @@ snapshot $output.stdout ' }, { "kind": "file", - "contents": blb_01b7ka1dzz1k7n5fh52av0vxtkycf3z2kntyvnvv549x2xdy36mm9g, + "contents": blb_010pwqd32ehjhaj9eswh61x95cgqby7x5w0fybj56a34cmbehs3mhg, "dependencies": { - "./file.tg.ts": { + "./tangram.ts": { "item": { "index": 0, "kind": "file", }, "options": { - "path": "file.tg.ts", + "path": "tangram.ts", }, }, }, @@ -49,7 +49,7 @@ snapshot $output.stdout ' }, ], }), - "index": 0, + "index": 1, "kind": "file", }) diff --git a/packages/cli/tests/publish/publish_external_dependency_determinism.nu b/packages/cli/tests/publish/publish_external_dependency_determinism.nu new file mode 100644 index 000000000..89cd1e918 --- /dev/null +++ b/packages/cli/tests/publish/publish_external_dependency_determinism.nu @@ -0,0 +1,58 @@ +use ../../test.nu * + +# Test that graph IDs are deterministic when a package with cycles is entered via an external dependency. +# +# This reproduces the bug where publishing std directly vs via jq produced different IDs. +# The root cause was external pointers (references to nodes outside the SCC) using ephemeral +# checkin graph indices instead of resolved object IDs. + +let remote = spawn --cloud -n remote +let local = spawn -n local -c { + remotes: [{ name: default, url: $remote.url }] +} + +# Create a package "inner" where nodes in a cycle reference nodes outside the cycle. +# - a.tg.ts and b.tg.ts form a cycle AND both reference helper.tg.ts +# - helper.tg.ts is outside the cycle +# This means a-b SCC has external pointers to helper. +let root = artifact { + packages: { + inner: { + "a.tg.ts": ' + import b from "./b.tg.ts"; + import helper from "./helper.tg.ts"; + export default {}; + ' + "b.tg.ts": ' + import a from "./a.tg.ts"; + import helper from "./helper.tg.ts"; + export default {}; + ' + "helper.tg.ts": ' + export default {}; + ' + tangram.ts: ' + import a from "./a.tg.ts"; + import b from "./b.tg.ts"; + export let metadata = { tag: "inner/0" }; + ' + } + outer: { + tangram.ts: ' + import inner from "inner" with { local: "../inner" }; + export let metadata = { tag: "outer/0" }; + ' + } + } +} + +# Publish inner directly (like publishing std). +tg publish ($root | path join "packages/inner") +let inner_from_inner = tg tag get inner/0 | from json | get item + +# Publish outer which depends on inner (like publishing jq which depends on std). +tg publish ($root | path join "packages/outer") +let inner_from_outer = tg tag get inner/0 | from json | get item + +# Inner should have the same ID regardless of entry point. +assert ($inner_from_inner == $inner_from_outer) $"inner has different IDs when published directly vs via outer. Direct: ($inner_from_inner), via outer: ($inner_from_outer)" diff --git a/packages/server/src/checkin/artifact.rs b/packages/server/src/checkin/artifact.rs index bf4a48a7a..5b6090112 100644 --- a/packages/server/src/checkin/artifact.rs +++ b/packages/server/src/checkin/artifact.rs @@ -10,7 +10,7 @@ use { }, num::ToPrimitive as _, std::{ - collections::{BTreeMap, BTreeSet, HashSet, hash_map::DefaultHasher}, + collections::{BTreeMap, BTreeSet}, hash::{Hash, Hasher}, path::Path, }, @@ -271,23 +271,34 @@ impl Server { scc: &[usize], touched_at: i64, ) -> tg::Result<()> { - // Compute canonical labels using the Weisfeiler-Leman algorithm. + // Run WL to compute canonical labels for all nodes in the SCC. let canonical_labels = Self::checkin_graph_canonical_labels(graph, paths, scc)?; - // Sort the SCC indices by canonical labels. - let mut sorted_scc: Vec = scc.to_vec(); - sorted_scc.sort_by(|a, b| canonical_labels[a].cmp(&canonical_labels[b])); + // Group nodes by their WL label. + let mut label_groups: BTreeMap> = BTreeMap::new(); + for &index in scc { + label_groups + .entry(canonical_labels[&index]) + .or_default() + .push(index); + } - // Create a mapping from global node index to position in the sorted SCC. - let scc_positions: BTreeMap = sorted_scc - .iter() - .enumerate() - .map(|(position, &global)| (global, position)) - .collect(); + // Build mappings from groups. + let mut scc_positions: BTreeMap = BTreeMap::new(); + let mut unique_indices: Vec = Vec::new(); + let mut all_indices_by_position: Vec> = Vec::new(); + + for (position, (_label, group)) in label_groups.into_iter().enumerate() { + unique_indices.push(group[0]); + all_indices_by_position.push(group.clone()); + for &index in &group { + scc_positions.insert(index, position); + } + } - // Create the nodes using the sorted order. - let mut nodes = Vec::with_capacity(sorted_scc.len()); - for index in &sorted_scc { + // Create the nodes using only the unique representatives. + let mut nodes = Vec::with_capacity(unique_indices.len()); + for index in &unique_indices { Self::checkin_create_graph_node(graph, paths, &scc_positions, &mut nodes, *index)?; } @@ -296,55 +307,57 @@ impl Server { let (id, _, _) = Self::checkin_create_artifact( graph, &data, - &sorted_scc, + &unique_indices, store_args, object_messages, touched_at, )?; - // Set edges and ids for the nodes in the graph. + // Set edges and ids for all original nodes in the graph. let graph_id = tg::graph::Id::try_from(id).unwrap(); - for (local, global) in sorted_scc.iter().copied().enumerate() { - let node = graph.nodes.get_mut(&global).unwrap(); - let artifact_kind = node.variant.kind(); - let data = match &node.variant { - Variant::Directory(_) => { - let pointer = tg::graph::data::Pointer { - graph: Some(graph_id.clone()), - index: local, - kind: artifact_kind, - }; - tg::object::Data::Directory(tg::directory::Data::Pointer(pointer)) - }, - Variant::File(_) => { - let pointer = tg::graph::data::Pointer { - graph: Some(graph_id.clone()), - index: local, - kind: artifact_kind, - }; - tg::object::Data::File(tg::file::Data::Pointer(pointer)) - }, - Variant::Symlink(_) => { - let pointer = tg::graph::data::Pointer { + for (local, global_indices) in all_indices_by_position.iter().enumerate() { + for &global in global_indices { + let node = graph.nodes.get_mut(&global).unwrap(); + let artifact_kind = node.variant.kind(); + let data = match &node.variant { + Variant::Directory(_) => { + let pointer = tg::graph::data::Pointer { + graph: Some(graph_id.clone()), + index: local, + kind: artifact_kind, + }; + tg::object::Data::Directory(tg::directory::Data::Pointer(pointer)) + }, + Variant::File(_) => { + let pointer = tg::graph::data::Pointer { + graph: Some(graph_id.clone()), + index: local, + kind: artifact_kind, + }; + tg::object::Data::File(tg::file::Data::Pointer(pointer)) + }, + Variant::Symlink(_) => { + let pointer = tg::graph::data::Pointer { + graph: Some(graph_id.clone()), + index: local, + kind: artifact_kind, + }; + tg::object::Data::Symlink(tg::symlink::Data::Pointer(pointer)) + }, + }; + let kind = data.kind(); + let bytes = data + .serialize() + .map_err(|source| tg::error!(!source, "failed to serialize the data"))?; + let id = tg::object::Id::new(kind, &bytes); + node.edge + .replace(tg::graph::data::Edge::Pointer(tg::graph::data::Pointer { graph: Some(graph_id.clone()), index: local, kind: artifact_kind, - }; - tg::object::Data::Symlink(tg::symlink::Data::Pointer(pointer)) - }, - }; - let kind = data.kind(); - let bytes = data - .serialize() - .map_err(|source| tg::error!(!source, "failed to serialize the data"))?; - let id = tg::object::Id::new(kind, &bytes); - node.edge - .replace(tg::graph::data::Edge::Pointer(tg::graph::data::Pointer { - graph: Some(graph_id.clone()), - index: local, - kind: artifact_kind, - })); - node.id.replace(id); + })); + node.id.replace(id); + } } Ok(()) @@ -844,14 +857,13 @@ impl Server { Ok(id) } - /// Compute initial labels for graph nodes by serializing node data with intra-SCC references set to None. fn checkin_graph_node_initial_label( graph: &Graph, paths: &Paths, - scc_set: &HashSet, + scc: &[usize], index: usize, ) -> tg::Result> { - // Create the node data with intra-SCC references set to None. + // Create the node data with normalized references. let node = graph.nodes.get(&index).unwrap(); let data = match &node.variant { Variant::File(file) => { @@ -870,11 +882,18 @@ impl Server { .cloned() .or_else(|| dependency.item().clone()); let edge = match edge { - Some(tg::graph::data::Edge::Pointer(p)) - if p.graph.is_none() && scc_set.contains(&p.index) => + Some(tg::graph::data::Edge::Pointer(pointer)) + if pointer.graph.is_none() && scc.contains(&pointer.index) => { None }, + Some(tg::graph::data::Edge::Pointer(pointer)) + if pointer.graph.is_none() => + { + let node = graph.nodes.get(&pointer.index).unwrap(); + let id = node.id.as_ref().unwrap(); + Some(tg::graph::data::Edge::Object(id.clone())) + }, other => other, }; tg::graph::data::Dependency(tg::Referent { @@ -898,15 +917,20 @@ impl Server { .iter() .map(|(name, edge)| { let edge = match edge { - tg::graph::data::Edge::Pointer(p) - if p.graph.is_none() && scc_set.contains(&p.index) => + tg::graph::data::Edge::Pointer(pointer) + if pointer.graph.is_none() && scc.contains(&pointer.index) => { tg::graph::data::Edge::Pointer(tg::graph::data::Pointer { graph: None, index: 0, - kind: p.kind, + kind: pointer.kind, }) }, + tg::graph::data::Edge::Pointer(pointer) if pointer.graph.is_none() => { + let node = graph.nodes.get(&pointer.index).unwrap(); + let id = node.id.as_ref().unwrap().clone().try_into().unwrap(); + tg::graph::data::Edge::Object(id) + }, other => other.clone(), }; (name.clone(), edge) @@ -917,17 +941,25 @@ impl Server { )) }, Variant::Symlink(symlink) => { - let artifact = symlink.artifact.as_ref().map(|edge| match edge { - tg::graph::data::Edge::Pointer(p) - if p.graph.is_none() && scc_set.contains(&p.index) => - { - tg::graph::data::Edge::Pointer(tg::graph::data::Pointer { - graph: None, - index: 0, - kind: p.kind, - }) - }, - other => other.clone(), + let artifact = symlink.artifact.as_ref().map(|edge| { + let edge: tg::graph::data::Edge = match edge { + tg::graph::data::Edge::Pointer(pointer) + if pointer.graph.is_none() && scc.contains(&pointer.index) => + { + tg::graph::data::Edge::Pointer(tg::graph::data::Pointer { + graph: None, + index: 0, + kind: pointer.kind, + }) + }, + tg::graph::data::Edge::Pointer(pointer) if pointer.graph.is_none() => { + let node = graph.nodes.get(&pointer.index).unwrap(); + let id = node.id.as_ref().unwrap().clone().try_into().unwrap(); + tg::graph::data::Edge::Object(id) + }, + other => other.clone(), + }; + edge }); tg::graph::data::Node::Symlink(tg::graph::data::Symlink { artifact, @@ -937,83 +969,95 @@ impl Server { }; // Serialize the node data. - let bytes = serde_json::to_vec(&data) + let bytes = tangram_serialize::to_vec(&data) .map_err(|source| tg::error!(!source, "failed to serialize the node"))?; Ok(bytes) } - /// Compute canonical labels using the Weisfeiler-Leman algorithm. This produces a deterministic ordering of nodes in a strongly connected component. + /// Compute canonical labels for all nodes in the SCC using the Weisfeiler-Lehman algorithm. fn checkin_graph_canonical_labels( graph: &Graph, paths: &Paths, scc: &[usize], ) -> tg::Result> { - // Build the SCC set for quick lookup. - let scc_set: HashSet = scc.iter().copied().collect(); + // Helper to hash bytes. + fn hash_bytes(bytes: &[u8]) -> u64 { + let mut hasher = fnv::FnvHasher::default(); + bytes.hash(&mut hasher); + hasher.finish() + } - // Compute initial labels from serialized node data. + // Compute initial labels from node content. let mut labels: BTreeMap = BTreeMap::new(); for &index in scc { - let data = Self::checkin_graph_node_initial_label(graph, paths, &scc_set, index)?; - let mut hasher = DefaultHasher::new(); - data.hash(&mut hasher); - labels.insert(index, hasher.finish()); + let bytes = Self::checkin_graph_node_initial_label(graph, paths, scc, index)?; + labels.insert(index, hash_bytes(&bytes)); } - // Collect edges within the SCC (both outgoing and incoming for undirected-style refinement). - let mut outgoing: BTreeMap> = BTreeMap::new(); - let mut incoming: BTreeMap> = BTreeMap::new(); - for &index in scc { - outgoing.insert(index, Vec::new()); - incoming.insert(index, Vec::new()); - } + // Create the neighbors. + let scc_set: BTreeSet = scc.iter().copied().collect(); + let mut neighbors: BTreeMap> = BTreeMap::new(); for &index in scc { let node = graph.nodes.get(&index).unwrap(); - for child in node.children() { - if scc_set.contains(&child) { - outgoing.get_mut(&index).unwrap().push(child); - incoming.get_mut(&child).unwrap().push(index); - } + let mut node_neighbors = Vec::new(); + match &node.variant { + Variant::File(file) => { + for (reference, option) in &file.dependencies { + let edge = paths + .get(&(index, reference.clone())) + .cloned() + .or_else(|| option.as_ref().and_then(|d| d.item().clone())); + if let Some(tg::graph::data::Edge::Pointer(pointer)) = edge + && pointer.graph.is_none() + && scc_set.contains(&pointer.index) + { + node_neighbors.push(pointer.index); + } + } + }, + Variant::Directory(directory) => { + for edge in directory.entries.values() { + if let tg::graph::data::Edge::Pointer(pointer) = edge + && pointer.graph.is_none() + && scc_set.contains(&pointer.index) + { + node_neighbors.push(pointer.index); + } + } + }, + Variant::Symlink(symlink) => { + if let Some(tg::graph::data::Edge::Pointer(pointer)) = &symlink.artifact + && pointer.graph.is_none() + && scc_set.contains(&pointer.index) + { + node_neighbors.push(pointer.index); + } + }, } + neighbors.insert(index, node_neighbors); } - // Sort edges for determinism. - for edges in outgoing.values_mut() { - edges.sort_unstable(); - } - for edges in incoming.values_mut() { - edges.sort_unstable(); - } - - // Iteratively refine labels until stable. - let max_iterations = scc.len() + 1; - for _ in 0..max_iterations { + // Run WL iterations until convergence. + loop { let mut new_labels: BTreeMap = BTreeMap::new(); - for &index in scc { - let mut hasher = DefaultHasher::new(); + // Collect and sort neighbor labels. + let mut neighbor_labels: Vec = + neighbors[&index].iter().map(|&n| labels[&n]).collect(); + neighbor_labels.sort_unstable(); - // Hash the current label. + // Hash the node's label with its sorted neighbor labels. + let mut hasher = fnv::FnvHasher::default(); labels[&index].hash(&mut hasher); - - // Hash sorted outgoing neighbor labels. - let mut out_labels: Vec = - outgoing[&index].iter().map(|&n| labels[&n]).collect(); - out_labels.sort_unstable(); - out_labels.hash(&mut hasher); - - // Hash sorted incoming neighbor labels. - let mut in_labels: Vec = - incoming[&index].iter().map(|&n| labels[&n]).collect(); - in_labels.sort_unstable(); - in_labels.hash(&mut hasher); - + neighbor_labels.hash(&mut hasher); new_labels.insert(index, hasher.finish()); } - // Check for stability. - if new_labels == labels { + // Check for convergence: same number of distinct labels. + let old_distinct: BTreeSet = labels.values().copied().collect(); + let new_distinct: BTreeSet = new_labels.values().copied().collect(); + if old_distinct.len() == new_distinct.len() { break; } labels = new_labels;