@@ -1094,9 +1094,14 @@ def test_from_standard_tree_sequence(self):
10941094 assert i1 .flags == i2 .flags
10951095 assert tsutil .json_metadata_is_subset (i1 .metadata , i2 .metadata )
10961096 # Unless inference is perfect, internal nodes may differ, but sample nodes
1097- # should be identical
1097+ # should be identical. Node metadata is not transferred, however, and a tsinfer-
1098+ # specific node metadata schema is used (where empty is None rather than b"")
1099+ assert (
1100+ ts_inferred .table_metadata_schemas .node
1101+ == tsinfer .formats .node_metadata_schema ()
1102+ )
10981103 for n1 , n2 in zip (ts .samples (), ts_inferred .samples ()):
1099- assert ts .node (n1 ) == ts_inferred .node (n2 )
1104+ assert ts .node (n1 ). replace ( metadata = None ) == ts_inferred .node (n2 )
11001105 # Sites can have metadata added by the inference process, but inferred site
11011106 # metadata should always include all the metadata in the original ts
11021107 for s1 , s2 in zip (ts .sites (), ts_inferred .sites ()):
@@ -1586,12 +1591,13 @@ def verify(self, sample_data, mismatch_ratio=None, recombination_rate=None):
15861591 ancestors_time = ancestor_data .ancestors_time [:]
15871592 num_ancestor_nodes = 0
15881593 for n in ancestors_ts .nodes ():
1589- md = json . loads ( n .metadata ) if n .metadata else {}
1594+ md = n .metadata if n .metadata else {}
15901595 if tsinfer .is_pc_ancestor (n .flags ):
1591- assert not ("ancestor_data_id" in md )
1596+ if "tsinfer" in md :
1597+ assert "ancestor_data_id" not in md ["tsinfer" ]
15921598 else :
1593- assert "ancestor_data_id" in md
1594- assert ancestors_time [md ["ancestor_data_id" ]] == n .time
1599+ assert "tsinfer" in md and "ancestor_data_id" in md [ "tsinfer" ]
1600+ assert ancestors_time [md ["tsinfer" ][ "ancestor_data_id" ]] == n .time
15951601 num_ancestor_nodes += 1
15961602 assert num_ancestor_nodes == ancestor_data .num_ancestors
15971603
@@ -3114,8 +3120,7 @@ def verify_augmented_ancestors(
31143120 node = t2 .nodes [m + j ]
31153121 assert node .flags == tsinfer .NODE_IS_SAMPLE_ANCESTOR
31163122 assert node .time == 1
3117- metadata = json .loads (node .metadata .decode ())
3118- assert node_id == metadata ["sample_data_id" ]
3123+ assert node_id == node .metadata ["tsinfer" ]["sample_data_id" ]
31193124
31203125 t2 .nodes .truncate (len (t1 .nodes ))
31213126 # Adding and subtracting 1 can lead to small diffs, so we compare
@@ -3265,8 +3270,7 @@ def verify_example(self, full_subset, samples, ancestors, path_compression):
32653270 num_sample_ancestors = 0
32663271 for node in final_ts .nodes ():
32673272 if node .flags == tsinfer .NODE_IS_SAMPLE_ANCESTOR :
3268- metadata = json .loads (node .metadata .decode ())
3269- assert metadata ["sample_data_id" ] in subset
3273+ assert node .metadata ["tsinfer" ]["sample_data_id" ] in subset
32703274 num_sample_ancestors += 1
32713275 assert expected_sample_ancestors == num_sample_ancestors
32723276 tsinfer .verify (samples , final_ts .simplify ())
0 commit comments