@@ -4549,19 +4549,138 @@ tsk_tree_position_in_interval(const tsk_tree_t *self, double x)
45494549 return self -> interval .left <= x && x < self -> interval .right ;
45504550}
45514551
4552- int TSK_WARN_UNUSED
4553- tsk_tree_seek (tsk_tree_t * self , double x , tsk_flags_t TSK_UNUSED (options ))
4552+ /* NOTE:
4553+ *
4554+ * Notes from Kevin Thornton:
4555+ *
4556+ * This method inserts the edges for an arbitrary tree
4557+ * in linear time and requires no additional memory.
4558+ *
4559+ * During design, the following alternatives were tested
4560+ * (in a combination of rust + C):
4561+ * 1. Indexing edge insertion/removal locations by tree.
4562+ * The indexing can be done in O(n) time, giving O(1)
4563+ * access to the first edge in a tree. We can then add
4564+ * edges to the tree in O(e) time, where e is the number
4565+ * of edges. This apparoach requires O(n) additional memory
4566+ * and is only marginally faster than the implementation below.
4567+ * 2. Building an interval tree mapping edge id -> span.
4568+ * This approach adds a lot of complexity and wasn't any faster
4569+ * than the indexing described above.
4570+ */
4571+ static int
4572+ tsk_tree_seek_from_null (tsk_tree_t * self , double x , tsk_flags_t TSK_UNUSED (options ))
45544573{
45554574 int ret = 0 ;
4575+ tsk_size_t edge ;
4576+ tsk_id_t p , c , e , j , k , tree_index ;
45564577 const double L = tsk_treeseq_get_sequence_length (self -> tree_sequence );
4557- const double t_l = self -> interval .left ;
4558- const double t_r = self -> interval .right ;
4559- double distance_left , distance_right ;
4578+ const tsk_treeseq_t * treeseq = self -> tree_sequence ;
4579+ const tsk_table_collection_t * tables = treeseq -> tables ;
4580+ const tsk_id_t * restrict edge_parent = tables -> edges .parent ;
4581+ const tsk_id_t * restrict edge_child = tables -> edges .child ;
4582+ const tsk_size_t num_edges = tables -> edges .num_rows ;
4583+ const tsk_size_t num_trees = self -> tree_sequence -> num_trees ;
4584+ const double * restrict edge_left = tables -> edges .left ;
4585+ const double * restrict edge_right = tables -> edges .right ;
4586+ const double * restrict breakpoints = treeseq -> breakpoints ;
4587+ const tsk_id_t * restrict insertion = tables -> indexes .edge_insertion_order ;
4588+ const tsk_id_t * restrict removal = tables -> indexes .edge_removal_order ;
4589+
4590+ // NOTE: it may be better to get the
4591+ // index first and then ask if we are
4592+ // searching in the first or last 1/2
4593+ // of trees.
4594+ j = -1 ;
4595+ if (x <= L / 2.0 ) {
4596+ for (edge = 0 ; edge < num_edges ; edge ++ ) {
4597+ e = insertion [edge ];
4598+ if (edge_left [e ] > x ) {
4599+ j = (tsk_id_t ) edge ;
4600+ break ;
4601+ }
4602+ if (x >= edge_left [e ] && x < edge_right [e ]) {
4603+ p = edge_parent [e ];
4604+ c = edge_child [e ];
4605+ tsk_tree_insert_edge (self , p , c , e );
4606+ }
4607+ }
4608+ } else {
4609+ for (edge = 0 ; edge < num_edges ; edge ++ ) {
4610+ e = removal [num_edges - edge - 1 ];
4611+ if (edge_right [e ] < x ) {
4612+ j = (tsk_id_t )(num_edges - edge - 1 );
4613+ while (j < (tsk_id_t ) num_edges && edge_left [insertion [j ]] <= x ) {
4614+ j ++ ;
4615+ }
4616+ break ;
4617+ }
4618+ if (x >= edge_left [e ] && x < edge_right [e ]) {
4619+ p = edge_parent [e ];
4620+ c = edge_child [e ];
4621+ tsk_tree_insert_edge (self , p , c , e );
4622+ }
4623+ }
4624+ }
45604625
4561- if (x < 0 || x >= L ) {
4626+ if (j == -1 ) {
4627+ j = 0 ;
4628+ while (j < (tsk_id_t ) num_edges && edge_left [insertion [j ]] <= x ) {
4629+ j ++ ;
4630+ }
4631+ }
4632+ k = 0 ;
4633+ while (k < (tsk_id_t ) num_edges && edge_right [removal [k ]] <= x ) {
4634+ k ++ ;
4635+ }
4636+
4637+ /* NOTE: tsk_search_sorted finds the first the first
4638+ * insertion locatiom >= the query point, which
4639+ * finds a RIGHT value for queries not at the left edge.
4640+ */
4641+ tree_index = (tsk_id_t ) tsk_search_sorted (breakpoints , num_trees + 1 , x );
4642+ if (breakpoints [tree_index ] > x ) {
4643+ tree_index -= 1 ;
4644+ }
4645+ self -> index = tree_index ;
4646+ self -> interval .left = breakpoints [tree_index ];
4647+ self -> interval .right = breakpoints [tree_index + 1 ];
4648+ self -> left_index = j ;
4649+ self -> right_index = k ;
4650+ self -> direction = TSK_DIR_FORWARD ;
4651+ self -> num_nodes = tables -> nodes .num_rows ;
4652+ if (tables -> sites .num_rows > 0 ) {
4653+ self -> sites = treeseq -> tree_sites [self -> index ];
4654+ self -> sites_length = treeseq -> tree_sites_length [self -> index ];
4655+ }
4656+
4657+ return ret ;
4658+ }
4659+
4660+ int TSK_WARN_UNUSED
4661+ tsk_tree_seek_index (tsk_tree_t * self , tsk_id_t tree , tsk_flags_t options )
4662+ {
4663+ int ret = 0 ;
4664+ double x ;
4665+
4666+ if (tree < 0 || tree >= (tsk_id_t ) self -> tree_sequence -> num_trees ) {
45624667 ret = TSK_ERR_SEEK_OUT_OF_BOUNDS ;
45634668 goto out ;
45644669 }
4670+ x = self -> tree_sequence -> breakpoints [tree ];
4671+ ret = tsk_tree_seek (self , x , options );
4672+ out :
4673+ return ret ;
4674+ }
4675+
4676+ static int TSK_WARN_UNUSED
4677+ tsk_tree_seek_linear (tsk_tree_t * self , double x , tsk_flags_t TSK_UNUSED (options ))
4678+ {
4679+ const double L = tsk_treeseq_get_sequence_length (self -> tree_sequence );
4680+ const double t_l = self -> interval .left ;
4681+ const double t_r = self -> interval .right ;
4682+ int ret = 0 ;
4683+ double distance_left , distance_right ;
45654684
45664685 if (x < t_l ) {
45674686 /* |-----|-----|========|---------| */
@@ -4594,6 +4713,27 @@ tsk_tree_seek(tsk_tree_t *self, double x, tsk_flags_t TSK_UNUSED(options))
45944713 return ret ;
45954714}
45964715
4716+ int TSK_WARN_UNUSED
4717+ tsk_tree_seek (tsk_tree_t * self , double x , tsk_flags_t options )
4718+ {
4719+ int ret = 0 ;
4720+ const double L = tsk_treeseq_get_sequence_length (self -> tree_sequence );
4721+
4722+ if (x < 0 || x >= L ) {
4723+ ret = TSK_ERR_SEEK_OUT_OF_BOUNDS ;
4724+ goto out ;
4725+ }
4726+
4727+ if (self -> index == -1 ) {
4728+ ret = tsk_tree_seek_from_null (self , x , options );
4729+ } else {
4730+ ret = tsk_tree_seek_linear (self , x , options );
4731+ }
4732+
4733+ out :
4734+ return ret ;
4735+ }
4736+
45974737int TSK_WARN_UNUSED
45984738tsk_tree_clear (tsk_tree_t * self )
45994739{
0 commit comments