Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions core/species.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7414,7 +7414,7 @@ void Species::WriteTreeSequence(std::string &p_recording_tree_path, bool p_simpl
// Add in the mutation.parent information; valid tree sequences need parents, but we don't keep them while running
ret = tsk_table_collection_build_index(&output_tables, 0);
if (ret < 0) handle_error("tsk_table_collection_build_index", ret);
ret = tsk_table_collection_compute_mutation_parents(&output_tables, 0);
ret = tsk_table_collection_compute_mutation_parents(&output_tables, TSK_NO_CHECK_INTEGRITY);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the motivation for this change? Does this simply preserve the pre-existing behavior (because tskit's default behavior changed), or will this change SLiM's behavior in some way?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, from the CHANGELOG to 1.2.0:

Add the TSK_NO_CHECK_INTEGRITY option to tsk_table_collection_compute_mutation_parents to skip the integrity checks that are normally run when computing mutation parents. This is useful for speeding up the computation of mutation parents when the tree sequence is certainly known to be valid. (:user:benjeffery, :pr:3212).

So - this removes the "check integrity" check from this function, so in principle we might be less likely to catch an error; however, we have just checked integrity on the previous line in tsk_table_collection_build_index, so it's unnecessary.

if (ret < 0) handle_error("tsk_table_collection_compute_mutation_parents", ret);

{
Expand Down Expand Up @@ -7801,7 +7801,7 @@ void Species::CrosscheckTreeSeqIntegrity(void)
ret = tsk_table_collection_build_index(tables_copy, 0);
if (ret < 0) handle_error("tsk_table_collection_build_index", ret);

ret = tsk_table_collection_compute_mutation_parents(tables_copy, 0);
ret = tsk_table_collection_compute_mutation_parents(tables_copy, TSK_NO_CHECK_INTEGRITY);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same question as above.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

if (ret < 0) handle_error("tsk_table_collection_compute_mutation_parents", ret);

}
Expand Down
2 changes: 2 additions & 0 deletions treerec/_README
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ The code in `tskit/kastore` is from [kastore/c](https://github.com/tskit-dev/kas

**Changelog:**

- *30 November 2025*: updated to tskit 1.0.0
- *24 September 2025*: updated to tskit C_1.2.0 (=1.0.0b2) (still kastore C_2.1.1)
- *15 April 2025*: updated to tskit C_1.1.4 (=0.6.2) (still kastore C_2.1.1)
- *7 November 2023*: updated to tskit C_1.1.2 (=0.5.5) (still kastore C_2.1.1)
- *2 June 2022*: updated to tskit C_1.0.0 and kastore C_2.1.1
Expand Down
140 changes: 87 additions & 53 deletions treerec/tskit/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,8 @@ tsk_strerror_internal(int err)
break;
case TSK_ERR_FILE_VERSION_TOO_OLD:
ret = "tskit file version too old. Please upgrade using the "
"'tskit upgrade' command. (TSK_ERR_FILE_VERSION_TOO_OLD)";
"'tskit upgrade' command from tskit version<0.6.2. "
"(TSK_ERR_FILE_VERSION_TOO_OLD)";
break;
case TSK_ERR_FILE_VERSION_TOO_NEW:
ret = "tskit file version is too new for this instance. "
Expand Down Expand Up @@ -346,6 +347,12 @@ tsk_strerror_internal(int err)
"(TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME)";
break;

case TSK_ERR_BAD_MUTATION_PARENT:
ret = "A mutation's parent is not consistent with the topology of the tree. "
"Use compute_mutation_parents to set the parents correctly."
"(TSK_ERR_BAD_MUTATION_PARENT)";
break;

/* Migration errors */
case TSK_ERR_UNSORTED_MIGRATIONS:
ret = "Migrations must be sorted by time. (TSK_ERR_UNSORTED_MIGRATIONS)";
Expand Down Expand Up @@ -519,9 +526,13 @@ tsk_strerror_internal(int err)
"(TSK_ERR_BAD_SAMPLE_PAIR_TIMES)";
break;
case TSK_ERR_BAD_TIME_WINDOWS:
ret = "Time windows must be strictly increasing and end at infinity. "
ret = "Time windows must start at zero and be strictly increasing. "
"(TSK_ERR_BAD_TIME_WINDOWS)";
break;
case TSK_ERR_BAD_TIME_WINDOWS_END:
ret = "Time windows must end at infinity for this method. "
"(TSK_ERR_BAD_TIME_WINDOWS_END)";
break;
case TSK_ERR_BAD_NODE_TIME_WINDOW:
ret = "Node time does not fall within assigned time window. "
"(TSK_ERR_BAD_NODE_TIME_WINDOW)";
Expand Down Expand Up @@ -573,6 +584,14 @@ tsk_strerror_internal(int err)
ret = "Must have at least one allele when specifying an allele map. "
"(TSK_ERR_ZERO_ALLELES)";
break;
case TSK_ERR_BAD_ALLELE_LENGTH:
ret = "Alleles used when decoding alignments must have length one. "
"(TSK_ERR_BAD_ALLELE_LENGTH)";
break;
case TSK_ERR_MISSING_CHAR_COLLISION:
ret = "Alleles used when decoding alignments must not match the missing "
"data character. (TSK_ERR_MISSING_CHAR_COLLISION)";
break;

/* Distance metric errors */
case TSK_ERR_SAMPLE_SIZE_MISMATCH:
Expand Down Expand Up @@ -1249,16 +1268,16 @@ tsk_avl_tree_int_ordered_nodes(const tsk_avl_tree_int_t *self, tsk_avl_node_int_
}

// Bit Array implementation. Allows us to store unsigned integers in a compact manner.
// Currently implemented as an array of 32-bit unsigned integers for ease of counting.
// Currently implemented as an array of 32-bit unsigned integers.

int
tsk_bit_array_init(tsk_bit_array_t *self, tsk_size_t num_bits, tsk_size_t length)
tsk_bitset_init(tsk_bitset_t *self, tsk_size_t num_bits, tsk_size_t length)
{
int ret = 0;

self->size = (num_bits >> TSK_BIT_ARRAY_CHUNK)
+ (num_bits % TSK_BIT_ARRAY_NUM_BITS ? 1 : 0);
self->data = tsk_calloc(self->size * length, sizeof(*self->data));
self->row_len = (num_bits / TSK_BITSET_BITS) + (num_bits % TSK_BITSET_BITS ? 1 : 0);
self->len = length;
self->data = tsk_calloc(self->row_len * length, sizeof(*self->data));
if (self->data == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
Expand All @@ -1267,96 +1286,111 @@ tsk_bit_array_init(tsk_bit_array_t *self, tsk_size_t num_bits, tsk_size_t length
return ret;
}

void
tsk_bit_array_get_row(const tsk_bit_array_t *self, tsk_size_t row, tsk_bit_array_t *out)
{
out->size = self->size;
out->data = self->data + (row * self->size);
}
#define BITSET_DATA_ROW(bs, row) ((bs)->data + (row) * (bs)->row_len)

void
tsk_bit_array_intersect(
const tsk_bit_array_t *self, const tsk_bit_array_t *other, tsk_bit_array_t *out)
tsk_bitset_intersect(const tsk_bitset_t *self, tsk_size_t self_row,
const tsk_bitset_t *other, tsk_size_t other_row, tsk_bitset_t *out)
{
for (tsk_size_t i = 0; i < self->size; i++) {
out->data[i] = self->data[i] & other->data[i];
const tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, self_row);
const tsk_bitset_val_t *restrict other_d = BITSET_DATA_ROW(other, other_row);
tsk_bitset_val_t *restrict out_d = out->data;
for (tsk_size_t i = 0; i < self->row_len; i++) {
out_d[i] = self_d[i] & other_d[i];
}
}

void
tsk_bit_array_subtract(tsk_bit_array_t *self, const tsk_bit_array_t *other)
tsk_bitset_subtract(tsk_bitset_t *self, tsk_size_t self_row, const tsk_bitset_t *other,
tsk_size_t other_row)
{
for (tsk_size_t i = 0; i < self->size; i++) {
self->data[i] &= ~(other->data[i]);
tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, self_row);
const tsk_bitset_val_t *restrict other_d = BITSET_DATA_ROW(other, other_row);
for (tsk_size_t i = 0; i < self->row_len; i++) {
self_d[i] &= ~(other_d[i]);
}
}

void
tsk_bit_array_add(tsk_bit_array_t *self, const tsk_bit_array_t *other)
tsk_bitset_union(tsk_bitset_t *self, tsk_size_t self_row, const tsk_bitset_t *other,
tsk_size_t other_row)
{
for (tsk_size_t i = 0; i < self->size; i++) {
self->data[i] |= other->data[i];
tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, self_row);
const tsk_bitset_val_t *restrict other_d = BITSET_DATA_ROW(other, other_row);
for (tsk_size_t i = 0; i < self->row_len; i++) {
self_d[i] |= other_d[i];
}
}

void
tsk_bit_array_add_bit(tsk_bit_array_t *self, const tsk_bit_array_value_t bit)
tsk_bitset_set_bit(tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit)
{
tsk_bit_array_value_t i = bit >> TSK_BIT_ARRAY_CHUNK;
self->data[i] |= (tsk_bit_array_value_t) 1 << (bit - (TSK_BIT_ARRAY_NUM_BITS * i));
tsk_bitset_val_t i = (bit / TSK_BITSET_BITS);
*(BITSET_DATA_ROW(self, row) + i) |= (tsk_bitset_val_t) 1
<< (bit - (TSK_BITSET_BITS * i));
}

bool
tsk_bit_array_contains(const tsk_bit_array_t *self, const tsk_bit_array_value_t bit)
tsk_bitset_contains(const tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit)
{
tsk_bit_array_value_t i = bit >> TSK_BIT_ARRAY_CHUNK;
return self->data[i]
& ((tsk_bit_array_value_t) 1 << (bit - (TSK_BIT_ARRAY_NUM_BITS * i)));
tsk_bitset_val_t i = (bit / TSK_BITSET_BITS);
return *(BITSET_DATA_ROW(self, row) + i)
& ((tsk_bitset_val_t) 1 << (bit - (TSK_BITSET_BITS * i)));
}

tsk_size_t
tsk_bit_array_count(const tsk_bit_array_t *self)
static inline uint32_t
popcount(tsk_bitset_val_t v)
{
// Utilizes 12 operations per bit array. NB this only works on 32 bit integers.
// Utilizes 12 operations per chunk. NB this only works on 32 bit integers.
// Taken from:
// https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
// There's a nice breakdown of this algorithm here:
// https://stackoverflow.com/a/109025
// Could probably do better with explicit SIMD (instead of SWAR), but not as
// portable: https://arxiv.org/pdf/1611.07612.pdf
//
// There is one solution to explore further, which uses __builtin_popcountll.
// This option is relatively simple, but requires a 64 bit bit array and also
// involves some compiler flag plumbing (-mpopcnt)
// The gcc/clang compiler flag will -mpopcnt will convert this code to a
// popcnt instruction (most if not all modern CPUs will support this). The
// popcnt instruction will yield some speed improvements, which depend on
// the tree sequence.
//
// NB: 32bit counting is typically faster than 64bit counting for this task.
// (at least on x86-64)

tsk_bit_array_value_t tmp;
tsk_size_t i, count = 0;
v = v - ((v >> 1) & 0x55555555);
v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
return (((v + (v >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
}

tsk_size_t
tsk_bitset_count(const tsk_bitset_t *self, tsk_size_t row)
{
tsk_size_t i = 0;
tsk_size_t count = 0;
const tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, row);

for (i = 0; i < self->size; i++) {
tmp = self->data[i] - ((self->data[i] >> 1) & 0x55555555);
tmp = (tmp & 0x33333333) + ((tmp >> 2) & 0x33333333);
count += (((tmp + (tmp >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
for (i = 0; i < self->row_len; i++) {
count += popcount(self_d[i]);
}
return count;
}

void
tsk_bit_array_get_items(
const tsk_bit_array_t *self, tsk_id_t *items, tsk_size_t *n_items)
tsk_bitset_get_items(
const tsk_bitset_t *self, tsk_size_t row, tsk_id_t *items, tsk_size_t *n_items)
{
// Get the items stored in the row of a bitset.
// Uses a de Bruijn sequence lookup table to determine the lowest bit set. See the
// wikipedia article for more info: https://w.wiki/BYiF
// Uses a de Bruijn sequence lookup table to determine the lowest bit set.
// See the wikipedia article for more info: https://w.wiki/BYiF

tsk_size_t i, n, off;
tsk_bit_array_value_t v, lsb; // least significant bit
tsk_bitset_val_t v, lsb; // least significant bit
static const tsk_id_t lookup[32] = { 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25,
17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
const tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, row);

n = 0;
for (i = 0; i < self->size; i++) {
v = self->data[i];
off = i * ((tsk_size_t) TSK_BIT_ARRAY_NUM_BITS);
for (i = 0; i < self->row_len; i++) {
v = self_d[i];
off = i * TSK_BITSET_BITS;
if (v == 0) {
continue;
}
Expand All @@ -1370,7 +1404,7 @@ tsk_bit_array_get_items(
}

void
tsk_bit_array_free(tsk_bit_array_t *self)
tsk_bitset_free(tsk_bitset_t *self)
{
tsk_safe_free(self->data);
}
74 changes: 47 additions & 27 deletions treerec/tskit/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,12 +147,12 @@ sizes and types of externally visible structs.
The library minor version. Incremented when non-breaking backward-compatible changes
to the API or ABI are introduced, i.e., the addition of a new function.
*/
#define TSK_VERSION_MINOR 1
#define TSK_VERSION_MINOR 3
/**
The library patch version. Incremented when any changes not relevant to the
to the API or ABI are introduced, i.e., internal refactors of bugfixes.
*/
#define TSK_VERSION_PATCH 4
#define TSK_VERSION_PATCH 0
/** @} */

/*
Expand Down Expand Up @@ -283,7 +283,7 @@ A file could not be read because it is in the wrong format
/**
The file is in tskit format, but the version is too old for the
library to read. The file should be upgraded to the latest version
using the ``tskit upgrade`` command line utility.
using the ``tskit upgrade`` command line utility from tskit version<0.6.2.
*/
#define TSK_ERR_FILE_VERSION_TOO_OLD -101
/**
Expand Down Expand Up @@ -510,6 +510,12 @@ Some mutations have TSK_UNKNOWN_TIME in an algorithm where that's
disallowed (use compute_mutation_times?).
*/
#define TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME -510

/**
A mutation's parent was not consistent with the topology of the tree.
*/
#define TSK_ERR_BAD_MUTATION_PARENT -511

/** @} */

/**
Expand Down Expand Up @@ -740,13 +746,17 @@ Sample times do not all equal the start of first time window
*/
#define TSK_ERR_BAD_SAMPLE_PAIR_TIMES -923
/**
Time windows are not strictly increasing ending at infinity
Time windows are not strictly increasing
*/
#define TSK_ERR_BAD_TIME_WINDOWS -924
/**
Time windows do not end at infinity
*/
#define TSK_ERR_BAD_TIME_WINDOWS_END -925
/**
Node time does not fall within assigned time window
*/
#define TSK_ERR_BAD_NODE_TIME_WINDOW -925
#define TSK_ERR_BAD_NODE_TIME_WINDOW -926
/** @} */

/**
Expand Down Expand Up @@ -793,6 +803,14 @@ More than 2147483647 alleles were specified.
A user-specified allele map was used, but it contained zero alleles.
*/
#define TSK_ERR_ZERO_ALLELES -1103
/**
An allele used when decoding alignments had length other than one.
*/
#define TSK_ERR_BAD_ALLELE_LENGTH -1104
/**
An allele used when decoding alignments matched the missing data character.
*/
#define TSK_ERR_MISSING_CHAR_COLLISION -1105
/** @} */

/**
Expand Down Expand Up @@ -1094,29 +1112,31 @@ FILE *tsk_get_debug_stream(void);

/* Bit Array functionality */

typedef uint32_t tsk_bit_array_value_t;
// define a 32-bit chunk size for our bitsets.
// this means we'll be able to hold 32 distinct items in each 32 bit uint
#define TSK_BITSET_BITS ((tsk_size_t) 32)
typedef uint32_t tsk_bitset_val_t;

typedef struct {
tsk_size_t size; // Number of chunks per row
tsk_bit_array_value_t *data; // Array data
} tsk_bit_array_t;

#define TSK_BIT_ARRAY_CHUNK 5U
#define TSK_BIT_ARRAY_NUM_BITS (1U << TSK_BIT_ARRAY_CHUNK)

int tsk_bit_array_init(tsk_bit_array_t *self, tsk_size_t num_bits, tsk_size_t length);
void tsk_bit_array_free(tsk_bit_array_t *self);
void tsk_bit_array_get_row(
const tsk_bit_array_t *self, tsk_size_t row, tsk_bit_array_t *out);
void tsk_bit_array_intersect(
const tsk_bit_array_t *self, const tsk_bit_array_t *other, tsk_bit_array_t *out);
void tsk_bit_array_subtract(tsk_bit_array_t *self, const tsk_bit_array_t *other);
void tsk_bit_array_add(tsk_bit_array_t *self, const tsk_bit_array_t *other);
void tsk_bit_array_add_bit(tsk_bit_array_t *self, const tsk_bit_array_value_t bit);
bool tsk_bit_array_contains(
const tsk_bit_array_t *self, const tsk_bit_array_value_t bit);
tsk_size_t tsk_bit_array_count(const tsk_bit_array_t *self);
void tsk_bit_array_get_items(
const tsk_bit_array_t *self, tsk_id_t *items, tsk_size_t *n_items);
tsk_size_t row_len; // Number of size TSK_BITSET_BITS chunks per row
tsk_size_t len; // Number of rows
tsk_bitset_val_t *data;
} tsk_bitset_t;

int tsk_bitset_init(tsk_bitset_t *self, tsk_size_t num_bits, tsk_size_t length);
void tsk_bitset_free(tsk_bitset_t *self);
void tsk_bitset_intersect(const tsk_bitset_t *self, tsk_size_t self_row,
const tsk_bitset_t *other, tsk_size_t other_row, tsk_bitset_t *out);
void tsk_bitset_subtract(tsk_bitset_t *self, tsk_size_t self_row,
const tsk_bitset_t *other, tsk_size_t other_row);
void tsk_bitset_union(tsk_bitset_t *self, tsk_size_t self_row, const tsk_bitset_t *other,
tsk_size_t other_row);
void tsk_bitset_set_bit(tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit);
bool tsk_bitset_contains(
const tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit);
tsk_size_t tsk_bitset_count(const tsk_bitset_t *self, tsk_size_t row);
void tsk_bitset_get_items(
const tsk_bitset_t *self, tsk_size_t row, tsk_id_t *items, tsk_size_t *n_items);

#ifdef __cplusplus
}
Expand Down
Loading
Loading