Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 53 additions & 22 deletions c/tests/test_stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ verify_divergence_matrix(tsk_treeseq_t *ts, tsk_flags_t options)
tsk_id_t index_tuples[2 * n * n];
double D1[n * n], D2[n * n];
tsk_size_t i, j, k;
tsk_flags_t lib_options = options;

for (j = 0; j < n; j++) {
sample_set_sizes[j] = 1;
Expand All @@ -284,8 +285,13 @@ verify_divergence_matrix(tsk_treeseq_t *ts, tsk_flags_t options)
index_tuples[2 * (j * n + k) + 1] = (tsk_id_t) k;
}
}
/* Until we have mutation mode for divergence: */
if (!!(lib_options & TSK_STAT_MUTATION)) {
lib_options &= (tsk_flags_t) ~TSK_STAT_MUTATION;
lib_options |= (tsk_flags_t) TSK_STAT_SITE;
}
ret = tsk_treeseq_divergence(
ts, n, sample_set_sizes, samples, n * n, index_tuples, 0, NULL, options, D1);
ts, n, sample_set_sizes, samples, n * n, index_tuples, 0, NULL, lib_options, D1);
CU_ASSERT_EQUAL_FATAL(ret, 0);

ret = tsk_treeseq_divergence_matrix(
Expand Down Expand Up @@ -1315,6 +1321,12 @@ test_general_stat_input_errors(void)
&ts, 1, &W, 0, general_stat_sum, NULL, 0, NULL, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_RESULT_DIMS);

/* Unsupported mode */
/* TODO: change when STAT_MUTATION is supported */
ret = tsk_treeseq_general_stat(
&ts, 1, &W, 1, general_stat_sum, NULL, 0, NULL, TSK_STAT_MUTATION, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);

/* Multiple stats*/
ret = tsk_treeseq_general_stat(&ts, 1, &W, 1, general_stat_sum, NULL, 0, NULL,
TSK_STAT_SITE | TSK_STAT_BRANCH, &result);
Expand Down Expand Up @@ -1464,18 +1476,26 @@ test_single_tree_divergence_matrix(void)
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);

ret = tsk_treeseq_divergence_matrix(
&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_SITE, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);

ret = tsk_treeseq_divergence_matrix(
&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_NODE, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);

ret = tsk_treeseq_divergence_matrix(
&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_BRANCH, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(16, result, D_branch);

ret = tsk_treeseq_divergence_matrix(
&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_SITE, result);
&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_MUTATION, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(16, result, D_site);

ret = tsk_treeseq_divergence_matrix(
&ts, 2, sample_set_sizes, NULL, 0, NULL, TSK_STAT_SITE, result);
&ts, 2, sample_set_sizes, NULL, 0, NULL, TSK_STAT_MUTATION, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);

ret = tsk_treeseq_divergence_matrix(
Expand All @@ -1488,15 +1508,15 @@ test_single_tree_divergence_matrix(void)
&ts, 2, sample_set_sizes, NULL, 0, NULL, TSK_STAT_BRANCH, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_treeseq_divergence_matrix(
&ts, 2, sample_set_sizes, NULL, 0, NULL, TSK_STAT_SITE, result);
&ts, 2, sample_set_sizes, NULL, 0, NULL, TSK_STAT_MUTATION, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);

/* assert_arrays_almost_equal(4, result, D_site); */

verify_divergence_matrix(&ts, TSK_STAT_BRANCH);
verify_divergence_matrix(&ts, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE);
verify_divergence_matrix(&ts, TSK_STAT_SITE);
verify_divergence_matrix(&ts, TSK_STAT_SITE | TSK_STAT_SPAN_NORMALISE);
verify_divergence_matrix(&ts, TSK_STAT_MUTATION);
verify_divergence_matrix(&ts, TSK_STAT_MUTATION | TSK_STAT_SPAN_NORMALISE);

tsk_treeseq_free(&ts);
}
Expand Down Expand Up @@ -1542,7 +1562,7 @@ test_single_tree_divergence_matrix_internal_samples(void)
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(16, result, D);
ret = tsk_treeseq_divergence_matrix(
&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_SITE, result);
&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_MUTATION, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(16, result, D);

Expand All @@ -1551,7 +1571,7 @@ test_single_tree_divergence_matrix_internal_samples(void)
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(16, result, D);
ret = tsk_treeseq_divergence_matrix(
&ts, 4, sizes, samples, 0, NULL, TSK_STAT_SITE, result);
&ts, 4, sizes, samples, 0, NULL, TSK_STAT_MUTATION, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(16, result, D);

Expand All @@ -1560,14 +1580,14 @@ test_single_tree_divergence_matrix_internal_samples(void)
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(16, result, D);
ret = tsk_treeseq_divergence_matrix(
&ts, 4, NULL, samples, 0, NULL, TSK_STAT_SITE, result);
&ts, 4, NULL, samples, 0, NULL, TSK_STAT_MUTATION, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(16, result, D);

verify_divergence_matrix(&ts, TSK_STAT_BRANCH);
verify_divergence_matrix(&ts, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE);
verify_divergence_matrix(&ts, TSK_STAT_SITE);
verify_divergence_matrix(&ts, TSK_STAT_SITE | TSK_STAT_SPAN_NORMALISE);
verify_divergence_matrix(&ts, TSK_STAT_MUTATION);
verify_divergence_matrix(&ts, TSK_STAT_MUTATION | TSK_STAT_SPAN_NORMALISE);

tsk_treeseq_free(&ts);
free(result);
Expand Down Expand Up @@ -1616,8 +1636,8 @@ test_single_tree_divergence_matrix_multi_root(void)

verify_divergence_matrix(&ts, TSK_STAT_BRANCH);
verify_divergence_matrix(&ts, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE);
verify_divergence_matrix(&ts, TSK_STAT_SITE);
verify_divergence_matrix(&ts, TSK_STAT_SITE | TSK_STAT_SPAN_NORMALISE);
verify_divergence_matrix(&ts, TSK_STAT_MUTATION);
verify_divergence_matrix(&ts, TSK_STAT_MUTATION | TSK_STAT_SPAN_NORMALISE);

tsk_treeseq_free(&ts);
}
Expand Down Expand Up @@ -2250,6 +2270,9 @@ test_paper_ex_genetic_relatedness_vector_errors(void)
ret = tsk_treeseq_genetic_relatedness_vector(&ts, num_weights, weights, 2, windows,
num_samples, ts.samples, result, TSK_STAT_NODE);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);
ret = tsk_treeseq_genetic_relatedness_vector(&ts, num_weights, weights, 2, windows,
num_samples, ts.samples, result, TSK_STAT_MUTATION);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);

tsk_treeseq_free(&ts);
free(weights);
Expand Down Expand Up @@ -2503,6 +2526,10 @@ test_paper_ex_afs_errors(void)
&ts, 2, sample_set_sizes, samples, 0, NULL, 0, NULL, TSK_STAT_NODE, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);

ret = tsk_treeseq_allele_frequency_spectrum(
&ts, 2, sample_set_sizes, samples, 0, NULL, 0, NULL, TSK_STAT_MUTATION, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);

ret = tsk_treeseq_allele_frequency_spectrum(&ts, 2, sample_set_sizes, samples, 0,
NULL, 0, NULL, TSK_STAT_BRANCH | TSK_STAT_SITE, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MULTIPLE_STAT_MODES);
Expand Down Expand Up @@ -2557,8 +2584,8 @@ test_paper_ex_divergence_matrix(void)

verify_divergence_matrix(&ts, TSK_STAT_BRANCH);
verify_divergence_matrix(&ts, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE);
verify_divergence_matrix(&ts, TSK_STAT_SITE);
verify_divergence_matrix(&ts, TSK_STAT_SITE | TSK_STAT_SPAN_NORMALISE);
verify_divergence_matrix(&ts, TSK_STAT_MUTATION);
verify_divergence_matrix(&ts, TSK_STAT_MUTATION | TSK_STAT_SPAN_NORMALISE);

tsk_treeseq_free(&ts);
}
Expand Down Expand Up @@ -3790,6 +3817,10 @@ test_two_locus_stat_input_errors(void)
positions, 10, NULL, positions, TSK_STAT_NODE, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);

ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 10, NULL,
positions, 10, NULL, positions, TSK_STAT_MUTATION, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);

num_sample_sets = 2;
num_index_tuples = 0;
ret = tsk_treeseq_r2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets,
Expand Down Expand Up @@ -3856,7 +3887,7 @@ test_simplest_divergence_matrix(void)
assert_arrays_almost_equal(4, D_site, result);

ret = tsk_treeseq_divergence_matrix(
&ts, 2, NULL, sample_ids, 0, NULL, TSK_STAT_SITE, result);
&ts, 2, NULL, sample_ids, 0, NULL, TSK_STAT_MUTATION, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(4, D_site, result);

Expand All @@ -3866,7 +3897,7 @@ test_simplest_divergence_matrix(void)
assert_arrays_almost_equal(4, D_branch, result);

ret = tsk_treeseq_divergence_matrix(
&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_SITE, result);
&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_MUTATION, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(4, D_site, result);

Expand All @@ -3879,7 +3910,7 @@ test_simplest_divergence_matrix(void)
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_STAT_POLARISED_UNSUPPORTED);

ret = tsk_treeseq_divergence_matrix(
&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_SITE | TSK_STAT_BRANCH, result);
&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_MUTATION | TSK_STAT_BRANCH, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MULTIPLE_STAT_MODES);

sample_ids[0] = -1;
Expand Down Expand Up @@ -3935,7 +3966,7 @@ test_simplest_divergence_matrix_windows(void)

/* Windows for the second half */
ret = tsk_treeseq_divergence_matrix(
&ts, 2, NULL, sample_ids, 1, windows + 1, TSK_STAT_SITE, result);
&ts, 2, NULL, sample_ids, 1, windows + 1, TSK_STAT_MUTATION, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(4, D_site, result);
ret = tsk_treeseq_divergence_matrix(
Expand Down Expand Up @@ -3985,7 +4016,7 @@ test_simplest_divergence_matrix_internal_sample(void)
assert_arrays_almost_equal(9, D_branch, result);

ret = tsk_treeseq_divergence_matrix(
&ts, 3, NULL, sample_ids, 0, NULL, TSK_STAT_SITE, result);
&ts, 3, NULL, sample_ids, 0, NULL, TSK_STAT_MUTATION, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(9, D_site, result);

Expand All @@ -4002,8 +4033,8 @@ test_multiroot_divergence_matrix(void)

verify_divergence_matrix(&ts, TSK_STAT_BRANCH);
verify_divergence_matrix(&ts, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE);
verify_divergence_matrix(&ts, TSK_STAT_SITE);
verify_divergence_matrix(&ts, TSK_STAT_SITE | TSK_STAT_SPAN_NORMALISE);
verify_divergence_matrix(&ts, TSK_STAT_MUTATION);
verify_divergence_matrix(&ts, TSK_STAT_MUTATION | TSK_STAT_SPAN_NORMALISE);

tsk_treeseq_free(&ts);
}
Expand Down
42 changes: 24 additions & 18 deletions c/tskit/trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -2041,13 +2041,18 @@ tsk_treeseq_general_stat(const tsk_treeseq_t *self, tsk_size_t state_dim,
bool stat_site = !!(options & TSK_STAT_SITE);
bool stat_branch = !!(options & TSK_STAT_BRANCH);
bool stat_node = !!(options & TSK_STAT_NODE);
bool stat_mutation = !!(options & TSK_STAT_MUTATION);
double default_windows[] = { 0, self->tables->sequence_length };
tsk_size_t row_size;

/* If no mode is specified, we default to site mode */
if (!(stat_site || stat_branch || stat_node)) {
if (!(stat_site || stat_branch || stat_node || stat_mutation)) {
stat_site = true;
}
if (stat_mutation) {
ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_STAT_MODE);
goto out;
}
/* It's an error to specify more than one mode */
if (stat_site + stat_branch + stat_node > 1) {
ret = tsk_trace_error(TSK_ERR_MULTIPLE_STAT_MODES);
Expand Down Expand Up @@ -3402,8 +3407,8 @@ tsk_treeseq_two_locus_count_stat(const tsk_treeseq_t *self, tsk_size_t num_sampl
.sample_set_sizes = sample_set_sizes,
.set_indexes = set_indexes };

// We do not support two-locus node stats
if (!!(options & TSK_STAT_NODE)) {
// We do not support two-locus node or mutation stats
if (!!(options & (TSK_STAT_NODE | TSK_STAT_MUTATION))) {
ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_STAT_MODE);
goto out;
}
Expand Down Expand Up @@ -3821,7 +3826,6 @@ tsk_treeseq_allele_frequency_spectrum(const tsk_treeseq_t *self,
int ret = 0;
bool stat_site = !!(options & TSK_STAT_SITE);
bool stat_branch = !!(options & TSK_STAT_BRANCH);
bool stat_node = !!(options & TSK_STAT_NODE);
const double default_windows[] = { 0, self->tables->sequence_length };
const double default_time_windows[] = { 0, INFINITY };
const tsk_size_t num_nodes = self->tables->nodes.num_rows;
Expand All @@ -3833,7 +3837,7 @@ tsk_treeseq_allele_frequency_spectrum(const tsk_treeseq_t *self,
* reuse code from the general_stats code paths. */
double *counts = NULL;
double *count_row;
if (stat_node) {
if (!!(options & (TSK_STAT_NODE | TSK_STAT_MUTATION))) {
ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_STAT_MODE);
goto out;
}
Expand Down Expand Up @@ -8751,11 +8755,11 @@ remap_to_sample_sets(const tsk_size_t num_samples, const tsk_id_t *restrict samp
}

static int
tsk_treeseq_divergence_matrix_site(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
const tsk_id_t *restrict sample_set_index_map, const tsk_size_t num_samples,
const tsk_id_t *restrict samples, tsk_size_t num_windows,
const double *restrict windows, tsk_flags_t TSK_UNUSED(options),
double *restrict result)
tsk_treeseq_divergence_matrix_mutation(const tsk_treeseq_t *self,
tsk_size_t num_sample_sets, const tsk_id_t *restrict sample_set_index_map,
const tsk_size_t num_samples, const tsk_id_t *restrict samples,
tsk_size_t num_windows, const double *restrict windows,
tsk_flags_t TSK_UNUSED(options), double *restrict result)
{
int ret = 0;
tsk_size_t i;
Expand Down Expand Up @@ -8913,20 +8917,21 @@ tsk_treeseq_divergence_matrix(const tsk_treeseq_t *self, tsk_size_t num_sample_s
bool stat_site = !!(options & TSK_STAT_SITE);
bool stat_branch = !!(options & TSK_STAT_BRANCH);
bool stat_node = !!(options & TSK_STAT_NODE);
bool stat_mutation = !!(options & TSK_STAT_MUTATION);
tsk_id_t *sample_set_index_map
= tsk_malloc(num_nodes * sizeof(*sample_set_index_map));
tsk_size_t j;

if (stat_node) {
if (stat_node || stat_site) {
ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_STAT_MODE);
goto out;
}
/* If no mode is specified, we default to site mode */
if (!(stat_site || stat_branch)) {
stat_site = true;
/* If no mode is specified, we default to mutation mode */
if (!(stat_mutation || stat_branch)) {
stat_mutation = true;
}
/* It's an error to specify more than one mode */
if (stat_site + stat_branch > 1) {
if (stat_mutation + stat_branch > 1) {
ret = tsk_trace_error(TSK_ERR_MULTIPLE_STAT_MODES);
goto out;
}
Expand Down Expand Up @@ -8982,8 +8987,8 @@ tsk_treeseq_divergence_matrix(const tsk_treeseq_t *self, tsk_size_t num_sample_s
ret = tsk_treeseq_divergence_matrix_branch(self, N, sample_set_sizes,
sample_sets, num_windows, windows, options, result);
} else {
tsk_bug_assert(stat_site);
ret = tsk_treeseq_divergence_matrix_site(self, N, sample_set_index_map,
tsk_bug_assert(stat_mutation);
ret = tsk_treeseq_divergence_matrix_mutation(self, N, sample_set_index_map,
total_samples, sample_sets, num_windows, windows, options, result);
}
if (ret != 0) {
Expand Down Expand Up @@ -10788,11 +10793,12 @@ tsk_treeseq_genetic_relatedness_vector(const tsk_treeseq_t *self, tsk_size_t num
int ret = 0;
bool stat_site = !!(options & TSK_STAT_SITE);
bool stat_node = !!(options & TSK_STAT_NODE);
bool stat_mutation = !!(options & TSK_STAT_MUTATION);
tsk_matvec_calculator_t calc;

memset(&calc, 0, sizeof(calc));

if (stat_node || stat_site) {
if (stat_node || stat_site || stat_mutation) {
ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_STAT_MODE);
goto out;
}
Expand Down
1 change: 1 addition & 0 deletions c/tskit/trees.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ extern "C" {
#define TSK_STAT_SITE (1 << 0)
#define TSK_STAT_BRANCH (1 << 1)
#define TSK_STAT_NODE (1 << 2)
#define TSK_STAT_MUTATION (1 << 3)

/* Leave room for other stat types */
#define TSK_STAT_POLARISED (1 << 10)
Expand Down
1 change: 1 addition & 0 deletions docs/python-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,7 @@ Single site
TreeSequence.genetic_relatedness_weighted
TreeSequence.genetic_relatedness_vector
TreeSequence.genetic_relatedness_matrix
TreeSequence.divergence_matrix
TreeSequence.general_stat
TreeSequence.segregating_sites
TreeSequence.sample_count_stat
Expand Down
Loading