Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ name: CI
on:
push:
branches:
- master
- "*"
pull_request:
branches:
- master
- "*"

jobs:
build:
Expand Down Expand Up @@ -42,7 +42,7 @@ jobs:
uses: uraimo/run-on-arch-action@v2
with:
arch: aarch64
distro: ubuntu20.04
distro: ubuntu22.04
githubToken: ${{ github.token }}
dockerRunArgs: |
--volume "${PWD}:/beagle-lib"
Expand Down
19 changes: 14 additions & 5 deletions examples/hmctest/hmctest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,8 @@ int main( int argc, const char* argv[] )
int nPatterns = strlen(human) * nRepeats;

// change # rate category to 2
// int rateCategoryCount = 4;
int rateCategoryCount = 1;
int rateCategoryCount = 2;
// int rateCategoryCount = 1;

int scaleCount = (scaling ? 7 : 0);

Expand Down Expand Up @@ -207,6 +207,11 @@ int main( int argc, const char* argv[] )
requirementFlags |= BEAGLE_FLAG_VECTOR_NONE;
}

int threadCount = 4;
if (threadCount > 1) {
requirementFlags |= BEAGLE_FLAG_THREADING_CPP;
}

// create an instance of the BEAGLE library
int instance = beagleCreateInstance(
3, /**< Number of tip data elements (input) */
Expand All @@ -228,6 +233,10 @@ int main( int argc, const char* argv[] )
exit(1);
}

if (threadCount > 1) {
beagleSetCPUThreadCount(instance, threadCount);
}


int rNumber = instDetails.resourceNumber;
fprintf(stdout, "Using resource %i:\n", rNumber);
Expand Down Expand Up @@ -275,10 +284,10 @@ int main( int argc, const char* argv[] )
//// rates[i] = 3.0 * (i + 1) / (2 * rateCategoryCount + 1);
// }

// rates[0] = 0.14251623900062188;
// rates[1] = 1.857483760999378;
rates[0] = 0.14251623900062188;
rates[1] = 1.857483760999378;

rates[0] = 1.0;
// rates[0] = 1.0;

beagleSetCategoryRates(instance, &rates[0]);

Expand Down
36 changes: 19 additions & 17 deletions libhmsbeagle/CPU/BeagleCPU4StateImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,25 +111,27 @@ class BeagleCPU4StateImpl : public BeagleCPUImpl<BEAGLE_CPU_GENERIC> {
int endPattern);

virtual void calcEdgeLogDerivativesStates(const int *tipStates,
const REALTYPE *preOrderPartial,
const int firstDerivativeIndex,
const int secondDerivativeIndex,
const double *categoryRates,
const REALTYPE *categoryWeights,
double *outDerivatives,
double *outSumDerivatives,
double *outSumSquaredDerivatives);
const REALTYPE *preOrderPartial,
const int firstDerivativeIndex,
const int secondDerivativeIndex,
const double *categoryRates,
const REALTYPE *categoryWeights,
double *outDerivatives,
double *outSumDerivatives,
double *outSumSquaredDerivatives,
int cacheOffset);

virtual void calcEdgeLogDerivativesPartials(const REALTYPE *postOrderPartial,
const REALTYPE *preOrderPartial,
const int firstDerivativeIndex,
const int secondDerivativeIndex,
const double *categoryRates,
const REALTYPE *categoryWeights,
const int scalingFactorsIndex,
double *outDerivatives,
double *outSumDerivatives,
double *outSumSquaredDerivatives);
const REALTYPE *preOrderPartial,
const int firstDerivativeIndex,
const int secondDerivativeIndex,
const double *categoryRates,
const REALTYPE *categoryWeights,
const int scalingFactorsIndex,
double *outDerivatives,
double *outSumDerivatives,
double *outSumSquaredDerivatives,
int cacheOffset);

virtual void calcCrossProductsStates(const int *tipStates,
const REALTYPE *preOrderPartial,
Expand Down
14 changes: 8 additions & 6 deletions libhmsbeagle/CPU/BeagleCPU4StateImpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1008,7 +1008,8 @@ void BeagleCPU4StateImpl<BEAGLE_CPU_GENERIC>::calcEdgeLogDerivativesStates(const
const REALTYPE *categoryWeights,
double *outDerivatives,
double *outSumDerivatives,
double *outSumSquaredDerivatives) {
double *outSumSquaredDerivatives,
int cacheOffset) {

for (int category = 0; category < kCategoryCount; category++) {

Expand All @@ -1031,8 +1032,8 @@ void BeagleCPU4StateImpl<BEAGLE_CPU_GENERIC>::calcEdgeLogDerivativesStates(const

REALTYPE denominator = preOrderPartial[localPatternOffset + (state & 3)];

grandNumeratorDerivTmp[pattern] += categoryWeights[category] * numerator;
grandDenominatorDerivTmp[pattern] += categoryWeights[category] * denominator;
grandNumeratorDerivTmp[cacheOffset + pattern] += categoryWeights[category] * numerator;
grandDenominatorDerivTmp[cacheOffset + pattern] += categoryWeights[category] * denominator;
}
}
}
Expand All @@ -1047,7 +1048,8 @@ void BeagleCPU4StateImpl<BEAGLE_CPU_GENERIC>::calcEdgeLogDerivativesPartials(con
const int scalingFactorsIndex,
double *outDerivatives,
double *outSumDerivatives,
double *outSumSquaredDerivatives) {
double *outSumSquaredDerivatives,
int cacheOffset) {

const REALTYPE* transMatrix = gTransitionMatrices[firstDerivativeIndex];

Expand All @@ -1073,8 +1075,8 @@ void BeagleCPU4StateImpl<BEAGLE_CPU_GENERIC>::calcEdgeLogDerivativesPartials(con
// grandDenominatorDerivTmp[k] += (postPartials[v] * prePartials[v] + postPartials[v + 1] * prePartials[v + 1]
// + postPartials[v + 2] * prePartials[v + 2] + postPartials[v + 3] * prePartials[v + 3]) * weight;

grandDenominatorDerivTmp[k] += (p10 * p00 + p11 * p01 + p12 * p02 + p13 * p03) * weight;
grandNumeratorDerivTmp[k] += (sum10 * p00 + sum11 * p01 + sum12 * p02 + sum13 * p03) * weight;
grandDenominatorDerivTmp[cacheOffset + k] += (p10 * p00 + p11 * p01 + p12 * p02 + p13 * p03) * weight;
grandNumeratorDerivTmp[cacheOffset + k] += (sum10 * p00 + sum11 * p01 + sum12 * p02 + sum13 * p03) * weight;

v += 4;
}
Expand Down
31 changes: 17 additions & 14 deletions libhmsbeagle/CPU/BeagleCPU4StateSSEImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,26 +165,27 @@ class BeagleCPU4StateSSEImpl<BEAGLE_CPU_4_SSE_DOUBLE> : public BeagleCPU4StateIm
protected:
virtual int getPaddedPatternsModulus();

virtual void accumulateDerivatives(double* outDerivatives,
double* outSumDerivatives,
double* outSumSquaredDerivatives);
virtual void accumulateDerivatives(double *outDerivatives, double *outSumDerivatives,
double *outSumSquaredDerivatives, int offset);

private:

template <bool DoDerivatives>
void accumulateDerivativesDispatch1(double* outDerivatives,
double* outSumDerivatives,
double* outSumSquaredDerivatives);
void
accumulateDerivativesDispatch1(double *outDerivatives, double *outSumDerivatives, double *outSumSquaredDerivatives,
int offset);

template <bool DoDerivatives, bool DoSum>
void accumulateDerivativesDispatch2(double* outDerivatives,
double* outSumDerivatives,
double* outSumSquaredDerivatives);
void accumulateDerivativesDispatch2(double *outDerivatives,
double *outSumDerivatives,
double *outSumSquaredDerivatives,
int offset);

template <bool DoDerivatives, bool DoSum, bool DoSumSquared>
void accumulateDerivativesImpl(double* outDerivatives,
double* outSumDerivatives,
double* outSumSquaredDerivatives);
void accumulateDerivativesImpl(double *outDerivatives,
double *outSumDerivatives,
double *outSumSquaredDerivatives,
int offset);

virtual void calcStatesStates(double* destP,
const int* states1,
Expand Down Expand Up @@ -260,7 +261,8 @@ class BeagleCPU4StateSSEImpl<BEAGLE_CPU_4_SSE_DOUBLE> : public BeagleCPU4StateIm
const int scalingFactorsIndex,
double* outDerivatives,
double* outSumDerivatives,
double* outSumSquaredDerivatives);
double* outSumSquaredDerivatives,
int cacheOffset);

virtual void calcEdgeLogDerivativesStates(const int* tipStates,
const double *__restrict preOrderPartial,
Expand All @@ -270,7 +272,8 @@ class BeagleCPU4StateSSEImpl<BEAGLE_CPU_4_SSE_DOUBLE> : public BeagleCPU4StateIm
const double* __restrict categoryWeights,
double *outDerivatives,
double *outSumDerivatives,
double *outSumSquaredDerivatives);
double *outSumSquaredDerivatives,
int cacheOffset);

virtual void calcPartialsPartialsFixedScaling(double* __restrict destP,
const double* __restrict child0Partials,
Expand Down
63 changes: 33 additions & 30 deletions libhmsbeagle/CPU/BeagleCPU4StateSSEImpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -639,19 +639,19 @@ void BeagleCPU4StateSSEImpl<BEAGLE_CPU_4_SSE_DOUBLE>::calcCrossProductsStates(co
}

BEAGLE_CPU_4_SSE_TEMPLATE template <bool DoDerivatives, bool DoSum, bool DoSumSquared>
void BeagleCPU4StateSSEImpl<BEAGLE_CPU_4_SSE_DOUBLE>::accumulateDerivativesImpl(
double* outDerivatives,
double* outSumDerivatives,
double* outSumSquaredDerivatives) {
void BeagleCPU4StateSSEImpl<BEAGLE_CPU_4_SSE_DOUBLE>::accumulateDerivativesImpl(double *outDerivatives,
double *outSumDerivatives,
double *outSumSquaredDerivatives,
int offset) {

V_Real vSum = VEC_SETZERO();
V_Real vSumSquared = VEC_SETZERO();

int k = 0;
for (; k < kPatternCount - 1; k += 2) {

V_Real numerator = VEC_LOAD(grandNumeratorDerivTmp + k);
V_Real denominator = VEC_LOAD(grandDenominatorDerivTmp + k);
V_Real numerator = VEC_LOAD(grandNumeratorDerivTmp + k + offset);
V_Real denominator = VEC_LOAD(grandDenominatorDerivTmp + k + offset);
V_Real derivative = VEC_DIV(numerator, denominator);
V_Real patternWeight = VEC_LOAD(gPatternWeights + k);

Expand Down Expand Up @@ -681,7 +681,7 @@ void BeagleCPU4StateSSEImpl<BEAGLE_CPU_4_SSE_DOUBLE>::accumulateDerivativesImpl(
}

for (; k < kPatternCount; ++k) {
double derivative = grandNumeratorDerivTmp[k] / grandDenominatorDerivTmp[k];
double derivative = grandNumeratorDerivTmp[k + offset] / grandDenominatorDerivTmp[k + offset];
if (DoDerivatives) {
outDerivatives[k] = derivative;
}
Expand All @@ -703,46 +703,47 @@ void BeagleCPU4StateSSEImpl<BEAGLE_CPU_4_SSE_DOUBLE>::accumulateDerivativesImpl(
}

BEAGLE_CPU_4_SSE_TEMPLATE template <bool DoDerivatives, bool DoSum>
void BeagleCPU4StateSSEImpl<BEAGLE_CPU_4_SSE_DOUBLE>::accumulateDerivativesDispatch2(
double* outDerivatives,
double* outSumDerivatives,
double* outSumSquaredDerivatives) {
void BeagleCPU4StateSSEImpl<BEAGLE_CPU_4_SSE_DOUBLE>::accumulateDerivativesDispatch2(double *outDerivatives,
double *outSumDerivatives,
double *outSumSquaredDerivatives,
int offset) {

if (outSumSquaredDerivatives == NULL) {
accumulateDerivativesImpl<DoDerivatives, DoSum, false>(
outDerivatives, outSumDerivatives, outSumSquaredDerivatives);
outDerivatives, outSumDerivatives, outSumSquaredDerivatives, offset);
} else {
accumulateDerivativesImpl<DoDerivatives, DoSum, true>(
outDerivatives, outSumDerivatives, outSumSquaredDerivatives);
outDerivatives, outSumDerivatives, outSumSquaredDerivatives, offset);
}
}

BEAGLE_CPU_4_SSE_TEMPLATE template <bool DoDerivatives>
void BeagleCPU4StateSSEImpl<BEAGLE_CPU_4_SSE_DOUBLE>::accumulateDerivativesDispatch1(
double* outDerivatives,
double* outSumDerivatives,
double* outSumSquaredDerivatives) {
void BeagleCPU4StateSSEImpl<BEAGLE_CPU_4_SSE_DOUBLE>::accumulateDerivativesDispatch1(double *outDerivatives,
double *outSumDerivatives,
double *outSumSquaredDerivatives,
int offset) {

if (outSumDerivatives == NULL) {
accumulateDerivativesDispatch2<DoDerivatives, false>(
outDerivatives, outSumDerivatives, outSumSquaredDerivatives);
outDerivatives, outSumDerivatives, outSumSquaredDerivatives, offset);
} else {
accumulateDerivativesDispatch2<DoDerivatives, true>(
outDerivatives, outSumDerivatives, outSumSquaredDerivatives);
outDerivatives, outSumDerivatives, outSumSquaredDerivatives, offset);
}
}


BEAGLE_CPU_4_SSE_TEMPLATE
void BeagleCPU4StateSSEImpl<BEAGLE_CPU_4_SSE_DOUBLE>::accumulateDerivatives(double* outDerivatives,
double* outSumDerivatives,
double* outSumSquaredDerivatives) {
void BeagleCPU4StateSSEImpl<BEAGLE_CPU_4_SSE_DOUBLE>::accumulateDerivatives(double *outDerivatives,
double *outSumDerivatives,
double *outSumSquaredDerivatives,
int offset) {
if (outDerivatives == NULL) {
accumulateDerivativesDispatch1<false>(
outDerivatives, outSumDerivatives, outSumSquaredDerivatives);
outDerivatives, outSumDerivatives, outSumSquaredDerivatives, offset);
} else {
accumulateDerivativesDispatch1<true>(
outDerivatives, outSumDerivatives, outSumSquaredDerivatives);
outDerivatives, outSumDerivatives, outSumSquaredDerivatives, offset);
}
}

Expand All @@ -756,7 +757,8 @@ void BeagleCPU4StateSSEImpl<BEAGLE_CPU_4_SSE_DOUBLE>::calcEdgeLogDerivativesPart
const int scalingFactorsIndex,
double* outDerivatives,
double* outSumDerivatives,
double* outSumSquaredDerivatives) {
double* outSumSquaredDerivatives,
int cacheOffset) {
double* cl_p = integrationTmp;
memset(cl_p, 0, (kPatternCount * kStateCount)*sizeof(double));

Expand Down Expand Up @@ -809,8 +811,8 @@ void BeagleCPU4StateSSEImpl<BEAGLE_CPU_4_SSE_DOUBLE>::calcEdgeLogDerivativesPart
double numer = _mm_cvtsd_f64(vnumer) * wt[l];
double denon = _mm_cvtsd_f64(vdenom) * wt[l];

grandNumeratorDerivTmp[k] += numer; // TODO Merge [numer, denom] into single SSE transactions
grandDenominatorDerivTmp[k] += denon;
grandNumeratorDerivTmp[cacheOffset + k] += numer; // TODO Merge [numer, denom] into single SSE transactions
grandDenominatorDerivTmp[cacheOffset + k] += denon;

v += 4;
}
Expand All @@ -831,7 +833,8 @@ void BeagleCPU4StateSSEImpl<BEAGLE_CPU_4_SSE_DOUBLE>::calcEdgeLogDerivativesStat
const double* categoryWeights,
double *outDerivatives,
double *outSumDerivatives,
double *outSumSquaredDerivatives) {
double *outSumSquaredDerivatives,
int cacheOffset) {
double* cl_p = integrationTmp;
memset(cl_p, 0, (kPatternCount * kStateCount)*sizeof(double));

Expand Down Expand Up @@ -866,8 +869,8 @@ void BeagleCPU4StateSSEImpl<BEAGLE_CPU_4_SSE_DOUBLE>::calcEdgeLogDerivativesStat
double numer = _mm_cvtsd_f64(vnumer);
double denom = cl_r[stateChild & 3]; cl_r += 4;

grandNumeratorDerivTmp[k] += numer * wt[l];
grandDenominatorDerivTmp[k] += denom * wt[l];
grandNumeratorDerivTmp[cacheOffset + k] += numer * wt[l];
grandDenominatorDerivTmp[cacheOffset + k] += denom * wt[l];
}
w += OFFSET*4;
vcl_r += 2 * kExtraPatterns;
Expand Down
Loading