Skip to content

Long compile time #85

@WhateverLiu

Description

@WhateverLiu

Is there a way to reduce the compile time when using Parlay? I know it's a template-heavy library, and there's no simple solution, but I'm curious if there's anything I might be missing. In larger projects, the long compile times can make iterations quite painful. For instance, even a simple code snippet of less than 150 lines takes 25 seconds to compile:

#include <iostream>
// #include "../code/cpp/infra/infra.hpp"
#include "parlay/primitives.hpp"
#define vec parlay::sequence

// Returns 0.5 * ( p * log2(2p / (p + q)) + q * log2(2q / (p + q)) ).
// A term whose weight (p or q) equals zero contributes exactly 0, so the
// logarithm is never evaluated for it.
//
// Template parameter `checked`: when false (the default) p and q are first
// clamped to be no smaller than 0; when true they are used as passed in.
template <bool checked = false>
double computeJS (double p, double q) {
  if constexpr (!checked) {
    // Clamp through std::max so the result matches std::max's own
    // comparison semantics for every input.
    p = std::max(0.0, p);
    q = std::max(0.0, q);
  }

  // 2 / (p + q): the reciprocal of the midpoint of p and q.
  const double twoOverSum = 2.0 / (p + q);

  double termP = 0;
  if (!(p == 0)) termP = p * std::log2(twoOverSum * p);

  double termQ = 0;
  if (!(q == 0)) termQ = q * std::log2(twoOverSum * q);

  return (termP + termQ) * 0.5;
}

// One histogram cell holding four values of type Float: p, dp, js and q.
// js is derived from p and q via computeJS when the three-argument
// constructor is used.
template <typename Float = double>
struct Bin {
  Float p, dp, js, q;

  // All four fields start at zero.
  Bin() : p(0), dp(0), js(0), q(0) {}

  // Stores the given p, dp and q and computes js = computeJS(p, q).
  // Members are initialised in declaration order (p, dp, js, q).
  Bin(Float p_, Float dp_, Float q_)
    : p(p_), dp(dp_), js(computeJS(p_, q_)), q(q_) {}
};

// Builds one histogram (a vec<Bin<Float>> with one Bin per gbid value) for
// every entry of subcandYears, plus a grouping of gbid values by year.
//
// Parameters (all record-level ranges are indexed by the same i):
//   zbYear, zbDim, gbid - per-record year, dimension and bin index. Their
//                         maxima + 1 are used as table sizes, so they are
//                         presumably zero-based indices - TODO confirm.
//   subcandYears        - a range of ranges of year values; one output
//                         histogram is produced per inner range. Must be
//                         non-empty, because front() is taken of it and of
//                         the result.
//   NsubYear            - divisor of the sub-sample scale ratio, and the
//                         number of leading years read from each
//                         subcandYears[t] for t >= 1.
//   targetYears         - range of year values selecting the records that
//                         contribute to Bin::q.
//   dimw                - per-dimension weights, indexed by zbDim[i];
//                         rescaled below by 1 / sum(dimw).
//
// Returns std::pair(yearImpact, rst): yearImpact is the result of
// group_by_index over (zbYear[i], gbid[i]) pairs with nyear buckets; rst is
// the sequence of histograms with p, dp, q and js filled in.
template <typename Float = double>
auto makeHists(auto && zbYear, auto && zbDim, 
               auto && gbid, 
               //auto && freq, 
               auto && subcandYears, // A vector of vectors
               int NsubYear,
               auto && targetYears,
               auto && dimw)
{ 
  // Rescale the weights by the reciprocal of their sum.
  // NOTE(review): this writes through the elements of dimw, so it only has a
  // lasting effect if iterating dimw yields references to stored values -
  // confirm for the range types callers pass.
  auto inv_w = 1.0 / std::accumulate(dimw.begin(), dimw.end(), 0.0);
  for (auto && u: dimw) u *= inv_w;
  // Table sizes: largest value seen + 1.
  auto nyear = *parlay::max_element(zbYear) + 1;
  auto ndim = *parlay::max_element(zbDim) + 1;
  auto ngbid = *parlay::max_element(gbid) + 1;
  // Reduce the pairs (zbDim[i], 1u) into ndim buckets; presumably this yields
  // the number of records per dimension (depzip is not defined in this file -
  // TODO confirm it zips the two ranges into key/value pairs).
  auto dimTotalFreq = parlay::reduce_by_index(parlay::depzip(
    parlay::slice(zbDim.begin(), zbDim.end()),
    parlay::delayed_tabulate(zbDim.size(), [](auto i)->unsigned { return 1; })),
    ndim);
  
  
  // Ratios of the total number of years to the target / sub-sample year counts.
  double targetScaleRatio = nyear / double(targetYears.size());
  double subScaleRatio =  nyear / double(NsubYear);
  // One zero-initialised histogram of ngbid bins per entry of subcandYears.
  auto rst = parlay::tabulate(subcandYears.size(), [ngbid](auto i)->auto { 
    return vec<Bin<Float>> (ngbid); });
  // rst0 is the histogram for the first candidate; it also carries the q and
  // dp values that are copied into every other histogram further down.
  auto & rst0 = rst.front();
  
  
  // whichIn(x, y, n): indices i in [0, x.size()) for which x[i] occurs in y.
  // Membership is tracked in a table of n flags, so every value in x and y
  // must be smaller than n.
  auto whichIn = [](auto && x, auto && y, unsigned n)->auto {
    vec<bool> indi(n, false);
    parlay::for_each(y, [&](auto && u)->void { indi[u] = true; });
    return parlay::filter(parlay::iota(unsigned(x.size())), 
                   [&](auto i)->bool {return indi[x[i]]; });
  };
  // Records whose year is one of the target years.
  auto targetYearsInd = whichIn(zbYear, targetYears, nyear);
  
  
  // Accumulate q of the first histogram: each selected record adds
  // targetScaleRatio / dimTotalFreq[dim] * dimw[dim] to the bin gbid[i].
  // NOTE(review): the addition is made atomic by casting the address of a
  // plain Float member to std::atomic<Float>*. The standard does not
  // guarantee that a Float object can be accessed as std::atomic<Float>;
  // std::atomic_ref<Float> (C++20) is the supported way to do this.
  parlay::for_each(targetYearsInd, [&](auto i)->void {
    auto & b = rst0[gbid[i]];
    auto tfreq = dimTotalFreq[zbDim[i]];
    auto q = 1 * targetScaleRatio / tfreq * dimw[zbDim[i]];
    auto ptr = (std::atomic<Float>*)(&b.q);
    ptr->fetch_add(q);
  });
  
  // Records whose year is in the first candidate year set.
  // NOTE(review): the whole of subcandYears.front() is used here, whereas the
  // loop over t >= 1 below reads only the first NsubYear entries of
  // subcandYears[t] - confirm the difference is intended.
  auto subYearsInd = whichIn (zbYear, parlay::slice(
    subcandYears.front().begin(), subcandYears.front().end()), nyear);
  
  // Accumulate p of the first histogram, using the same std::atomic<Float>*
  // cast as for q above.
  parlay::for_each(subYearsInd, [&](auto i)->void {
    auto & b = rst0[gbid[i]];
    auto ptr = (std::atomic<Float>*)(&b.p);
    auto tfreq = dimTotalFreq[zbDim[i]];
    auto p = 1 * subScaleRatio / tfreq * dimw[zbDim[i]];
    ptr->fetch_add(p);  
  }); 
  // Set dp of the first histogram from every record.
  // NOTE(review): this is a plain assignment, not an accumulation, and it is
  // not synchronised. Records sharing the same gbid[i] store to the same
  // field, and the stored value depends on zbDim[i], so the final dp of a bin
  // depends on which record writes last - confirm intent.
  parlay::parFor(0, zbYear.size(), [&](size_t i)->void {
    auto & b = rst0[gbid[i]];
    auto tfreq = dimTotalFreq[zbDim[i]];
    b.dp = 1 * subScaleRatio / tfreq * dimw[zbDim[i]];
  }); 
  
  
  // Remaining candidates (t >= 1): each t touches only its own histogram
  // rst[t], so the inner loops are plain sequential loops with non-atomic +=.
  parlay::parFor(1, rst.size(), [&](int t)->void {
    auto & v = rst[t];
    // First NsubYear years of candidate t.
    auto subcat = parlay::slice(
      subcandYears[t].begin(), subcandYears[t].begin() + NsubYear);
    // Per-candidate year membership table.
    vec<bool> indi(nyear, false);
    for (auto && u: subcat) indi[u] = true;
    // Accumulate p from every record whose year belongs to this candidate.
    for (size_t i = 0, iend = zbYear.size(); i < iend; ++i) {
      if (!indi[zbYear[i]]) continue;
      auto & b = v[gbid[i]];
      auto tfreq = dimTotalFreq[zbDim[i]];
      b.p += 1 * subScaleRatio / tfreq * dimw[zbDim[i]];
    } 
    // q and dp are copied unchanged from the first histogram.
    for (size_t i = 0, iend = rst0.size(); i < iend; ++i) {
      v[i].q = rst0[i].q;
      v[i].dp = rst0[i].dp;
    } 
  });
  
  
  // With p and q final, compute js for every bin of every histogram.
  parlay::for_each(rst, [&](auto & x)->void {
    for (auto & u: x) u.js = computeJS (u.p, u.q);
  }); 
  
  
  // Group the gbid values by year index into nyear buckets.
  auto yearImpact = parlay::group_by_index(
    parlay::depzip(parlay::slice(zbYear.begin(), zbYear.end()), 
                  parlay::slice( gbid.begin(), gbid.end())), nyear); 
  return std::pair(std::move(yearImpact), std::move(rst));
}  

// Reproduction driver: calls makeHists on inputs built with
// parlay::delayed_tabulate and prints one value from each half of the
// returned pair. Every argument keeps its own lambda expression.
int main() {
  unsigned n = 1e6;

  // Declared but never passed on; the call below constructs its own first
  // argument.
  auto zbYear = parlay::delayed_tabulate(
    n, [n](auto idx)->auto { return unsigned(rand() % n); });

  auto hists = makeHists(
    /* zbYear */
    parlay::delayed_tabulate(
      n, [n](auto idx)->auto { return unsigned(rand() % n); }),
    /* zbDim */
    parlay::delayed_tabulate(
      n, [n](auto idx)->auto { return unsigned(rand() % n); }),
    /* gbid */
    parlay::delayed_tabulate(
      n, [n](auto idx)->auto { return unsigned(rand() % n); }),
    /* subcandYears: n inner ranges of 1000 values drawn as rand() % 100 */
    parlay::delayed_tabulate(n, [n](auto idx)->auto {
      return parlay::detab(
        1000, [n](auto inner)->auto { return unsigned(rand() % 100); });
    }),
    /* NsubYear */
    10000,
    /* targetYears */
    parlay::delayed_tabulate(
      n, [n](auto idx)->auto { return unsigned(rand() % n); }),
    /* dimw: constant weight 0.1 */
    parlay::delayed_tabulate(n, [n](auto idx)->auto { return 0.1; }));

  std::cout << hists.first[0][0];
  std::cout << ", " << hists.second[0].size() << "\n";
}

#undef vec

The compile command is g++ -std=c++20 tempFiles/longcomptime.cpp -ftree-vectorize -march=native -O2 -pthread -o tempFiles/longcomptime.

Any suggestion would be greatly appreciated!

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions