@@ -131,6 +131,8 @@ std::vector<T> collectBranchMarkers(T root, T node) {
   return findThreadSpecificMarkers(node);
 }
 
+struct FullSchedule;
+
 /*
  * Transform schedule bands into a union_map.
  * Takes all partial schedules at leaves as MUPAs (without accounting for
@@ -139,7 +141,8 @@ std::vector<T> collectBranchMarkers(T root, T node) {
  * current leaves and transforms them into union maps.
  * Mapping filters are ignored.
  */
-isl::union_map fullSchedule(const detail::ScheduleTree* root) {
+isl::UnionMap<Domain, FullSchedule> fullSchedule(
+    const detail::ScheduleTree* root) {
   using namespace tc::polyhedral::detail;
 
   if (!root->elemAs<ScheduleTreeElemDomain>()) {
@@ -182,7 +185,7 @@ isl::union_map fullSchedule(const detail::ScheduleTree* root) {
       throw promotion::PromotionLogicError(ss.str());
     }
   }
-  return schedule;
+  return isl::UnionMap<Domain, FullSchedule>(schedule);
 }
 
 /*
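Some context on the new incomplete types: `FullSchedule` (and `Unrolled` further down) carry no data; they exist only as compile-time tags naming isl spaces, so that compositions over mismatched spaces fail to compile. The sketch below illustrates the idea, assuming isl's stock C++ bindings underneath; the class name, `asUnionMap`, and the tag plumbing are illustrative, not TC's actual interop layer. `fullSchedule` above wraps its untyped result in exactly this way on return.

```cpp
#include <utility>
#include <isl/cpp.h>

// Tag types carry no data; an incomplete type is enough to name a space.
struct Domain;
struct FullSchedule;

// Phantom-typed wrapper (illustrative): DomainTag/RangeTag exist only in
// the type system and are erased at runtime.
template <typename DomainTag, typename RangeTag>
class TypedUnionMap {
 public:
  explicit TypedUnionMap(isl::union_map map) : map_(std::move(map)) {}

  // Only well-typed when the tags line up:
  // (D -> R).apply_domain(D -> D2) yields (D2 -> R).
  template <typename DomainTag2>
  TypedUnionMap<DomainTag2, RangeTag> apply_domain(
      const TypedUnionMap<DomainTag, DomainTag2>& other) const {
    return TypedUnionMap<DomainTag2, RangeTag>(
        map_.apply_domain(other.asUnionMap()));
  }

  // Escape hatch back to the untyped world.
  isl::union_map asUnionMap() const {
    return map_;
  }

 private:
  isl::union_map map_;
};
```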
@@ -263,7 +266,7 @@ bool promotionImprovesCoalescing(
     const detail::ScheduleTree* root,
     const detail::ScheduleTree* node,
     const TensorReferenceGroup& group,
-    isl::union_map schedule) {
+    isl::UnionMap<Domain, FullSchedule> schedule) {
   auto originalAccesses = group.originalAccesses();
 
   auto markers = collectBranchMarkers(root, node);
@@ -313,6 +316,8 @@ isl::union_set collectMappingsTo(const Scop& scop) {
   return mapping;
 }
 
+struct Unrolled;
+
 /*
 * Check that only unrolled loops may appear in access subscripts.
 * Because the scoping point can be above a branching tree, descend into each
@@ -343,11 +348,12 @@ isl::union_set collectMappingsTo(const Scop& scop) {
  * different references may have different values, but all of them remain
  * independent of non-unrolled loop iterators.
  */
+template <typename Outer>
 bool accessSubscriptsAreUnrolledLoops(
     const TensorReferenceGroup& group,
     const detail::ScheduleTree* root,
     const detail::ScheduleTree* scope,
-    isl::multi_union_pw_aff outerSchedule) {
+    isl::MultiUnionPwAff<Domain, Outer> outerSchedule) {
   using namespace detail;
 
   auto nodes = ScheduleTree::collect(scope);
@@ -366,7 +372,7 @@ bool accessSubscriptsAreUnrolledLoops(
 
   auto unrolledDims = isl::union_pw_aff_list(leaf->ctx_, 1);
   for (auto node : ancestors) {
-    auto band = node->elemAs<detail::ScheduleTreeElemBand>();
+    auto band = node->template elemAs<detail::ScheduleTreeElemBand>();
     if (!band) {
       continue;
     }
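The added `template` keyword is the standard C++ dependent-name disambiguator: now that `accessSubscriptsAreUnrolledLoops` is itself a function template, the call through `node` is presumably treated as dependent, and without the keyword the compiler parses `<` as less-than. A self-contained toy of the rule (names are illustrative, not TC's):

```cpp
struct Band {};

struct Node {
  template <typename T>
  T* elemAs() {
    return nullptr;  // toy stand-in for TC's checked downcast
  }
};

template <typename T>
void visit(T* node) {
  // auto band = node->elemAs<Band>();         // ill-formed: '<' parsed
  //                                           // as less-than
  auto band = node->template elemAs<Band>();   // OK: member template call
  (void)band;
}
```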
@@ -383,7 +389,8 @@ bool accessSubscriptsAreUnrolledLoops(
 
   auto space = isl::space(leaf->ctx_, 0, unrolledDims.n())
                    .align_params(subdomain.get_space());
-  auto unrolledDimsMupa = isl::multi_union_pw_aff(space, unrolledDims);
+  auto unrolledDimsMupa =
+      isl::MultiUnionPwAff<Domain, Unrolled>(space, unrolledDims);
 
   // It is possible that no loops are unrolled, in which case
   // unrolledDimsMupa is zero-dimensional and needs an explicit domain
@@ -392,10 +399,11 @@ bool accessSubscriptsAreUnrolledLoops(
       unrolledDimsMupa.intersect_domain(group.originalAccesses().domain());
 
   auto accesses = group.originalAccesses();
-  auto schedule = outerSchedule.flat_range_product(unrolledDimsMupa);
-  accesses = accesses.apply_domain(isl::union_map::from(schedule));
+  auto schedule = outerSchedule.range_product(unrolledDimsMupa);
+  auto scheduleMap = schedule.asUnionMap();
+  auto scheduledAccesses = accesses.apply_domain(scheduleMap);
 
-  if (!accesses.is_single_valued()) {
+  if (!scheduledAccesses.is_single_valued()) {
     return false;
   }
 }
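The move from `flat_range_product` to `range_product` here is what lets the static space tags survive: `flat_range_product` concatenates both ranges into a single anonymous flat space, whereas `range_product` builds the nested pair space `[Outer -> Unrolled]`, which a typed wrapper can still name; `asUnionMap()` then drops back to an untyped map for `apply_domain`. The difference is easy to see on raw isl objects (a sketch with made-up statement and space names, assuming isl's stock C++ bindings):

```cpp
#include <isl/cpp.h>

void compareProducts(isl::ctx ctx) {
  auto outer = isl::union_map(ctx, "{ S[i] -> O[i] }");
  auto unrolled = isl::union_map(ctx, "{ S[i] -> U[i] }");

  // range_product keeps a nested, named pair space:
  //   { S[i] -> [O[i] -> U[i]] }
  auto nested = outer.range_product(unrolled);

  // flat_range_product concatenates into one anonymous tuple:
  //   { S[i] -> [i, i] }
  auto flat = outer.flat_range_product(unrolled);
  (void)nested;
  (void)flat;
}
```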
@@ -415,23 +423,25 @@ bool accessSubscriptsAreUnrolledLoops(
 * thread associated to a given pair of tensor element and outer schedule
 * iteration.
 */
+template <typename Outer>
 bool isPromotableToRegistersBelow(
     const TensorReferenceGroup& group,
     const detail::ScheduleTree* root,
     const detail::ScheduleTree* scope,
-    isl::multi_union_pw_aff outer,
-    isl::multi_union_pw_aff thread) {
+    isl::MultiUnionPwAff<Domain, Outer> outer,
+    isl::MultiUnionPwAff<Domain, Thread> thread) {
   if (!accessSubscriptsAreUnrolledLoops(
-          group, root, scope, outer.flat_range_product(thread))) {
+          group, root, scope, outer.range_product(thread))) {
     return false;
   }
 
   auto originalAccesses = group.originalAccesses();
-  auto map = isl::union_map::from(outer);
-  map = map.range_product(originalAccesses);
-  map = map.apply_domain(isl::union_map::from(thread));
+  auto outerMap = isl::UnionMap<Domain, Outer>::from(outer);
+  auto pair = outerMap.range_product(originalAccesses);
+  auto threadMap = isl::UnionMap<Domain, Thread>::from(thread);
+  auto threadToPair = pair.apply_domain(threadMap);
 
-  return map.is_injective();
+  return threadToPair.is_injective();
 }
 
 /*
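The named intermediates also make the final check self-documenting: `threadToPair` relates each thread to the `[outer iteration -> tensor element]` pairs it touches, and `is_injective` demands that no two distinct threads share such a pair, i.e. the accessing thread is uniquely determined by the pair. A toy illustration with raw isl (made-up spaces, stock C++ bindings assumed):

```cpp
#include <isl/cpp.h>

void injectivityDemo(isl::ctx ctx) {
  // Each thread t touches its own element A[t] at every outer iteration o:
  // distinct threads hit distinct pairs, so the relation is injective.
  auto perThread = isl::union_map(
      ctx, "{ T[t] -> [O[o] -> A[t]] : 0 <= t < 4 and 0 <= o < 4 }");
  auto ok = perThread.is_injective();  // holds: the check passes

  // Every thread touches A[0]: a pair [O[o] -> A[0]] has several preimages,
  // so injectivity fails and the group would not be promoted to registers.
  auto shared = isl::union_map(
      ctx, "{ T[t] -> [O[o] -> A[0]] : 0 <= t < 4 and 0 <= o < 4 }");
  auto bad = shared.is_injective();  // does not hold
  (void)ok;
  (void)bad;
}
```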
@@ -654,15 +664,15 @@ void promoteToRegistersBelow(MappedScop& mscop, detail::ScheduleTree* scope) {
   auto blockSchedule = mscop.blockMappingSchedule(mscop.schedule());
 
   // Pure affine schedule without (mapping) filters.
-  auto partialSchedMupa = partialScheduleMupa(root, scope);
+  auto partialSchedMupa = partialScheduleMupa<Scope>(root, scope);
   // Schedule with block mapping filter.
   auto partialSched =
       isl::union_map::from(partialSchedMupa).intersect_domain(blockMapping);
   // The following promotion validity and profitability checks need to be
   // performed with respect to the block mapping, so append the block schedule.
   // If the partial schedule contains it already, it will just end up with
   // identical dimensions without affecting the result of the checks.
-  partialSchedMupa = partialSchedMupa.flat_range_product(blockSchedule);
+  auto partialSchedBlockMupa = partialSchedMupa.range_product(blockSchedule);
 
   for (auto& tensorGroups : groupMap) {
     auto tensorId = tensorGroups.first;
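A small design consequence of the typed wrappers: `range_product` changes the static range tag, so the product can no longer be assigned back into `partialSchedMupa` the way the old `flat_range_product` line was, hence the fresh `partialSchedBlockMupa`. A self-contained toy of the effect (tag, pair, and class names are placeholders, not TC's spellings):

```cpp
#include <utility>

struct Domain;
struct Scope;
struct Block;

// Toy phantom-typed MUPA: range_product yields a nested pair tag.
template <typename D, typename R>
struct Mupa {
  template <typename R2>
  Mupa<D, std::pair<R, R2>> range_product(const Mupa<D, R2>&) const {
    return {};
  }
};

void example(Mupa<Domain, Scope> partialSchedMupa,
             Mupa<Domain, Block> blockSchedule) {
  // Fresh variable: the range tag is now std::pair<Scope, Block>.
  auto partialSchedBlockMupa = partialSchedMupa.range_product(blockSchedule);
  // partialSchedMupa = partialSchedBlockMupa;  // error: mismatched types
  (void)partialSchedBlockMupa;
}
```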
@@ -676,11 +686,11 @@ void promoteToRegistersBelow(MappedScop& mscop, detail::ScheduleTree* scope) {
       continue;
     }
     if (!isPromotableToRegistersBelow(
-            *group, root, scope, partialSchedMupa, threadSchedule)) {
+            *group, root, scope, partialSchedBlockMupa, threadSchedule)) {
       continue;
     }
     // Check reuse within threads.
-    auto schedule = partialSchedMupa.flat_range_product(threadSchedule);
+    auto schedule = partialSchedBlockMupa.flat_range_product(threadSchedule);
     if (!hasReuseWithin(*group, schedule)) {
       continue;
     }