Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
1c757e3
arena: add allocator + plan helper + tests
zhihao-deng May 11, 2026
52fdeaa
arena_kernels: add ToT kernels + tests
zhihao-deng May 11, 2026
7e6b58a
arena_einsum: regime-A (outer-Hadamard) plans + dispatch + tests
zhihao-deng May 12, 2026
582937a
tensor: route ToT trivial ops through arena kernels + tests
zhihao-deng May 12, 2026
463aa6e
cont_engine: thread arena plan + zero-overhead sizeof gate
zhihao-deng May 12, 2026
d9e6a59
einsum + tests/cases: hook regime-A arena into einsum + add hec_* cas…
zhihao-deng May 12, 2026
ad1a8c6
review fixes: portable sizeof gate, explicit plan-move, alignment intent
zhihao-deng May 12, 2026
6525c36
ArenaTensor parity: Tensor<ArenaTensor> behaves like Tensor<Tensor>
evaleev May 15, 2026
310a62b
Add axpy_to CPO; thread it into einsum/cont_engine scale paths
evaleev May 15, 2026
e7222eb
arena ToT: unified construction + arena-aware fill/set/init_elements
evaleev May 17, 2026
40a90bd
arena ToT: arena-aware add/subt/scale/neg tile ops + expression tests
evaleev May 18, 2026
c1a2172
cont_engine: route ToT x ToT Hadamard with view inner cells via outer…
evaleev May 18, 2026
9cba450
tot tests: add end-to-end ToT einsum contraction harness
evaleev May 18, 2026
bd050b3
einsum: guard legacy ToT element-op path for view inner cells
evaleev May 18, 2026
42ae19a
arena_kernels: add arena_inner_permute slab-rewrite kernel
evaleev May 18, 2026
36924b6
tensor: Tensor<ArenaTensor>::permute handles bipartite permutations
evaleev May 18, 2026
03f502a
arena_einsum: handle permuted inner contractions via slab-level hoist
evaleev May 18, 2026
3a1a545
arena_einsum: hoist permuted inner-Hadamard operands to C-layout
evaleev May 18, 2026
9c392da
tensor: arena-aware t x tot Hadamard mult with a result permutation
evaleev May 18, 2026
2d93fbf
tensor: arena-aware tot x t mult, ArenaTensor::sum, size_of(ArenaTensor)
evaleev May 18, 2026
722918c
arena ToT: einsum/contraction-engine support for Tensor<ArenaTensor>
evaleev May 18, 2026
a940d0b
arena ToT: MultEngine Hadamard-outer x inner-contraction; fix Mult co…
evaleev May 19, 2026
86ee236
arena ToT: support einsum's replicate_array path on >1 rank
evaleev May 19, 2026
cc9a386
arena_einsum: make ContractionArenaPlan nameable for non-ToT operands
evaleev May 19, 2026
bd7b4ce
tensor: permuting axpy_to initializes an empty target
evaleev May 19, 2026
5db3289
arena_tensor: drop redundant member scalar operator*=
evaleev May 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 94 additions & 35 deletions src/TiledArray/array_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,14 @@
#define TILEDARRAY_ARRAY_IMPL_H__INCLUDED

#include <TiledArray/distributed_storage.h>
#include <TiledArray/tensor/arena_kernels.h>
#include <TiledArray/tensor_impl.h>
#include <TiledArray/transform_iterator.h>
#include <TiledArray/type_traits.h>
#include <TiledArray/util/function.h>

#include <map>

namespace TiledArray {
namespace detail {

Expand Down Expand Up @@ -986,48 +989,104 @@ std::shared_ptr<ArrayImpl<Tile, Policy>> make_with_new_trange(
Policy::default_pmap(world, target_trange.tiles_range().volume())),
Array::lazy_deleter);
auto& target_array = *target_array_sptr;
target_array.init_tiles([value = new_value_fill](const Range& range) {
return typename Array::value_type(range, value);
});
target_array.world().gop.fence();

// loop over the local source tiles and send each one's contributions to the targets
{
const auto e = source_array.cend();
auto& target_tiles_range = target_trange.tiles_range();
for (auto it = source_array.cbegin(); it != e; ++it) {
const auto& source_tile = *it;
auto source_tile_idx = it.index();

// make range for iterating over all possible target tile idx combinations
TA::Index target_tile_ord_extent_range(rank);
for (auto d = 0; d != rank; ++d) {
target_tile_ord_extent_range[d] =
all_target_tiles[d][source_tile_idx[d]].size();
if constexpr (detail::is_tensor_of_tensor_v<Tile> &&
is_arena_tensor_v<typename Tile::value_type>) {
// Arena tensor-of-tensor: a ToT tile's inner cells are non-owning views
// into that tile's own arena slab. The generic null-init + write_tile_block
// scatter (the `else` branch) would rebind the target's null inner cells to
// the *source* tiles' slabs, leaving them dangling once the source array is
// destroyed. Instead build each local target tile directly (deep copy) by
// pulling the source cells: a retile preserves the element space, so the
// target cell at global outer element `e` takes its inner range and data
// from the source cell at `e` (elements outside the source range, e.g. a
// retile that grows the element range, yield null cells).
using inner_range_type = typename Tile::value_type::range_type;
const auto& source_elements = source_array.trange().elements_range();
std::map<std::size_t, Tile> src_tile_cache;
auto source_cell_at =
[&](const auto& e) -> const typename Tile::value_type* {
if (!source_elements.includes(e)) return nullptr;
const auto src_tile_idx = source_array.trange().element_to_tile(e);
const auto src_ord =
source_array.trange().tiles_range().ordinal(src_tile_idx);
auto it = src_tile_cache.find(src_ord);
if (it == src_tile_cache.end()) {
it = src_tile_cache
.emplace(src_ord, source_array.is_zero(src_tile_idx)
? Tile{}
: source_array.get(src_tile_idx).get())
.first;
}

// loop over every target tile combination
TA::Range target_tile_ord_extent(target_tile_ord_extent_range);
for (auto& target_tile_ord : target_tile_ord_extent) {
TA::Index target_tile_idx(rank);
container::svector<TA::Range1> target_tile_rngs1(rank);
const Tile& st = it->second;
if (st.empty()) return nullptr;
return &st(e);
};
Comment on lines +1006 to +1024
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The gather-based retile is deliberate for arena ToT (see the comment opening this branch): the generic scatter path (write_tile_block) would rebind the target's null inner cells onto the source tiles' arena slabs, leaving dangling views once the source array is destroyed — so each target rank must instead pull the source tiles it needs and deep-copy their cells. Fetches are de-duplicated per rank via src_tile_cache, so the fetch cost on each rank is O(number of distinct source tiles it touches), not one fetch per target tile. Batching/prefetching those remote fetches is a worthwhile follow-up optimization but is orthogonal to correctness; noting it for later.

for (const auto target_ord : *target_array.pmap()) {
if (target_array.is_zero(target_ord)) continue;
Tile tile = make_nested_tile<Tile>(
target_trange.make_tile_range(target_ord),
[&](const auto& e) -> inner_range_type {
const auto* sc = source_cell_at(e);
return (sc && !sc->empty()) ? sc->range() : inner_range_type{};
},
[&](auto& cell, const auto& e) {
const auto* sc = source_cell_at(e);
if (sc && !sc->empty()) {
const auto* s = sc->data();
auto* d = cell.data();
for (std::size_t p = 0; p < cell.size(); ++p) d[p] = s[p];
}
});
target_array.set(target_ord, std::move(tile));
}
target_array.world().gop.fence();
} else {
target_array.init_tiles([value = new_value_fill](const Range& range) {
return typename Array::value_type(range, value);
});
target_array.world().gop.fence();

// loop over the local source tiles and send each one's contributions to the targets
{
const auto e = source_array.cend();
auto& target_tiles_range = target_trange.tiles_range();
for (auto it = source_array.cbegin(); it != e; ++it) {
const auto& source_tile = *it;
auto source_tile_idx = it.index();

// make range for iterating over all possible target tile idx
// combinations
TA::Index target_tile_ord_extent_range(rank);
for (auto d = 0; d != rank; ++d) {
std::tie(target_tile_idx[d], target_tile_rngs1[d]) =
all_target_tiles[d][source_tile_idx[d]][target_tile_ord[d]];
target_tile_ord_extent_range[d] =
all_target_tiles[d][source_tile_idx[d]].size();
}

// loop over every target tile combination
TA::Range target_tile_ord_extent(target_tile_ord_extent_range);
for (auto& target_tile_ord : target_tile_ord_extent) {
TA::Index target_tile_idx(rank);
container::svector<TA::Range1> target_tile_rngs1(rank);
for (auto d = 0; d != rank; ++d) {
std::tie(target_tile_idx[d], target_tile_rngs1[d]) =
all_target_tiles[d][source_tile_idx[d]][target_tile_ord[d]];
}
TA_ASSERT(source_tile.future().probe());
Tile target_tile_contribution(
source_tile.get().block(target_tile_rngs1));
auto target_tile_idx_ord =
target_tiles_range.ordinal(target_tile_idx);
auto target_proc = target_array.pmap()->owner(target_tile_idx_ord);
world.taskq.add(target_proc, &write_tile_block<Tile, Policy>,
target_array.id(), target_tile_idx_ord,
target_tile_contribution);
}
TA_ASSERT(source_tile.future().probe());
Tile target_tile_contribution(
source_tile.get().block(target_tile_rngs1));
auto target_tile_idx_ord = target_tiles_range.ordinal(target_tile_idx);
auto target_proc = target_array.pmap()->owner(target_tile_idx_ord);
world.taskq.add(target_proc, &write_tile_block<Tile, Policy>,
target_array.id(), target_tile_idx_ord,
target_tile_contribution);
}
}
// data is mutated in place, so must wait for all tasks to complete
target_array.world().gop.fence();
}
// data is mutated in place, so must wait for all tasks to complete
target_array.world().gop.fence();
// WARNING!! need to truncate in DistArray ctor

return target_array_sptr;
Expand Down
Loading
Loading