Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
c246227
fix for bug
tdavidcl Feb 8, 2026
5b0fda3
fix for bug
tdavidcl Feb 8, 2026
d526c32
fix for bug
tdavidcl Feb 8, 2026
eeda3a0
[Base] add mem perf info to crash callstack report
tdavidcl Feb 8, 2026
a297fd3
Merge branch 'main' into aurora_scaling_test
tdavidcl Feb 8, 2026
c32b0fc
Merge branch 'patch-2026-02-08-20-13' into aurora_scaling_test
tdavidcl Feb 8, 2026
3de42d2
wait everywhere
tdavidcl Feb 8, 2026
6e84c03
wait everywhere
tdavidcl Feb 8, 2026
530cb30
wait everywhere
tdavidcl Feb 8, 2026
9968f1b
wait everywhere
tdavidcl Feb 8, 2026
df63477
make it fit in 2GB allocs
tdavidcl Feb 9, 2026
e6c4f22
tidy up debug calls
tdavidcl Feb 9, 2026
c4d28ec
smaller injection step
tdavidcl Feb 9, 2026
9c9801f
smaller injection step
tdavidcl Feb 9, 2026
d53d7fa
ensure saxpy < i32_max
tdavidcl Feb 9, 2026
da8a142
Merge branch 'main' into aurora_scaling_test
tdavidcl Feb 9, 2026
122bec1
perform only safe copies
tdavidcl Feb 9, 2026
491be01
noicer
tdavidcl Feb 9, 2026
a2c10e6
use append from device buffer
tdavidcl Feb 9, 2026
b087ae5
intercept sigkill and sigiot
tdavidcl Feb 9, 2026
bc2c10e
intercept sigkill and sigiot
tdavidcl Feb 9, 2026
2a6a517
mayyyybe
tdavidcl Feb 9, 2026
7cfefea
sync points
tdavidcl Feb 9, 2026
3674193
sync points
tdavidcl Feb 9, 2026
149f9e8
sync points
tdavidcl Feb 9, 2026
a6218ec
sync points
tdavidcl Feb 9, 2026
51791f1
remove a print
tdavidcl Feb 9, 2026
a345a8c
Merge branch 'main' into aurora_scaling_test
tdavidcl Feb 10, 2026
eacfb41
very safe indeed
tdavidcl Feb 10, 2026
5abf60c
more logs ?
tdavidcl Feb 17, 2026
ceece61
log
tdavidcl Feb 23, 2026
8eb91dd
log
tdavidcl Feb 23, 2026
57b2934
potential overflow
tdavidcl Feb 23, 2026
f13a927
harden narrowing
tdavidcl Feb 23, 2026
950be29
more debug
tdavidcl Feb 24, 2026
0948c97
more checks
tdavidcl Feb 26, 2026
8f01860
fix ?
tdavidcl Feb 26, 2026
d4d7b88
Merge branch 'main' into aurora_scaling_test
tdavidcl Feb 26, 2026
587f0d8
Merge branch 'main' into aurora_scaling_test
tdavidcl Feb 27, 2026
e1e81fe
Merge branch 'main' into aurora_scaling_test
tdavidcl Feb 28, 2026
10f743a
Name all signals
tdavidcl Feb 28, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions examples/benchmarks/sph_weak_scale_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@
# Apply the setup from `gen`. Every step/limit below is derived from
# `scheduler_split_val`. Each keyword must be passed exactly once: repeating
# `insert_step` or `rank_comm_size_limit` is a SyntaxError, so only the
# smaller (current) values are kept.
setup.apply_setup(
    gen,
    gen_step=int(scheduler_split_val / 8),
    insert_step=int(scheduler_split_val / 4),
    msg_count_limit=1024,
    rank_comm_size_limit=int(scheduler_split_val),
    max_msg_size=int(scheduler_split_val / 8),
    do_setup_log=False,
)
Expand Down
105 changes: 102 additions & 3 deletions src/shamalgs/include/shamalgs/serialize.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -343,18 +343,52 @@ namespace shamalgs {

template<class T>
inline void write_buf(sham::DeviceBuffer<T> &buf, u64 len) {
StackEntry stack_loc{false};

__shamrock_stack_entry();

/*
std::string info
= shambase::format("write_buf: len={}, buf.get_size()={}", len, buf.get_size());

[[maybe_unused]] StackEntry __shamrock_unique_name(stack_loc_){
false, SourceLocation{info}};
*/
using Helper = details::SerializeHelperMember<T>;
u64 current_head = head_device;

u64 offset = align_repr(len * Helper::szrepr);
check_head_move_device<T>(offset, len);

/*
info = shambase::format(
"write_buf: len={}, buf.get_size()={}, current_head={}, offset={}",
len,
buf.get_size(),
current_head,
offset);

[[maybe_unused]] StackEntry __shamrock_unique_name(stack_loc_){
false, SourceLocation{info}};
*/
sham::EventList depends_list;
const T *accbuf = buf.get_read_access(depends_list);
auto accbufbyte = storage.get_write_access(depends_list);

/*
info = shambase::format(
"write_buf: len={}, buf.get_size()={}, current_head={}, offset={}, "
"depends_list.size()={}, accbuf={}, accbufbyte={}",
len,
buf.get_size(),
current_head,
offset,
depends_list.get_events().size(),
static_cast<const void *>(accbuf),
static_cast<void *>(accbufbyte));

[[maybe_unused]] StackEntry __shamrock_unique_name(stack_loc_){
false, SourceLocation{info}};
*/
auto e = dev_sched->get_queue().submit(
depends_list, [&, current_head](sycl::handler &cgh) {
cgh.parallel_for(sycl::range<1>{len}, [=](sycl::item<1> id) {
Expand All @@ -363,22 +397,51 @@ namespace shamalgs {
});
});

// [[maybe_unused]] StackEntry __shamrock_unique_name(stack_loc_){
// false, SourceLocation{info}};

buf.complete_event_state(e);
storage.complete_event_state(e);

head_device += offset;

// [[maybe_unused]] StackEntry __shamrock_unique_name(stack_loc_){
// false, SourceLocation{info}};
}

template<class T>
inline void load_buf(sham::DeviceBuffer<T> &buf, u64 len) {
StackEntry stack_loc{false};

__shamrock_stack_entry();

/*
std::string info = shambase::format(
"load_buf: len={}, buf.get_size()={} head_device={}",
len,
buf.get_size(),
head_device);

[[maybe_unused]] StackEntry __shamrock_unique_name(stack_loc_){
false, SourceLocation{info}};
*/
using Helper = details::SerializeHelperMember<T>;
u64 current_head = head_device;

u64 offset = align_repr(len * Helper::szrepr);
check_head_move_device<T>(offset, len);

/*
info = shambase::format(
"load_buf: len={}, buf.get_size()={}, current_head={}, offset={} head_device={}",
len,
buf.get_size(),
current_head,
offset,
head_device);

[[maybe_unused]] StackEntry __shamrock_unique_name(stack_loc_){
false, SourceLocation{info}};
*/
if (buf.get_size() < len) {
shambase::throw_with_loc<std::invalid_argument>(shambase::format(
"SerializeHelper::load_buf: (buf.get_size() < len)\n buf.get_size()={}\n "
Expand All @@ -391,18 +454,54 @@ namespace shamalgs {
T *accbuf = buf.get_write_access(depends_list);
auto accbufbyte = storage.get_read_access(depends_list);

/*
info = shambase::format(
"load_buf: len={}, buf.get_size()={}, current_head={}, offset={}, "
"depends_list.size()={}, accbuf={}, accbufbyte={} head_device={}",
len,
buf.get_size(),
current_head,
offset,
depends_list.get_events().size(),
static_cast<void *>(accbuf),
static_cast<const void *>(accbufbyte),
head_device);

[[maybe_unused]] StackEntry __shamrock_unique_name(stack_loc_){
false, SourceLocation{info}};
*/
auto e = dev_sched->get_queue().submit(
depends_list, [&, current_head](sycl::handler &cgh) {
cgh.parallel_for(sycl::range<1>{len}, [=](sycl::item<1> id) {
u64 head = current_head + id.get_linear_id() * Helper::szrepr;
accbuf[id] = Helper::load(&accbufbyte[head]);
});
});

/*
[[maybe_unused]] StackEntry __shamrock_unique_name(stack_loc_){
false, SourceLocation{info}};
*/
buf.complete_event_state(e);
storage.complete_event_state(e);

head_device += offset;

/*
info = shambase::format(
"load_buf: len={}, buf.get_size()={}, current_head={}, offset={}, "
"depends_list.size()={}, accbuf={}, accbufbyte={} head_device={}",
len,
buf.get_size(),
current_head,
offset,
depends_list.get_events().size(),
static_cast<void *>(accbuf),
static_cast<const void *>(accbufbyte),
head_device);

[[maybe_unused]] StackEntry __shamrock_unique_name(stack_loc_){
false, SourceLocation{info}};
*/
}
};

Expand Down
2 changes: 1 addition & 1 deletion src/shambackends/include/shambackends/DeviceBuffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ namespace sham {
inline sham::EventList safe_fill_lambda(
sham::DeviceQueue &q, sham::EventList &depends_list, T *dest, size_t count, Fct &&fct) {
__shamrock_stack_entry();
u64 max_copy_len = (1 << 30); // 1G elements, this garanteee indexing below i32_max
u64 max_copy_len = (1 << 30) / sizeof(T); // 1GB

sham::EventList events;

Expand Down
23 changes: 21 additions & 2 deletions src/shambackends/include/shambackends/DeviceQueue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,17 +100,32 @@ namespace sham {
template<class Fct>
sycl::event submit(Fct &&fct) {
    // Register this call through the project's stack-entry macro.
    __shamrock_stack_entry();

    // Let the queue finish whatever is already enqueued before adding new work.
    wait_and_throw();

    // Submit a command group whose handler is passed straight to the user callable.
    auto submitted = q.submit([&](sycl::handler &cgh) {
        fct(cgh);
    });

    // Optional synchronous mode: wait on the event that was just produced.
    if (wait_after_submit) {
        __shamrock_stack_entry();
        submitted.wait_and_throw();
    }

    return submitted;
}

/**
 * @brief Wait for the queue to finish and throw an exception if one has occurred
 *
 * Thin wrapper that forwards to `q.wait_and_throw()` on the wrapped queue `q`.
 * NOTE(review): how a pending error is surfaced presumably depends on the
 * queue's asynchronous error handler, which is not visible here - confirm.
 */
void wait_and_throw() {
// Register this call through the project's stack-entry macro before blocking.
__shamrock_stack_entry();
q.wait_and_throw();
}

/**
* @brief Submits a kernel to the SYCL queue, adding the events in the
* provided EventList as dependencies
Expand Down Expand Up @@ -138,14 +153,18 @@ namespace sham {
template<class Fct>
sycl::event submit(EventList &elist, Fct &&fct) {

elist.consumed = true;
__shamrock_stack_entry();

auto e = q.submit([&](sycl::handler &h) {
wait_and_throw();

elist.consumed = true;
auto e = q.submit([&](sycl::handler &h) {
elist.apply_dependancy(h);
fct(h);
});

if (wait_after_submit) {
__shamrock_stack_entry();
e.wait_and_throw();
}

Expand Down
10 changes: 0 additions & 10 deletions src/shambackends/src/Device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,6 @@ namespace sham {
try { \
return {dev.get_info<sycl::info::device::info_>()}; \
} catch (...) { \
logger::warn_ln( \
"Device", \
"dev.get_info<sycl::info::device::" #info_ ">() raised an exception for device", \
name); \
return {}; \
} \
}();
Expand All @@ -102,10 +98,6 @@ namespace sham {
try { \
return {dev.get_info<sycl::info::device::info_>()}; \
} catch (...) { \
logger::warn_ln( \
"Device", \
"dev.get_info<sycl::info::device::" #info_ ">() raised an exception for device", \
name); \
return {}; \
} \
}();
Expand All @@ -116,8 +108,6 @@ namespace sham {
try { \
return {dev.get_info<info_>()}; \
} catch (...) { \
logger::warn_ln( \
"Device", "dev.get_info<" #info_ ">() raised an exception for device", name); \
return {}; \
} \
}();
Expand Down
6 changes: 5 additions & 1 deletion src/shambase/include/shambase/SourceLocation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/

#include "shambase/source_location.hpp"
#include <optional>
#include <string>

/**
Expand All @@ -32,8 +33,11 @@ struct SourceLocation {
using srcloc = shambase::cxxstd::source_location;

srcloc loc;
std::optional<std::string> message;

inline explicit SourceLocation(srcloc _loc = srcloc::current()) : loc(_loc) {}
inline explicit SourceLocation(
std::optional<std::string> _message = std::nullopt, srcloc _loc = srcloc::current())
: loc(_loc), message(_message) {}

/**
* @brief format the location in multiple lines
Expand Down
14 changes: 12 additions & 2 deletions src/shambase/src/SourceLocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,16 @@ std::string SourceLocation::format_one_line() const {
}

// Format the location on one line as "function (file:line:column)".
// When a message is attached to this SourceLocation, " info=<message>" is
// appended to that line.
std::string SourceLocation::format_one_line_func() const {
    // The message check has to come first: an unconditional return placed
    // ahead of it would make the message-aware format unreachable.
    if (message.has_value()) {
        return fmt::format(
            "{} ({}:{}:{}) info={}",
            loc.function_name(),
            loc.file_name(),
            loc.line(),
            loc.column(),
            message.value());
    }

    // No message attached: plain one-line location.
    return fmt::format(
        "{} ({}:{}:{})", loc.function_name(), loc.file_name(), loc.line(), loc.column());
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,22 @@

namespace shammodels::sph::modules {

namespace {
    /**
     * @brief Synchronisation point across MPI_COMM_WORLD
     *
     * Sequence: barrier, wait for the compute scheduler's queue to finish, barrier,
     * then world rank 0 prints "sync point" followed by the one-line formatted
     * location, and a final barrier is issued.
     *
     * @param loc Location reported in the log line; defaults to `SourceLocation{}`.
     *            NOTE(review): presumably resolves to the caller's location - confirm.
     */
    void sync_point(SourceLocation loc = SourceLocation{}) {
        // Line every rank up before touching the device queue.
        shamcomm::mpi::Barrier(MPI_COMM_WORLD);

        // Wait for the compute scheduler's queue to finish its pending work.
        auto scheduler_ptr = shamsys::instance::get_compute_scheduler_ptr();
        auto &compute_queue = shambase::get_check_ref(scheduler_ptr).get_queue();
        compute_queue.wait_and_throw();

        shamcomm::mpi::Barrier(MPI_COMM_WORLD);

        // Only world rank 0 reports.
        const bool is_root_rank = (shamcomm::world_rank() == 0);
        if (is_root_rank) {
            logger::raw_ln("sync point", loc.format_one_line());
        }

        shamcomm::mpi::Barrier(MPI_COMM_WORLD);
    }
} // namespace

template<class Tvec>
class GeneratorLatticeHCP : public ISPHSetupNode {
using Tscal = shambase::VecComponent<Tvec>;
Expand Down Expand Up @@ -78,9 +94,14 @@ namespace shammodels::sph::modules {
"total",
skip_start + gen_cnt + skip_end);

sync_point();

generator.skip(skip_start);
sync_point();
auto tmp = generator.next_n(gen_cnt);
sync_point();
generator.skip(skip_end);
sync_point();

for (Tvec r : tmp) {
if (Patch::is_in_patch_converted(r, box.lower, box.upper)) {
Expand All @@ -91,24 +112,33 @@ namespace shammodels::sph::modules {

// Make a patchdata from pos_data
PatchDataLayer tmp(sched.get_layout_ptr_old());
sync_point();
if (!pos_data.empty()) {
tmp.resize(pos_data.size());
}
sync_point();
if (!pos_data.empty()) {
tmp.fields_raz();
}
sync_point();

{
u32 len = pos_data.size();
PatchDataField<Tvec> &f
= tmp.get_field<Tvec>(sched.pdl_old().get_field_idx<Tvec>("xyz"));
// sycl::buffer<Tvec> buf(pos_data.data(), len);
f.override(pos_data, len);
}
if (!pos_data.empty()) {
u32 len = pos_data.size();
PatchDataField<Tvec> &f
= tmp.get_field<Tvec>(sched.pdl_old().get_field_idx<Tvec>("xyz"));
// sycl::buffer<Tvec> buf(pos_data.data(), len);
f.override(pos_data, len);
}
sync_point();

{
PatchDataField<Tscal> &f
= tmp.get_field<Tscal>(sched.pdl_old().get_field_idx<Tscal>("hpart"));
f.override(dr);
}
if (!pos_data.empty()) {
PatchDataField<Tscal> &f
= tmp.get_field<Tscal>(sched.pdl_old().get_field_idx<Tscal>("hpart"));
f.override(dr);
}

sync_point();

return tmp;
}

Expand Down
Loading