-
Notifications
You must be signed in to change notification settings - Fork 25
[sham::Dump] versioning & unify all metadata in single json #1705
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -19,13 +19,20 @@ | |||||
| #include "shamcomm/logs.hpp" | ||||||
| #include "shamrock/io/ShamrockDump.hpp" | ||||||
|
|
||||||
| /// Currently this is the only valid tag as "1.0" of Shamrock dumps did not include any versioning | ||||||
| /// :'( | ||||||
| #define WRITER_VERSION_TAG "2.0" | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Using a
Suggested change
|
||||||
|
|
||||||
| bool fused_metadata = true; | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||||||
|
|
||||||
| namespace shamrock { | ||||||
|
|
||||||
| void write_shamrock_dump(std::string fname, std::string metadata_user, PatchScheduler &sched) { | ||||||
| void write_shamrock_dump( | ||||||
| std::string fname, const nlohmann::json &metadata_user, PatchScheduler &sched) { | ||||||
|
|
||||||
| StackEntry stack_loc{}; | ||||||
|
|
||||||
| std::string metadata_patch = sched.serialize_patch_metadata().dump(4); | ||||||
| nlohmann::json metadata_patch = sched.serialize_patch_metadata(); | ||||||
|
|
||||||
| using namespace shamrock::patch; | ||||||
|
|
||||||
|
|
@@ -63,9 +70,12 @@ namespace shamrock { | |||||
| using namespace nlohmann; | ||||||
|
|
||||||
| json j; | ||||||
| j["pids"] = all_pids; | ||||||
| j["bytecounts"] = all_bytecounts; | ||||||
| j["offsets"] = all_offsets; | ||||||
| j["pids"] = all_pids; | ||||||
| j["bytecounts"] = all_bytecounts; | ||||||
| j["offsets"] = all_offsets; | ||||||
| j["metadata_patch"] = metadata_patch; | ||||||
| j["metadata_user"] = metadata_user; | ||||||
| j["versioning"] = WRITER_VERSION_TAG; | ||||||
|
|
||||||
| std::string sout = j.dump(4); | ||||||
|
|
||||||
|
|
@@ -82,19 +92,14 @@ namespace shamrock { | |||||
| // do some perf investigation before enabling preallocation | ||||||
| bool preallocate = false; | ||||||
| if (preallocate) { | ||||||
| MPI_Offset tot_byte = all_offsets.back() + all_bytecounts.back() + metadata_user.size() | ||||||
| + metadata_patch.size() + sout.size() + sizeof(std::size_t) * 3; | ||||||
| MPI_Offset tot_byte | ||||||
| = all_offsets.back() + all_bytecounts.back() + sout.size() + sizeof(std::size_t); | ||||||
| MPICHECK(MPI_File_preallocate(mfile, tot_byte)); | ||||||
| } | ||||||
|
|
||||||
| shamalgs::collective::write_header(mfile, metadata_user, head_ptr); | ||||||
| shamalgs::collective::write_header(mfile, metadata_patch, head_ptr); | ||||||
| shamalgs::collective::write_header(mfile, sout, head_ptr); | ||||||
|
|
||||||
| shamlog_debug_ln( | ||||||
| "ShamrockDump", | ||||||
| shambase::format( | ||||||
| "table sizes {} {} {}", metadata_patch.size(), metadata_user.size(), sout.size())); | ||||||
| shamlog_debug_ln("ShamrockDump", shambase::format("table sizes {}", sout.size())); | ||||||
|
|
||||||
| if (/*do check*/ true) { | ||||||
| auto check_same_mpi = [](std::string s) { | ||||||
|
|
@@ -112,8 +117,6 @@ namespace shamrock { | |||||
| } | ||||||
| }; | ||||||
|
|
||||||
| check_same_mpi(metadata_user); | ||||||
| check_same_mpi(metadata_patch); | ||||||
| check_same_mpi(sout); | ||||||
| } | ||||||
|
|
||||||
|
|
@@ -159,7 +162,7 @@ namespace shamrock { | |||||
| } | ||||||
| } | ||||||
|
|
||||||
| void load_shamrock_dump(std::string fname, std::string &metadata_user, ShamrockCtx &ctx) { | ||||||
| void load_shamrock_dump(std::string fname, nlohmann::json &metadata_user, ShamrockCtx &ctx) { | ||||||
|
|
||||||
| StackEntry stack_loc{}; | ||||||
|
|
||||||
|
|
@@ -171,12 +174,9 @@ namespace shamrock { | |||||
| shambase::Timer timer; | ||||||
| timer.start(); | ||||||
|
|
||||||
| std::string metadata_patch{}; | ||||||
| std::string patchdata_infos{}; | ||||||
| std::string metadata{}; | ||||||
|
|
||||||
| metadata_user = shamalgs::collective::read_header(mfile, head_ptr); | ||||||
| metadata_patch = shamalgs::collective::read_header(mfile, head_ptr); | ||||||
| patchdata_infos = shamalgs::collective::read_header(mfile, head_ptr); | ||||||
| metadata = shamalgs::collective::read_header(mfile, head_ptr); | ||||||
|
|
||||||
| if (!shamcmdopt::getenv_str("SHAMDUMP_OFFSET_MODE_OLD").has_value()) { | ||||||
| // reset MPI view | ||||||
|
|
@@ -186,8 +186,11 @@ namespace shamrock { | |||||
|
|
||||||
| using namespace nlohmann; | ||||||
|
|
||||||
| json jmeta_patch = json::parse(metadata_patch); | ||||||
| json jpdat_info = json::parse(patchdata_infos); | ||||||
| json jmeta = json::parse(metadata); | ||||||
|
|
||||||
| metadata_user = jmeta.at("metadata_user"); | ||||||
|
|
||||||
| auto jmeta_patch = jmeta.at("metadata_patch"); | ||||||
|
Comment on lines
+189
to
+193
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The new dump format includes a The current implementation will fail when trying to load old dumps because it expects a single unified JSON header. To make the loading process more robust, you should check for the presence and value of the For example: json jmeta = json::parse(metadata);
if (jmeta.contains("versioning")) {
// New format
auto version = jmeta.at("versioning").get<std::string>();
if (version != WRITER_VERSION_TAG) {
logger::warn_ln("ShamrockDump", "Loading dump with version " + version + ", which differs from writer version " + WRITER_VERSION_TAG);
}
// ... proceed with loading new format
metadata_user = jmeta.at("metadata_user");
auto jmeta_patch = jmeta.at("metadata_patch");
// ...
} else {
// Old format or corrupted file
shambase::throw_with_loc<std::runtime_error>(
"Unsupported dump format: versioning information is missing.");
} |
||||||
|
|
||||||
| ctx.pdata_layout_new(); | ||||||
| *ctx.pdl = jmeta_patch.at("patchdata_layout").get<patch::PatchDataLayerLayout>(); | ||||||
|
|
@@ -217,9 +220,9 @@ namespace shamrock { | |||||
| std::vector<u64> all_pids; | ||||||
| std::vector<u64> all_bytecounts; | ||||||
|
|
||||||
| all_bytecounts = jpdat_info.at("bytecounts").get<std::vector<u64>>(); | ||||||
| all_offsets = jpdat_info.at("offsets").get<std::vector<u64>>(); | ||||||
| all_pids = jpdat_info.at("pids").get<std::vector<u64>>(); | ||||||
| all_bytecounts = jmeta.at("bytecounts").get<std::vector<u64>>(); | ||||||
| all_offsets = jmeta.at("offsets").get<std::vector<u64>>(); | ||||||
| all_pids = jmeta.at("pids").get<std::vector<u64>>(); | ||||||
|
|
||||||
| struct PatchFileOffset { | ||||||
| u64 offset, bytecount; | ||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This block of code appears to be for testing the dump/load functionality and includes an
`exit()` call. This prevents the rest of the example script from running, effectively breaking it. This test code should be removed before merging.