Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion include/realizations/catchment/Bmi_Multi_Formulation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@ namespace realization {

// Since this is a nested formulation, support usage of the '{{id}}' syntax for init config file paths.
Catchment_Formulation::config_pattern_substitution(properties, BMI_REALIZATION_CFG_PARAM_REQ__INIT_CONFIG,
"{{id}}", id);
"{{id}}", Catchment_Formulation::config_pattern_id_replacement(id));

// Call create_formulation to perform the rest of the typical initialization steps for the formulation.
mod->create_formulation(properties);
Expand Down
6 changes: 6 additions & 0 deletions include/realizations/catchment/Catchment_Formulation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ namespace realization {
static void config_pattern_substitution(geojson::PropertyMap &properties, const std::string &key,
const std::string &pattern, const std::string &replacement);

/**Remove leading non-numeric characters from the ID string.
*
* This may be needed to correct NGEN adding an identifying prefix to the ID with system file names without the prefix.
*/
static std::string config_pattern_id_replacement(const std::string &id);

/**
* Get a header line appropriate for a file made up of entries from this type's implementation of
* ``get_output_line_for_timestep``.
Expand Down
8 changes: 5 additions & 3 deletions include/realizations/catchment/Formulation_Manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ namespace realization {
this->add_formulation(
this->construct_formulation_from_config(
simulation_time_config,
catchment_config.first,
"cat-" + catchment_config.first,
catchment_formulation,
output_stream
)
Expand Down Expand Up @@ -553,7 +553,7 @@ namespace realization {
global_copy.formulation.parameters,
BMI_REALIZATION_CFG_PARAM_REQ__INIT_CONFIG,
"{{id}}",
identifier
Catchment_Formulation::config_pattern_id_replacement(identifier)
);
} else {
ss.str(""); ss << "init_config is present but empty for identifier: " << identifier << std::endl;
Expand Down Expand Up @@ -665,7 +665,9 @@ namespace realization {

// Replace {{id}} if present
if (id_index != std::string::npos) {
filepattern = filepattern.replace(id_index, sizeof("{{id}}") - 1, identifier);
// account generate the regex to search for the ID with or without a prefix
std::string pattern_id = Catchment_Formulation::config_pattern_id_replacement(identifier);
filepattern = filepattern.replace(id_index, sizeof("{{id}}") - 1, pattern_id);
}

// Compile the file pattern as a regex
Expand Down
10 changes: 6 additions & 4 deletions src/NGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -446,11 +446,12 @@ int run_ngen(int argc, char* argv[], int mpi_num_procs, int mpi_rank) {
#if NGEN_WITH_SQLITE3
try {
nexus_collection = ngen::geopackage::read(nexusDataFile, "nexus", nexus_subset_ids);
} catch (...) {
} catch (std::exception &e) {
// Handle all exceptions
std::string msg = "Geopackage error occurred reading nexuses: " + nexusDataFile;
LOG(msg,LogLevel::FATAL);
throw std::runtime_error(msg);
LOG(LogLevel::FATAL, e.what());
throw;
}
#else
Logger::logMsgAndThrowError("SQLite3 support required to read GeoPackage files.");
Expand All @@ -477,11 +478,12 @@ int run_ngen(int argc, char* argv[], int mpi_num_procs, int mpi_rank) {
try {
catchment_collection =
ngen::geopackage::read(catchmentDataFile, "divides", catchment_subset_ids);
} catch (...) {
} catch (std::exception &e) {
// Handle all exceptions
std::string msg = "Geopackage error occurred reading divides: " + catchmentDataFile;
LOG(msg,LogLevel::FATAL);
throw std::runtime_error(msg);
LOG(LogLevel::FATAL, e.what());
throw;
}

#else
Expand Down
202 changes: 107 additions & 95 deletions src/geopackage/read.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,6 @@ std::shared_ptr<geojson::FeatureCollection> ngen::geopackage::read(
// Check for malicious/invalid layer input
check_table_name(layer);
std::vector<geojson::Feature> features;
if (ids.size() > 0)
features.reserve(ids.size());
double min_x = std::numeric_limits<double>::infinity();
double min_y = std::numeric_limits<double>::infinity();
double max_x = -std::numeric_limits<double>::infinity();
double max_y = -std::numeric_limits<double>::infinity();

LOG(LogLevel::DEBUG, "Establishing connection to geopackage %s.", gpkg_path.c_str());
ngen::sqlite::database db{gpkg_path};
Expand All @@ -57,103 +51,121 @@ std::shared_ptr<geojson::FeatureCollection> ngen::geopackage::read(
Logger::logMsgAndThrowError(errmsg);
}

// Introspect if the layer is divides to see which ID field is in use
std::string id_column = "id";
if(layer == "divides"){
try {
//TODO: A bit primitive. Actually introspect the schema somehow? https://www.sqlite.org/c3ref/funclist.html
auto query_get_first_row = db.query("SELECT divide_id FROM " + layer + " LIMIT 1");
id_column = "divide_id";
}
catch (const std::exception& e){
#ifndef NGEN_QUIET
// output debug info on what is read exactly
read_ss << "WARN: Using legacy ID column \"id\" in layer " << layer << " is DEPRECATED and may stop working at any time." << std::endl;
LOG(read_ss.str(), LogLevel::WARNING); read_ss.str("");
#endif
}
std::string id_column;
std::string feature_query;
if (layer == "divides") {
id_column = "div_id";
feature_query =
"SELECT "
"('cat-' || divides.div_id) AS id, "
"('nex-' || flowpaths.dn_nex_id) AS toid, "
"flowpaths.slope AS So, "
"divides.area_sqkm AS areasqkm, " // faster for later code to rename the field here
"divides.geom AS geom "
"FROM divides "
"LEFT JOIN flowpaths "
"ON divides.div_id = flowpaths.div_id";
} else if (layer == "nexus") {
id_column = "nex_id";
feature_query =
"SELECT "
"('nex-' || nexus.nex_id) AS id, "
"CASE "
"WHEN flowpaths.div_id IS NULL THEN 'terminal' "
"ELSE ('cat-' || flowpaths.div_id) "
"END AS toid, "
"CASE "
"WHEN flowpaths.slope IS NULL THEN 0.0 "
"ELSE flowpaths.slope "
"END AS So, "
"nexus.geom AS geom "
"FROM nexus "
"LEFT JOIN flowpaths "
"ON nexus.dn_fp_id = flowpaths.fp_id";
} else {
Logger::logMsgAndThrowError("Geopackage read only accepts layers `divides` and `nexus`. The layer entered was " + layer);
}

// execute sub-queries if the number of IDs gets too long or once if ids.size() == 0
int bind_limit = 900;
boost::span<const std::string> id_span(ids);
for (int i = 0; i < ids.size() || (i == 0 && ids.size() == 0); i += bind_limit) {
int span_size = (i + bind_limit >= ids.size()) ? (ids.size() - i) : bind_limit;
boost::span<const std::string> sub_ids = id_span.subspan(i, span_size);

// Layer exists, getting statement for it
//
// this creates a string in the form:
// WHERE id IN (?, ?, ?, ...)
// so that it can be bound by SQLite.
// This is safer than trying to concatenate
// the IDs together.
std::string joined_ids = "";
if (!sub_ids.empty()) {
joined_ids = " WHERE "+id_column+" IN (?";
for (size_t i = 1; i < sub_ids.size(); i++) {
joined_ids += ", ?";
std::string joined_ids = "";
if (!ids.empty()) {
std::stringstream filter;
filter << " WHERE " << layer << '.' << id_column << " IN (";
for (size_t i = 0; i < ids.size(); ++i) {
if (i != 0)
filter << ',';
auto &filter_id = ids[i];
size_t sep_index = filter_id.find('-');
if (sep_index == std::string::npos) {
sep_index = 0;
} else {
sep_index++;
}
joined_ids += ")";
int id_num = std::atoi(filter_id.c_str() + sep_index);
if (id_num <= 0)
Logger::logMsgAndThrowError("Could not convert input " + layer + " ID into a number: " + filter_id);
filter << id_num;
}
filter << ')';
joined_ids = filter.str();
}

// Get number of features
auto query_get_layer_count = db.query("SELECT COUNT(*) FROM " + layer + joined_ids, sub_ids);
query_get_layer_count.next();
const int layer_feature_count = query_get_layer_count.get<int>(0);

#ifndef NGEN_QUIET
// output debug info on what is read exactly
read_ss << "Reading " << layer_feature_count << " features from layer " << layer << " using ID column `"<< id_column << "`";
if (!sub_ids.empty()) {
read_ss << " (id subset:";
for (auto& id : sub_ids) {
read_ss << " " << id;
}
read_ss << ")";
}
read_ss << std::endl;
LOG(read_ss.str(), LogLevel::DEBUG); read_ss.str("");
#endif

// Get layer feature metadata (geometry column name + type)
auto query_get_layer_geom_meta = db.query("SELECT column_name FROM gpkg_geometry_columns WHERE table_name = ?", layer);
query_get_layer_geom_meta.next();
const std::string layer_geometry_column = query_get_layer_geom_meta.get<std::string>(0);

// Get layer
LOG(LogLevel::DEBUG, "Reading %d features from layer %s.", layer_feature_count, layer.c_str());
auto query_get_layer = db.query("SELECT * FROM " + layer + joined_ids, sub_ids);
query_get_layer.next();

// build features out of layer query
if (ids.size() == 0)
features.reserve(layer_feature_count);
while(!query_get_layer.done()) {
geojson::Feature feature = build_feature(
query_get_layer,
id_column,
layer_geometry_column
);

features.push_back(feature);
query_get_layer.next();
}
// Get number of features
auto query_get_layer_count = db.query("SELECT COUNT(*) FROM " + layer + joined_ids);
query_get_layer_count.next();
const int layer_feature_count = query_get_layer_count.get<int>(0);
features.reserve(layer_feature_count);
if (!ids.empty() && ids.size() != layer_feature_count) {
LOG(LogLevel::WARNING, "The number of input IDs (%d) does not equal the number of features with those IDs in the geopackage (%d) for layer %s.",
ids.size(), layer_feature_count, layer.c_str());
}

// get layer bounding box from features
//
// GeoPackage contains a bounding box in the SQLite DB,
// however, it is in the SRS of the GPKG. By creating
// the bbox after the features are built, the projection
// is already done. This also should be fairly cheap to do.
for (const auto& feature : features) {
const auto& bbox = feature->get_bounding_box();
min_x = bbox[0] < min_x ? bbox[0] : min_x;
min_y = bbox[1] < min_y ? bbox[1] : min_y;
max_x = bbox[2] > max_x ? bbox[2] : max_x;
max_y = bbox[3] > max_y ? bbox[3] : max_y;
#ifndef NGEN_QUIET
// output debug info on what is read exactly
read_ss << "Reading " << layer_feature_count << " features from layer " << layer << " using ID column `"<< id_column << "`";
if (!ids.empty()) {
read_ss << " (id subset:";
for (auto& id : ids) {
read_ss << " " << id;
}
read_ss << ")";
}
read_ss << std::endl;
LOG(read_ss.str(), LogLevel::DEBUG); read_ss.str("");
#endif

// Get layer
LOG(LogLevel::DEBUG, "Reading %d features from layer %s.", layer_feature_count, layer.c_str());
auto query_get_layer = db.query(feature_query + joined_ids);
query_get_layer.next();

// build features out of layer query
while(!query_get_layer.done()) {
geojson::Feature feature = build_feature(
query_get_layer,
"id",
"geom"
);

features.push_back(feature);
query_get_layer.next();
}

// get layer bounding box from features
//
// GeoPackage contains a bounding box in the SQLite DB,
// however, it is in the SRS of the GPKG. By creating
// the bbox after the features are built, the projection
// is already done. This also should be fairly cheap to do.
double min_x = std::numeric_limits<double>::infinity();
double min_y = std::numeric_limits<double>::infinity();
double max_x = -std::numeric_limits<double>::infinity();
double max_y = -std::numeric_limits<double>::infinity();
for (const auto& feature : features) {
const auto& bbox = feature->get_bounding_box();
min_x = bbox[0] < min_x ? bbox[0] : min_x;
min_y = bbox[1] < min_y ? bbox[1] : min_y;
max_x = bbox[2] > max_x ? bbox[2] : max_x;
max_y = bbox[3] > max_y ? bbox[3] : max_y;
}

auto fc = std::make_shared<geojson::FeatureCollection>(
Expand Down
12 changes: 7 additions & 5 deletions src/partitionGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -433,11 +433,12 @@ int main(int argc, char* argv[])
#if NGEN_WITH_SQLITE3
try {
catchment_collection = ngen::geopackage::read(catchmentDataFile, "divides", catchment_subset_ids);
} catch (...) {
} catch (std::exception &e) {
// Handle all exceptions
std::string msg = "Geopackage error occurred reading divides: " + catchmentDataFile;
LOG(msg,LogLevel::FATAL);
throw std::runtime_error(msg);
LOG(msg, LogLevel::FATAL);
LOG(LogLevel::FATAL, e.what());
throw;
}
#else
Logger::logMsgAndThrowError("SQLite3 support required to read GeoPackage files.");
Expand Down Expand Up @@ -470,11 +471,12 @@ int main(int argc, char* argv[])
#if NGEN_WITH_SQLITE3
try {
global_nexus_collection = ngen::geopackage::read(nexusDataFile, "nexus", nexus_subset_ids);
} catch (...) {
} catch (std::exception &e) {
// Handle all exceptions
std::string msg = "Geopackage error occurred reading nexuses: " + nexusDataFile;
LOG(msg,LogLevel::FATAL);
throw std::runtime_error(msg);
LOG(LogLevel::FATAL, e.what());
throw;
}
#else
Logger::logMsgAndThrowError("SQLite3 support required to read GeoPackage files.");
Expand Down
11 changes: 11 additions & 0 deletions src/realizations/catchment/Catchment_Formulation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,17 @@ namespace realization {
// LOG(ss.str(), LogLevel::DEBUG);
}

std::string Catchment_Formulation::config_pattern_id_replacement(const std::string &id) {
size_t index = id.find_last_of('-');
if (index != std::string::npos && ++index < id.length()) {
// check if first character after the last hyphen is a number
if (static_cast<unsigned char>(id[index]) - static_cast<unsigned char>('0') <= 9) {
return id.substr(index);
}
}
return id;
}

std::string Catchment_Formulation::get_output_header_line(std::string delimiter) const {
return "Total Discharge";
}
Expand Down