From 810f7bb540db41348af072a4e9e6f648c89a54a7 Mon Sep 17 00:00:00 2001 From: Joseph White Date: Fri, 27 Feb 2026 09:55:08 -0500 Subject: [PATCH 1/3] Add ability to specify the resource identifier --- configuration/etl/etl.d/ingest_resources.json | 3 ++- configuration/etl/etl.d/staging.json | 3 ++- .../common/staging/resource-config.json | 3 ++- .../etl_action_defs.d/common/staging/resource.json | 12 +++++++++++- .../etl/etl_schemas.d/common/resources.schema.json | 4 ++++ .../etl_tables.d/common/staging/resource-config.json | 6 ++++++ 6 files changed, 27 insertions(+), 4 deletions(-) diff --git a/configuration/etl/etl.d/ingest_resources.json b/configuration/etl/etl.d/ingest_resources.json index 1f11751559..7501843cb0 100644 --- a/configuration/etl/etl.d/ingest_resources.json +++ b/configuration/etl/etl.d/ingest_resources.json @@ -48,7 +48,8 @@ "shared_jobs", "timezone", "resource_allocation_type", - "organization" + "organization", + "resource_key" ] }, "destination": { diff --git a/configuration/etl/etl.d/staging.json b/configuration/etl/etl.d/staging.json index f8372de7f5..3dc4917bf6 100644 --- a/configuration/etl/etl.d/staging.json +++ b/configuration/etl/etl.d/staging.json @@ -155,7 +155,8 @@ "timezone", "resource_type", "resource_allocation_type", - "organization" + "organization", + "resource_key" ] } } diff --git a/configuration/etl/etl_action_defs.d/common/staging/resource-config.json b/configuration/etl/etl_action_defs.d/common/staging/resource-config.json index c123c820a3..eeca2555a8 100644 --- a/configuration/etl/etl_action_defs.d/common/staging/resource-config.json +++ b/configuration/etl/etl_action_defs.d/common/staging/resource-config.json @@ -11,7 +11,8 @@ "timezone": "timezone", "type_abbrev": "resource_type", "resource_allocation_type_abbrev": "resource_allocation_type", - "organization": "organization" + "organization": "organization", + "resource_key": "resource_key" } } } diff --git 
a/configuration/etl/etl_action_defs.d/common/staging/resource.json b/configuration/etl/etl_action_defs.d/common/staging/resource.json index e16e644e37..e20153bd2a 100644 --- a/configuration/etl/etl_action_defs.d/common/staging/resource.json +++ b/configuration/etl/etl_action_defs.d/common/staging/resource.json @@ -3,18 +3,28 @@ "#": "cloud resources since a primary key must be assigned to the resource", "#": "before the data for that resource is ingested and the cloud data", "#": "does not contain the name of the resource.", + "#": "If the resource is already in the table the resource_id stays the same. If not then", + "#": "the resource_key will be used as the resource_id if it is set, otherwise the autoincrement value is used.", "table_definition": { "$ref": "${table_definition_dir}/common/staging/resource.json#/table_definition" }, "source_query": { "records": { - "resource_name": "DISTINCT rc.resource" + "resource_id": "COALESCE(res.resource_id, rc.resource_key)", + "resource_name": "rc.resource" }, "joins": [ { "schema": "${SOURCE_SCHEMA}", "name": "staging_resource_config", "alias": "rc" + }, + { + "schema": "${SOURCE_SCHEMA}", + "name": "staging_resource", + "type": "LEFT", + "on": "rc.resource = res.resource_name", + "alias": "res" } ] } diff --git a/configuration/etl/etl_schemas.d/common/resources.schema.json b/configuration/etl/etl_schemas.d/common/resources.schema.json index 2f9998771b..44847ea3c7 100644 --- a/configuration/etl/etl_schemas.d/common/resources.schema.json +++ b/configuration/etl/etl_schemas.d/common/resources.schema.json @@ -45,6 +45,10 @@ "type": "string", "description": "The organization the resource belongs to", "maxLength": 100 + }, + "resource_key": { + "type": "number", + "description": "Optional: Numerical identifier for the resource in the database. For a typical XDMoD install this value need not be specified. 
Typical reasons why you might specify the identifier would be if you had data for the same resource in multiple XDMoD instances and you wanted the internal identifiers to match across all the separate databases." } }, "required": [ diff --git a/configuration/etl/etl_tables.d/common/staging/resource-config.json b/configuration/etl/etl_tables.d/common/staging/resource-config.json index 265ca84294..a57a3d3353 100644 --- a/configuration/etl/etl_tables.d/common/staging/resource-config.json +++ b/configuration/etl/etl_tables.d/common/staging/resource-config.json @@ -57,6 +57,12 @@ "type": "varchar(30)", "nullable": true, "comment": "Organization for the resource" + }, + { + "name": "resource_key", + "type": "int(11)", + "nullable": true, + "comment": "The numerical identifier for the resource in the XDMoD datawarehouse. For a typical XDMoD install this value need not be specified. Typical reasons why you might specify the identifier would be if you had data for the same resource in multiple XDMoD instances and you wanted the internal identifiers to match across all the separate databases." } ], "indexes": [ From 8c4270b214f9c56af71d5126272b33c6eeb00adb Mon Sep 17 00:00:00 2001 From: Joseph White Date: Fri, 27 Feb 2026 11:33:41 -0500 Subject: [PATCH 2/3] Don't bother running the jobs resource ingestor. It should have been removed way back when the common resource ingestor was added. Note you can't get jobs into the jobs staging table without having an entry in the shredder. 
--- configuration/etl/etl.d/staging.json | 5 ----- .../common/staging/resource.json | 3 +++ .../jobs/staging/resource.json | 17 ----------------- 3 files changed, 3 insertions(+), 22 deletions(-) delete mode 100644 configuration/etl/etl_action_defs.d/jobs/staging/resource.json diff --git a/configuration/etl/etl.d/staging.json b/configuration/etl/etl.d/staging.json index 3dc4917bf6..fea69b3a4c 100644 --- a/configuration/etl/etl.d/staging.json +++ b/configuration/etl/etl.d/staging.json @@ -207,11 +207,6 @@ "description": "Ingest job PIs", "definition_file": "jobs/staging/pi.json" }, - { - "name": "resource", - "description": "Ingest job resources", - "definition_file": "jobs/staging/resource.json" - }, { "name": "union-user-pi--pi", "description": "Ingest job PIs (combined with users)", diff --git a/configuration/etl/etl_action_defs.d/common/staging/resource.json b/configuration/etl/etl_action_defs.d/common/staging/resource.json index e20153bd2a..f05b1e2e99 100644 --- a/configuration/etl/etl_action_defs.d/common/staging/resource.json +++ b/configuration/etl/etl_action_defs.d/common/staging/resource.json @@ -26,6 +26,9 @@ "on": "rc.resource = res.resource_name", "alias": "res" } + ], + "orderby": [ + "rc.resource_id" ] } } diff --git a/configuration/etl/etl_action_defs.d/jobs/staging/resource.json b/configuration/etl/etl_action_defs.d/jobs/staging/resource.json deleted file mode 100644 index dc57150a5d..0000000000 --- a/configuration/etl/etl_action_defs.d/jobs/staging/resource.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "table_definition": { - "$ref": "${table_definition_dir}/common/staging/resource.json#/table_definition" - }, - "source_query": { - "records": { - "resource_name": "DISTINCT j.resource_name" - }, - "joins": [ - { - "name": "shredded_job", - "schema": "${SOURCE_SCHEMA}", - "alias": "j" - } - ] - } -} From d6ab045cdd45d3136134fa7afb68f0c03d1a59f4 Mon Sep 17 00:00:00 2001 From: Joseph White Date: Fri, 27 Feb 2026 12:56:19 -0500 Subject: [PATCH 3/3] update 
documentation --- docs/configuration.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/configuration.md b/docs/configuration.md index f45960d657..5da70539ec 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -542,6 +542,19 @@ to users. ] ``` +| Property | Required? | Type | Description | +| ------------------ | --------- | ------- | ------------ | +| resource | Yes | string | The string identifier for the resource. This is the value that will be used when running the `xdmod-shredder` command. Typically this should be a short string without spaces. | +| name | Yes | string | The human readable name of the resource. | +| resource_type | Yes | string | The type of the resource from the resource_types.json. Examples include "HPC" for batch computing resources, "Cloud" for infrastructure as a service resources, and "Disk" for data storage resources. | +| resource_allocation_type | Yes | string | The base unit of resource allocation, such as by CPU, GPU or Node. There are 4 possible values, CPU, CPUNode, GPU, and GPUNode. CPUNode denotes a resource that allocates nodes of CPUs to users, whereas CPU denotes a resource that allocates individual CPUs to users. | +| organization | Yes | string | The name of the organization for the resource. It should match the `abbrev` value from the organizations.json file. | +| description | No | string | Human-readable description of the resource. | +| pi_column | No | string | The name of the column in the resource specific job table to identify the PI. The column names that may be used with this feature must exist in the corresponding shredded_job_* table (e.g. shredded_job_pbs, shredded_job_slurm) of the mod_shredder database for the resource manager you are using. For example, to use accounts from PBS/TORQUE you must use "pi_column": "account", but to use accounts from Slurm you must use "pi_column": "account_name". 
The default is to use the group column. | +| shared_jobs | No | boolean | Whether the resource allows multiple jobs to share compute nodes. This information is used by the Job Performance Data (SUPReMM) module to determine which HPC jobs shared compute nodes. The default is that resources are assumed to not allow node sharing. If the SUPReMM module is in use and a resource does allow node sharing then this should be set to true. | +| timezone | No | string | The timezone of the resource. This is used in the Job Viewer to display job timestamps in the local timezone of the resource. This value should be a valid timezone identifier such as "America/New_York" and not the three letter abbreviation or hour offset. | +| resource_key | No | integer | Optionally specify the internal identifier for the resource in the database. For a typical XDMoD install this value should not be specified. This option exists to support the scenario where there are multiple XDMoD instances (such as production and development) and you want the internal identifiers to match across all the separate databases. | + ### resource_specs.json Defines resource node and processor counts. Each object in the array