diff --git a/flow/flow.cal.conv/flow.cal.conv.R b/flow/flow.cal.conv/flow.cal.conv.R
index 565e8b518..269ecc422 100644
--- a/flow/flow.cal.conv/flow.cal.conv.R
+++ b/flow/flow.cal.conv/flow.cal.conv.R
@@ -190,6 +190,14 @@
#' 12. "DirSubCopy=value" (optional), where value is the names of additional subfolders, separated by pipes, at
#' the same level as the data folder in the input path that are to be copied with a symbolic link to the
#' output path.
+#'
+#' 13. "nomVal=value" (optional), where value contains term:nominal-value pairs that will be passed to the nominal
+#' calibration function def.cal.conv.nmnl, e.g. "nomVal=speed:0.1666667". Terms must be unique.
+#' Separate multiple pairs with pipes (|), using the format "nomVal=term1:value1|term2:value2|term3:value3", etc.
+#'
+#' 14. "nomCalID=value" (optional), where value contains term:calibration-ID pairs that will be passed to the nominal
+#' calibration function def.cal.conv.nmnl, e.g. "nomCalID=speed:CVAL_B1". Terms must be unique.
+#' Separate multiple pairs with pipes (|), using the format "nomCalID=term1:CalID1|term2:CalID2", etc.
#'
#' Note: This script implements logging described in \code{\link[NEONprocIS.base]{def.log.init}},
#' which uses system environment variables if available.
@@ -214,10 +222,12 @@
#' Rscript flow.cal.conv.R "DirIn=$DIR_IN" "DirOut=/pfs/out" "FileSchmData=/avro_schemas/dp0p/prt_calibrated.avsc" "FileSchmQf=/avro_schemas/dp0p/flags_calibration.avsc" ConvFuncTerm1=def.cal.conv.poly:resistance" "NumDayExpiMax=NA" "UcrtFuncTerm1=def.ucrt.meas.cnst:resistance","UcrtFuncTerm2=def.ucrt.fdas.rstc.poly:resistance"
#'
#' Stepping through the code in Rstudio
-#' Sys.setenv(DIR_IN='/scratch/pfs/prt_calibration_filter')
-#' log <- NEONprocIS.base::def.log.init(Lvl = "debug")
-#' arg <- c("DirIn=$DIR_IN", "DirOut=/scratch/pfs/out", "FileSchmData=/scratch/pfs/avro_schemas/dp0p/prt_calibrated.avsc", "FileSchmQf=/scratch/pfs/avro_schemas/dp0p/flags_calibration.avsc", ConvFuncTerm1=def.cal.conv.poly:resistance", "NumDayExpiMax=NA", "UcrtFuncTerm1=def.ucrt.meas.cnst:resistance,"UcrtFuncTerm2=def.ucrt.fdas.rstc.poly:resistance")
-#' # Then copy and paste rest of workflow into the command window
+#' log <- NEONprocIS.base::def.log.init(Lvl = "debug")
+#' Sys.setenv(DIR_IN='~/pfs/rmyoung_calibration_group_and_convert_test/rmyoung/2025/12/18')
+#' arg <- c("DirIn=$DIR_IN", "DirOut=~/pfs/out","DirErr=~/pfs/out/errored_datums","ConvFuncTerm1=def.cal.conv.nmnl:speed","ConvFuncTerm2=def.cal.conv.nmnl:direction",
+#'          "TermQf=speed|direction","UcrtFuncTerm1=def.ucrt.meas.cnst:speed","UcrtFuncTerm2=def.ucrt.meas.cnst:direction","nomVal=speed:0.1666667|direction:355",
+#'          "nomCalID=speed:CVALB1|direction:CVALA1","FileSchmData=~/pfs/rmyoung_avro_schemas/rmyoung/rmyoung_calibrated.avsc" )
+#' # Then copy and paste rest of workflow into the command window
#' @seealso None currently
@@ -278,6 +288,8 @@
# in as metadata to specified calibration and uncertainty functions
# Refactor to allow greater flexibility in custom functions, like calibrating multiple
# variables in a single function call, creating new variables, etc.
+# Nora Catolico (2026-05-05)
+# Incorporate an optional input nomVal and nomCalID to use with the nominal calibration function
##############################################################################################
options(digits.secs = 3)
library(foreach)
@@ -313,6 +325,8 @@ Para <-
"FileSchmQf",
base::paste0("ConvFuncTerm",1:100),
"NumDayExpiMax",
+ "nomVal",
+ "nomCalID",
"TermQf",
base::paste0("UcrtFuncTerm",1:100),
"FileUcrtFdas",
@@ -339,6 +353,7 @@ log$debug(base::paste0('Terms to output calibration flags: ',
base::paste0(Para$TermQf, collapse = ',')
))
+
# Read in the schemas so we only have to do it once and not every
# time in the avro writer.
if (!base::is.null(Para$FileSchmData)) {
@@ -484,6 +499,62 @@ if (!base::is.null(Para$Meta) &&
# Add any FDAS uncertainty coefs to Meta
Meta$ucrtCoefFdas <- ucrtCoefFdas
+
+# Nominal calibration values (optional input of the form nomVal=term1:value1|term2:value2)
+if(!base::is.null(Para$nomVal) &&
+   base::length(Para$nomVal) %% 2 > 0){ # An odd number of elements means a term or value is missing
+  log$fatal('Input argument nomVal must contain term:value pairs, separated by pipes.')
+  stop()
+}
+if (!base::is.null(Para$nomVal) &&
+    base::length(Para$nomVal) > 0) {
+  nomVal <-
+    NEONprocIS.base::def.vect.pars.pair(
+      vect = Para$nomVal,
+      NameCol = c('term', 'value'),
+      log = log
+    )
+  if(base::any(base::duplicated(nomVal$term))){ # Each term may have only one nominal value
+    log$fatal('Names of nomVal argument must be unique (e.g. nomVal=term1:value1|term2:value2).')
+    stop()
+  }
+  log$debug(base::paste0(
+    'Additional nomVal data for use in nominal calibration function: ',
+    base::paste0(Para$nomVal, collapse = ',') # Collapse so a single message is logged
+  ))
+} else {
+  nomVal <- NULL
+  log$debug('Additional nomVal data for use in nominal calibration function: None')
+}
+
+# Nominal calibration IDs (optional input of the form nomCalID=term1:ID1|term2:ID2)
+if(!base::is.null(Para$nomCalID) &&
+   base::length(Para$nomCalID) %% 2 > 0){ # An odd number of elements means a term or ID is missing
+  log$fatal('Input argument nomCalID must contain term:ID pairs, separated by pipes.')
+  stop()
+}
+if (!base::is.null(Para$nomCalID) &&
+    base::length(Para$nomCalID) > 0) {
+  nomCalID <-
+    NEONprocIS.base::def.vect.pars.pair(
+      vect = Para$nomCalID,
+      NameCol = c('term', 'ID'),
+      log = log
+    )
+  if(base::any(base::duplicated(nomCalID$term))){ # Each term may have only one calibration ID
+    log$fatal('Names of nomCalID argument must be unique (e.g. nomCalID=term1:ID1|term2:ID2).')
+    stop()
+  }
+  log$debug(base::paste0(
+    'Additional nomCalID data for use in nominal calibration function: ',
+    base::paste0(Para$nomCalID, collapse = ',') # Collapse so a single message is logged
+  ))
+} else {
+  nomCalID <- NULL
+  log$debug('Additional nomCalID data for use in nominal calibration function: None')
+}
+
+
# Retrieve optional subdirectories to copy over
DirSubCopy <- base::unique(Para$DirSubCopy)
log$debug(base::paste0('Additional subdirectories to copy: ',base::paste0(DirSubCopy,collapse=',')))
@@ -521,6 +592,8 @@ foreach::foreach(idxDirIn = DirIn) %dopar% {
FuncConv=FuncConv,
FuncUcrt=FuncUcrt,
TermQf=Para$TermQf,
+ nomVal=nomVal,
+ nomCalID=nomCalID,
NumDayExpiMax=NumDayExpiMax,
SchmDataOutList=SchmDataOutList,
SchmQf=SchmQf,
diff --git a/flow/flow.cal.conv/wrap.cal.conv.R b/flow/flow.cal.conv/wrap.cal.conv.R
index c07fcb377..f69461190 100644
--- a/flow/flow.cal.conv/wrap.cal.conv.R
+++ b/flow/flow.cal.conv/wrap.cal.conv.R
@@ -70,6 +70,15 @@
#' see return information). Thus, ensure that the column names of data frames for the
#' same variable (list name) are unique. In the standard measurement and FDAS uncertainty functions,
#' the output list names will match the name of the L0 variable specified in \code{var}.\cr
+#'
+#' @param nomVal (optional). Parsed nominal calibration values, passed through from
+#' \code{flow.cal.conv.R}. This is not a single numeric value; custom callers should
+#' supply the same table/data-frame structure produced upstream for nominal calibration.
+#'
+#' @param nomCalID (optional). Parsed nominal calibration identifiers, passed through
+#' from \code{flow.cal.conv.R}. This is not a single character string; custom callers
+#' should supply the corresponding table/data-frame structure used with
+#' \code{nomVal} for nominal calibration lookups.
#'
#' @param TermQf (optional) A character vector of L0 terms/variables for which to provide calibration
#' flags. For example, if calibration information is expected for the terms "resistance" and
@@ -220,6 +229,8 @@ wrap.cal.conv <- function(DirIn,
FuncConv=NULL,
FuncUcrt=NULL,
TermQf=NULL,
+ nomVal=NULL,
+ nomCalID=NULL,
NumDayExpiMax=NA,
SchmDataOutList=NULL,
SchmQf=NULL,
@@ -408,6 +419,8 @@ wrap.cal.conv <- function(DirIn,
data = data,
calSlct = calSlct,
FuncConv = FuncConv,
+ nomVal = nomVal,
+ nomCalID = nomCalID,
Meta = Meta,
log = log
)
diff --git a/modules_combined/calibration_group_and_convert/Dockerfile b/modules_combined/calibration_group_and_convert/Dockerfile
index 78b956954..49ec08978 100644
--- a/modules_combined/calibration_group_and_convert/Dockerfile
+++ b/modules_combined/calibration_group_and_convert/Dockerfile
@@ -3,7 +3,7 @@
# docker build -t neon-is-cal-grp-conv -f ./modules_combined/calibration_group_and_convert/Dockerfile .
# Start with the calibration package image.
-FROM us-central1-docker.pkg.dev/neon-shared-service/neonscience/neon-is-pack-cal-r:v3.1.4
+FROM us-central1-docker.pkg.dev/neon-shared-service/neonscience/neon-is-pack-cal-r:sha-9e19295
# maintainer handle
diff --git a/pack/NEONprocIS.cal/NAMESPACE b/pack/NEONprocIS.cal/NAMESPACE
index 8125228a0..677409541 100755
--- a/pack/NEONprocIS.cal/NAMESPACE
+++ b/pack/NEONprocIS.cal/NAMESPACE
@@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand
export(def.cal.conv.enviro.multi.out)
+export(def.cal.conv.nmnl)
export(def.cal.conv.poly)
export(def.cal.conv.poly.a0.as.a1)
export(def.cal.conv.poly.aepg600m)
diff --git a/pack/NEONprocIS.cal/R/def.cal.conv.enviro.multi.out.R b/pack/NEONprocIS.cal/R/def.cal.conv.enviro.multi.out.R
index 98f3a25de..ccf5f896b 100644
--- a/pack/NEONprocIS.cal/R/def.cal.conv.enviro.multi.out.R
+++ b/pack/NEONprocIS.cal/R/def.cal.conv.enviro.multi.out.R
@@ -29,6 +29,12 @@
#' @param Meta (Optional) List object containing additional metadata for use in
#' this function as needed. Defaults to an empty list, but this example requires that the list
#' item Meta$Locations is input to work properly.
+#'
+#' @param nomVal Unused in this function. Defaults to NULL. See the inputs to
+#' NEONprocIS.cal::def.cal.conv.nmnl for what this input is.
+#'
+#' @param nomCalID Unused in this function. Defaults to NULL. See the inputs to
+#' NEONprocIS.cal::def.cal.conv.nmnl for what this input is.
#'
#' @param log A logger object as produced by NEONprocIS.base::def.log.init to produce structured log
#' output in addition to standard R error messaging. Defaults to NULL, in which the logger will be
@@ -68,11 +74,15 @@
# Teresa Burlingame (2026-01-20)
# adjusting code so that depth variable is more robust to missing calibration files
# moving csv read so that it is not read over and over again.
+# Nora Catolico (2026-05-05)
+# Add nomVal and nomCalID - unused in this function
##############################################################################################
def.cal.conv.enviro.multi.out <- function(data = data.frame(data=base::numeric(0)),
varConv = setdiff(base::names(data),c('source_id','site_id','readout_time')),
calSlct=NULL,
Meta=list(),
+ nomVal=NULL,
+ nomCalID=NULL,
log = NULL) {
# Intialize logging if needed
if (base::is.null(log)) {
diff --git a/pack/NEONprocIS.cal/R/def.cal.conv.nmnl.R b/pack/NEONprocIS.cal/R/def.cal.conv.nmnl.R
new file mode 100644
index 000000000..0cbb831f4
--- /dev/null
+++ b/pack/NEONprocIS.cal/R/def.cal.conv.nmnl.R
@@ -0,0 +1,192 @@
+##############################################################################################
+#' @title Convert nominal cal to calibrated data
+
+#' @author
+#' Kaelin Cawley \email{kcawley@battelleecology.org}
+
+#' @description
+#' Definition function. Apply NEON calibration CVALB1 to convert nominal data to calibrated data.
+
+#' @param data Data frame of nominally calibrated sensor readings. This data frame must have
+#' a column called "readout_time" with POSIXct timestamps
+
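+#' @param nomVal A data frame with columns \code{term} and \code{value}. The value in the row whose \code{term} matches \code{varConv} is converted to numeric and used as the nominal value that the data are divided by.
+
+#' @param nomCalID A data frame with columns \code{term} and \code{ID}. The ID in the row whose \code{term} matches \code{varConv} names the calibration value to read from the calibration file, e.g. CVAL_B1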
+#'
+#' @param Meta Unused in this function. Defaults to an empty list. See the inputs to
+#' NEONprocIS.cal::wrap.cal.conv.dp0p for what this input is.
+
+#' @param varConv A character string naming the single target variable (column) in the data frame \code{data} for
+#' which calibrated output will be computed (all other columns are passed through unchanged). Defaults to the first
+#' column in \code{data}.
+
+#' @param calSlct A named list of data frames, each list element corresponding to a
+#' variable (column) to calibrate. The data frame in each list element holds
+#' information about the calibration files and time periods that apply to the variable,
+#' as returned from NEONprocIS.cal::def.cal.slct. See documentation for that function.
+
+#' @param log A logger object as produced by NEONprocIS.base::def.log.init to produce structured log
+#' output in addition to standard R error messaging. Defaults to NULL, in which the logger will be
+#' created and used within the function.
+
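+#' @return The input data frame with one added column, named \code{varConv} followed by "Calibrated", holding the
+#' calibrated values (NA where no calibration information applies). The original \code{varConv} column is retained.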
+
+#' @references
+#' License: (example) GNU AFFERO GENERAL PUBLIC LICENSE Version 3, 19 November 2007
+#' NEON.DOC.000785 TIS Calibrated Measurements and Level 1 Data Products Uncertainty Budget Plan
+
+#' @keywords Currently none
+
+#' @examples
+#' Not Run
+#' data=data.frame(readout_time=as.POSIXct(c('2025-01-01','2025-01-02','2025-01-03')),var1=c(1,2,3),var2=c(4,5,6))
+#' calSlct <- NEONprocIS.cal::wrap.cal.slct(
+#'   DirCal = '/path/to/calibration/files',
+#'   NameVarExpc = c('var1','var2'),
+#'   TimeBgn = as.POSIXct('2025-01-01'),
+#'   TimeEnd = as.POSIXct('2025-01-04')
+#' )
+#' dataCal <- def.cal.conv.nmnl(data=data,nomVal=data.frame(term=c('var1','var2'),value=c(15/90,355)),nomCalID=data.frame(term=c('var1','var2'),ID=c('CVAL_B1','CVAL_A1')),varConv='var1',calSlct=calSlct)
+
+#' @seealso \link[NEONprocIS.cal]{def.read.cal.xml}
+#' @seealso \link[NEONprocIS.cal]{def.cal.conv.poly}
+#' @seealso \link[NEONprocIS.cal]{def.cal.conv.poly.m}
+#' @seealso \link[NEONprocIS.cal]{wrap.cal.conv.dp0p}
+
+#' @export
+
+# changelog and author contributions / copyrights
+# Cove Sturtevant (2020-07-28)
+# original creation, from def.cal.conv.poly
+# Kaelin Cawley (2026-02-06)
+# created new function to be used for RMyoung 05108 buoy wind speed data
+# Nora Catolico (2026-05-05)
+# updates to work with cal package
+##############################################################################################
+# # For Testing flow.cal.conv.R with this function
+# setwd("/home/NEON/kcawley/NEON-IS-data-processing/flow/flow.cal.conv")
+# # FileSchmData=$FILE_SCHEMA_DATA_AQUATROLL
+# # FileSchmQf=$FILE_SCHEMA_FLAGS_AQUATROLL
+# DirSubCopy=flags
+#
+# Sys.setenv(DIR_IN='/scratch/pfs/rmyoung_calibration_group_and_convert_test')
+# log <- NEONprocIS.base::def.log.init(Lvl = "debug")
+# NumDayExpiMax = as.data.frame(matrix(data = c("speed",365), nrow = 1, ncol = 2))
+# names(NumDayExpiMax) <- c('var','NumDayExpiMax')
+# NumDayExpiMax$NumDayExpiMax <- as.numeric(NumDayExpiMax$NumDayExpiMax)
+# arg <- c(DirIn=$DIR_IN,
+# DirOut="/pfs/out",
+# dirErr="/pfs/out/errored_datums",
+# ConvFuncTerm1=def.cal.conv.nmnl:speed,
+# NumDayExpiMax = NumDayExpiMax,
+# UcrtFuncTerm1=def.ucrt.meas.cnst:speed)
+# # Then copy and paste rest of workflow into the command window
+#
+#
+# # For Testing speed calibration
+# data = NEONprocIS.base::def.read.parq(NameFile = '~/pfs/rmyoung_data_source_trino/rmyoung/2025/12/14/32356/data/rmyoung_32356_2025-12-14.parquet')
+# nomVal = data.frame(term = 'speed', value = 15/90)
+# nomCalID = data.frame(term = 'speed', ID = 'CVAL_B1')
+# varConv = base::names(data)[4] #speed
+#
+# calSlct <- NEONprocIS.cal::wrap.cal.slct(
+# DirCal = '~/pfs/rmyoung_calibration_group_and_convert_test/rmyoung/2025/12/14/32356/calibration',
+# NameVarExpc = c('speed'),
+# TimeBgn = as.POSIXct('2025-12-13'),
+# TimeEnd = as.POSIXct('2025-12-15'),
+# NumDayExpiMax = NumDayExpiMax
+# )
+# log = NULL
+##############################################################################################
+def.cal.conv.nmnl <- function(data = data.frame(data=base::numeric(0)),
+                              nomVal,
+                              nomCalID,
+                              varConv = base::names(data)[1],
+                              calSlct = NULL,
+                              Meta=list(),
+                              log = NULL) {
+  # Computes raw / nominal value * calibration value for the variable in varConv, reading the
+  # calibration value from each applicable cal file. nomVal is indexed by $term and $value and
+  # nomCalID by $term and $ID. Returns data with an added column named <varConv>Calibrated.
+  # Initialize logging if needed
+  if (base::is.null(log)) {
+    log <- NEONprocIS.base::def.log.init()
+  }
+
+  # This function calibrates a single variable per call
+  if (base::length(varConv) != 1) {
+    log$error('Input varConv must be a single column name.')
+    stop()
+  }
+
+  # Ensure input is data frame with variables to be calibrated
+  chk <- NEONprocIS.base::def.validate.dataframe(dfIn=data,TestNameCol=c(varConv,'readout_time'),TestEmpty=FALSE, log = log)
+  if (!chk) {
+    stop()
+  }
+
+  # Measurement times are used to match records to calibration periods
+  timeMeas <- data$readout_time
+  if(!base::inherits(timeMeas, "POSIXt")){
+    log$error('Variable readout_time must be of class POSIXt')
+    stop()
+  }
+
+  # Check to see if data to be calibrated is a numeric array
+  chk <- NEONprocIS.base::def.validate.vector(data[[varConv]], TestEmpty = FALSE, TestNumc = TRUE, log = log)
+  if (!chk) {
+    stop()
+  }
+
+  # Retrieve the nominal value and cal ID for this variable. Exactly one usable entry of each is required.
+  nomValIdx <- base::suppressWarnings(base::as.numeric(nomVal$value[nomVal$term==varConv]))
+  nomCalIDIdx <- nomCalID$ID[nomCalID$term==varConv]
+  if(base::length(nomValIdx) != 1 || !base::is.finite(nomValIdx) || nomValIdx == 0){
+    log$error(base::paste0('Input nomVal must supply exactly one finite, non-zero value for term ',varConv))
+    stop()
+  }
+  if(base::length(nomCalIDIdx) != 1 || base::is.na(nomCalIDIdx)){
+    log$error(base::paste0('Input nomCalID must supply exactly one calibration ID for term ',varConv))
+    stop()
+  }
+
+  # Pull cal file info for this variable and initialize the output as all NA
+  calSlctIdx <- calSlct[[varConv]]
+  dataConvOutIdx <- base::rep(NA_real_, base::nrow(data))
+
+  # Skip calibration if no cal info supplied
+  if(base::is.null(calSlctIdx)){
+    log$warn(base::paste0('No applicable calibration files available for ',varConv, '. Returning NA for calibrated output.'))
+    calSlctIdx <- base::data.frame()
+  }
+
+  # Run through each calibration file and apply the calibration function for the applicable time period
+  for(idxRow in base::seq_len(base::nrow(calSlctIdx))){
+    # What records in the data correspond to this cal file?
+    setCal <- timeMeas >= calSlctIdx$timeBgn[idxRow] & timeMeas < calSlctIdx$timeEnd[idxRow]
+    # If a calibration file is available for this period, open it and get calibration information
+    infoCal <- NULL
+    if(!base::is.na(calSlctIdx$file[idxRow])){
+      fileCal <- base::paste0(calSlctIdx$path[idxRow],calSlctIdx$file[idxRow])
+      infoCal <- NEONprocIS.cal::def.read.cal.xml(NameFile=fileCal,Vrbs=TRUE,log=log)
+    }
+    if (base::is.null(infoCal)) {
+      dataConvOutIdx[setCal] <- NA_real_ # No calibration information: NA for this period
+      next
+    }
+    # The cal file must hold exactly one numeric value with the requested name
+    coefCal <- base::suppressWarnings(base::as.numeric(infoCal$cal$Value[infoCal$cal$Name==nomCalIDIdx]))
+    if(base::length(coefCal) != 1 || base::is.na(coefCal)){
+      log$error(base::paste0('Calibration file ',fileCal,' must contain exactly one numeric value named ',nomCalIDIdx))
+      stop()
+    }
+    # Remove the nominal value and apply the calibration value, only for records in this period
+    dataConvOutIdx[setCal] <- data[[varConv]][setCal]/nomValIdx*coefCal
+  } # End loop around calibration files
+
+  # Add calibrated data as a new column and retain raw data
+  data[[base::paste0(varConv,"Calibrated")]] <- dataConvOutIdx
+  return(data)
+}
diff --git a/pack/NEONprocIS.cal/R/def.cal.conv.poly.R b/pack/NEONprocIS.cal/R/def.cal.conv.poly.R
index 9604a299d..76c6b7809 100644
--- a/pack/NEONprocIS.cal/R/def.cal.conv.poly.R
+++ b/pack/NEONprocIS.cal/R/def.cal.conv.poly.R
@@ -22,6 +22,12 @@
#'
#' @param Meta Unused in this function. Defaults to an empty list. See the inputs to
#' NEONprocIS.cal::wrap.cal.conv.dp0p for what this input is.
+#'
+#' @param nomVal Unused in this function. Defaults to NULL. See the inputs to
+#' NEONprocIS.cal::def.cal.conv.nmnl for what this input is.
+#'
+#' @param nomCalID Unused in this function. Defaults to NULL. See the inputs to
+#' NEONprocIS.cal::def.cal.conv.nmnl for what this input is.
#'
#' @param log A logger object as produced by NEONprocIS.base::def.log.init to produce structured log
#' output in addition to standard R error messaging. Defaults to NULL, in which the logger will be
@@ -80,11 +86,15 @@
# Cove Sturtevant (2025-08-10)
# Refactor to loop through applicable calibration files within this function
# Also enable multiple variables to be calibrated with this function call
+# Nora Catolico (2026-05-05)
+# Add nomVal and nomCalID - unused in this function
##############################################################################################
def.cal.conv.poly <- function(data = data.frame(data=base::numeric(0)),
varConv = base::names(data)[1],
calSlct=NULL,
Meta=list(),
+ nomVal=NULL,
+ nomCalID=NULL,
log = NULL) {
# Intialize logging if needed
if (base::is.null(log)) {
diff --git a/pack/NEONprocIS.cal/R/def.cal.conv.poly.a0.as.a1.R b/pack/NEONprocIS.cal/R/def.cal.conv.poly.a0.as.a1.R
index 453c96e39..3a4cdce4a 100644
--- a/pack/NEONprocIS.cal/R/def.cal.conv.poly.a0.as.a1.R
+++ b/pack/NEONprocIS.cal/R/def.cal.conv.poly.a0.as.a1.R
@@ -24,6 +24,12 @@
#'
#' @param Meta Unused in this function. Defaults to an empty list. See the inputs to
#' NEONprocIS.cal::wrap.cal.conv.dp0p for what this input is.
+#'
+#' @param nomVal Unused in this function. Defaults to NULL. See the inputs to
+#' NEONprocIS.cal::def.cal.conv.nmnl for what this input is.
+#'
+#' @param nomCalID Unused in this function. Defaults to NULL. See the inputs to
+#' NEONprocIS.cal::def.cal.conv.nmnl for what this input is.
#'
#' @param log A logger object as produced by NEONprocIS.base::def.log.init to produce structured log
#' output in addition to standard R error messaging. Defaults to NULL, in which the logger will be
@@ -63,11 +69,15 @@
# Cove Sturtevant (2025-08-10)
# Refactor to loop through applicable calibration files within this function
# Also enable multiple variables to be calibrated with this function call
+# Nora Catolico (2026-05-05)
+# Add nomVal and nomCalID - unused in this function
##############################################################################################
def.cal.conv.poly.a0.as.a1 <- function(data = data.frame(data=base::numeric(0)),
varConv = base::names(data)[1],
calSlct=NULL,
Meta=list(),
+ nomVal=NULL,
+ nomCalID=NULL,
log = NULL) {
# Intialize logging if needed
if (base::is.null(log)) {
diff --git a/pack/NEONprocIS.cal/R/def.cal.conv.poly.aepg600m.R b/pack/NEONprocIS.cal/R/def.cal.conv.poly.aepg600m.R
index d613f5d50..5935a90f4 100644
--- a/pack/NEONprocIS.cal/R/def.cal.conv.poly.aepg600m.R
+++ b/pack/NEONprocIS.cal/R/def.cal.conv.poly.aepg600m.R
@@ -24,6 +24,12 @@
#'
#' @param Meta Unused in this function. Defaults to an empty list. See the inputs to
#' NEONprocIS.cal::wrap.cal.conv.dp0p for what this input is.
+#'
+#' @param nomVal Unused in this function. Defaults to NULL. See the inputs to
+#' NEONprocIS.cal::def.cal.conv.nmnl for what this input is.
+#'
+#' @param nomCalID Unused in this function. Defaults to NULL. See the inputs to
+#' NEONprocIS.cal::def.cal.conv.nmnl for what this input is.
#'
#' @param log A logger object as produced by NEONprocIS.base::def.log.init to produce structured log
#' output in addition to standard R error messaging. Defaults to NULL, in which the logger will be
@@ -64,11 +70,15 @@
# Cove Sturtevant (2025-08-10)
# Refactor to loop through applicable calibration files within this function
# Also enable multiple variables to be calibrated with this function call
+# Nora Catolico (2026-05-05)
+# Add nomVal and nomCalID - unused in this function
##############################################################################################
def.cal.conv.poly.aepg600m <- function(data = data.frame(data=base::numeric(0)),
varConv = base::names(data)[1],
calSlct=NULL,
Meta=list(),
+ nomVal=NULL,
+ nomCalID=NULL,
log = NULL) {
# Intialize logging if needed
if (base::is.null(log)) {
diff --git a/pack/NEONprocIS.cal/R/def.cal.conv.poly.b.R b/pack/NEONprocIS.cal/R/def.cal.conv.poly.b.R
index f2ad4c561..a6dc6a89e 100644
--- a/pack/NEONprocIS.cal/R/def.cal.conv.poly.b.R
+++ b/pack/NEONprocIS.cal/R/def.cal.conv.poly.b.R
@@ -22,6 +22,12 @@
#'
#' @param Meta Unused in this function. Defaults to an empty list. See the inputs to
#' NEONprocIS.cal::wrap.cal.conv.dp0p for what this input is.
+#'
+#' @param nomVal Unused in this function. Defaults to NULL. See the inputs to
+#' NEONprocIS.cal::def.cal.conv.nmnl for what this input is.
+#'
+#' @param nomCalID Unused in this function. Defaults to NULL. See the inputs to
+#' NEONprocIS.cal::def.cal.conv.nmnl for what this input is.
#'
#' @param log A logger object as produced by NEONprocIS.base::def.log.init to produce structured log
#' output in addition to standard R error messaging. Defaults to NULL, in which the logger will be
@@ -66,11 +72,15 @@
# Cove Sturtevant (2025-08-10)
# Refactor to loop through applicable calibration files within this function
# Also enable multiple variables to be calibrated with this function call
+# Nora Catolico (2026-05-05)
+# Add nomVal and nomCalID - unused in this function
##############################################################################################
def.cal.conv.poly.b <- function(data = data.frame(data=base::numeric(0)),
varConv = base::names(data)[1],
calSlct=NULL,
Meta=list(),
+ nomVal=NULL,
+ nomCalID=NULL,
log = NULL) {
# Intialize logging if needed
if (base::is.null(log)) {
diff --git a/pack/NEONprocIS.cal/R/def.cal.conv.poly.h.R b/pack/NEONprocIS.cal/R/def.cal.conv.poly.h.R
index e5dd5e4eb..56d7311b6 100644
--- a/pack/NEONprocIS.cal/R/def.cal.conv.poly.h.R
+++ b/pack/NEONprocIS.cal/R/def.cal.conv.poly.h.R
@@ -22,6 +22,12 @@
#'
#' @param Meta Unused in this function. Defaults to an empty list. See the inputs to
#' NEONprocIS.cal::wrap.cal.conv.dp0p for what this input is.
+#'
+#' @param nomVal Unused in this function. Defaults to NULL. See the inputs to
+#' NEONprocIS.cal::def.cal.conv.nmnl for what this input is.
+#'
+#' @param nomCalID Unused in this function. Defaults to NULL. See the inputs to
+#' NEONprocIS.cal::def.cal.conv.nmnl for what this input is.
#'
#' @param log A logger object as produced by NEONprocIS.base::def.log.init to produce structured log
#' output in addition to standard R error messaging. Defaults to NULL, in which the logger will be
@@ -60,11 +66,15 @@
# Cove Sturtevant (2025-08-10)
# Refactor to loop through applicable calibration files within this function
# Also enable multiple variables to be calibrated with this function call
+# Nora Catolico (2026-05-05)
+# Add nomVal and nomCalID - unused in this function
##############################################################################################
def.cal.conv.poly.h <- function(data = data.frame(data=base::numeric(0)),
varConv = base::names(data)[1],
calSlct=NULL,
Meta=list(),
+ nomVal=NULL,
+ nomCalID=NULL,
log = NULL) {
# Intialize logging if needed
if (base::is.null(log)) {
diff --git a/pack/NEONprocIS.cal/R/def.cal.conv.poly.l.R b/pack/NEONprocIS.cal/R/def.cal.conv.poly.l.R
index 4d83b446c..9cd3e7b85 100644
--- a/pack/NEONprocIS.cal/R/def.cal.conv.poly.l.R
+++ b/pack/NEONprocIS.cal/R/def.cal.conv.poly.l.R
@@ -22,6 +22,12 @@
#'
#' @param Meta Unused in this function. Defaults to an empty list. See the inputs to
#' NEONprocIS.cal::wrap.cal.conv.dp0p for what this input is.
+#'
+#' @param nomVal Unused in this function. Defaults to NULL. See the inputs to
+#' NEONprocIS.cal::def.cal.conv.nmnl for what this input is.
+#'
+#' @param nomCalID Unused in this function. Defaults to NULL. See the inputs to
+#' NEONprocIS.cal::def.cal.conv.nmnl for what this input is.
#'
#' @param log A logger object as produced by NEONprocIS.base::def.log.init to produce structured log
#' output in addition to standard R error messaging. Defaults to NULL, in which the logger will be
@@ -60,11 +66,15 @@
# Cove Sturtevant (2025-08-10)
# Refactor to loop through applicable calibration files within this function
# Also enable multiple variables to be calibrated with this function call
+# Nora Catolico (2026-05-05)
+# Add nomVal and nomCalID - unused in this function
##############################################################################################
def.cal.conv.poly.l <- function(data = data.frame(data=base::numeric(0)),
varConv = base::names(data)[1],
calSlct=NULL,
Meta=list(),
+ nomVal=NULL,
+ nomCalID=NULL,
log = NULL) {
# Intialize logging if needed
if (base::is.null(log)) {
diff --git a/pack/NEONprocIS.cal/R/def.cal.conv.poly.m.R b/pack/NEONprocIS.cal/R/def.cal.conv.poly.m.R
index d52111592..02a8c6904 100644
--- a/pack/NEONprocIS.cal/R/def.cal.conv.poly.m.R
+++ b/pack/NEONprocIS.cal/R/def.cal.conv.poly.m.R
@@ -22,6 +22,12 @@
#'
#' @param Meta Unused in this function. Defaults to an empty list. See the inputs to
#' NEONprocIS.cal::wrap.cal.conv.dp0p for what this input is.
+#'
+#' @param nomVal Unused in this function. Defaults to NULL. See the inputs to
+#' NEONprocIS.cal::def.cal.conv.nmnl for what this input is.
+#'
+#' @param nomCalID Unused in this function. Defaults to NULL. See the inputs to
+#' NEONprocIS.cal::def.cal.conv.nmnl for what this input is.
#'
#' @param log A logger object as produced by NEONprocIS.base::def.log.init to produce structured log
#' output in addition to standard R error messaging. Defaults to NULL, in which the logger will be
@@ -65,11 +71,15 @@
# Cove Sturtevant (2025-08-10)
# Refactor to loop through applicable calibration files within this function
# Also enable multiple variables to be calibrated with this function call
+# Nora Catolico (2026-05-05)
+# Add nomVal and nomCalID - unused in this function
##############################################################################################
def.cal.conv.poly.m <- function(data = data.frame(data=base::numeric(0)),
varConv = base::names(data)[1],
calSlct=NULL,
Meta=list(),
+ nomVal=NULL,
+ nomCalID=NULL,
log = NULL) {
# Intialize logging if needed
if (base::is.null(log)) {
diff --git a/pack/NEONprocIS.cal/R/def.cal.conv.poly.split.R b/pack/NEONprocIS.cal/R/def.cal.conv.poly.split.R
index 49d4e0337..86be91d53 100644
--- a/pack/NEONprocIS.cal/R/def.cal.conv.poly.split.R
+++ b/pack/NEONprocIS.cal/R/def.cal.conv.poly.split.R
@@ -22,6 +22,12 @@
#'
#' @param Meta Unused in this function. Defaults to an empty list. See the inputs to
#' NEONprocIS.cal::wrap.cal.conv.dp0p for what this input is.
+#'
+#' @param nomVal Unused in this function. Defaults to NULL. See the inputs to
+#' NEONprocIS.cal::def.cal.conv.nmnl for what this input is.
+#'
+#' @param nomCalID Unused in this function. Defaults to NULL. See the inputs to
+#' NEONprocIS.cal::def.cal.conv.nmnl for what this input is.
#'
#' @param log A logger object as produced by NEONprocIS.base::def.log.init to produce structured log
#' output in addition to standard R error messaging. Defaults to NULL, in which the logger will be
@@ -62,11 +68,15 @@
# Cove Sturtevant (2025-08-10)
# Refactor to loop through applicable calibration files within this function
# Also enable multiple variables to be calibrated with this function call
+# Nora Catolico (2026-05-05)
+# Add nomVal and nomCalID - unused in this function
##############################################################################################
def.cal.conv.poly.split <- function(data = data.frame(data=base::numeric(0)),
varConv = base::names(data)[1],
calSlct=NULL,
Meta=list(),
+ nomVal=NULL,
+ nomCalID=NULL,
log = NULL) {
# Intialize logging if needed
if (base::is.null(log)) {
diff --git a/pack/NEONprocIS.cal/R/def.cal.conv.swc.test.R b/pack/NEONprocIS.cal/R/def.cal.conv.swc.test.R
index 925a90ed3..86fc1f6a9 100644
--- a/pack/NEONprocIS.cal/R/def.cal.conv.swc.test.R
+++ b/pack/NEONprocIS.cal/R/def.cal.conv.swc.test.R
@@ -20,8 +20,14 @@
#' information about the calibration files and time periods that apply to the variable,
#' as returned from NEONprocIS.cal::def.cal.slct. See documentation for that function.
#'
-#' @param Meta (Optional) List object containing additional metadata for use in
-#' this function as needed. Defaults to an empty list.
+#' @param Meta Unused in this function. Defaults to an empty list. See the inputs to
+#' NEONprocIS.cal::wrap.cal.conv.dp0p for what this input is.
+#'
+#' @param nomVal Unused in this function. Defaults to NULL. See the inputs to
+#' NEONprocIS.cal::def.cal.conv.nmnl for what this input is.
+#'
+#' @param nomCalID Unused in this function. Defaults to NULL. See the inputs to
+#' NEONprocIS.cal::def.cal.conv.nmnl for what this input is.
#'
#' @param log A logger object as produced by NEONprocIS.base::def.log.init to produce structured log
#' output in addition to standard R error messaging. Defaults to NULL, in which the logger will be
@@ -51,11 +57,15 @@
# changelog and author contributions / copyrights
# Cove Sturtevant (2025-08-08)
# original creation
+# Nora Catolico (2026-05-05)
+# Add nomVal and nomCalID - unused in this function
##############################################################################################
def.cal.conv.swc.test <- function(data = data.frame(data=base::numeric(0)),
varConv = base::names(data)[1],
calSlct=NULL,
Meta=list(),
+ nomVal=NULL,
+ nomCalID=NULL,
log = NULL) {
# Intialize logging if needed
if (base::is.null(log)) {
diff --git a/pack/NEONprocIS.cal/R/def.cal.conv.test.multi.out.R b/pack/NEONprocIS.cal/R/def.cal.conv.test.multi.out.R
index 4aac64024..b8e41a3f6 100644
--- a/pack/NEONprocIS.cal/R/def.cal.conv.test.multi.out.R
+++ b/pack/NEONprocIS.cal/R/def.cal.conv.test.multi.out.R
@@ -26,6 +26,12 @@
#' @param Meta (Optional) List object containing additional metadata for use in
#' this function as needed. Defaults to an empty list, but this example requires that the list
#' item Meta$Locations is input to work properly.
+#'
+#' @param nomVal Unused in this function. Defaults to NULL. See the inputs to
+#' NEONprocIS.cal::def.cal.conv.nmnl for what this input is.
+#'
+#' @param nomCalID Unused in this function. Defaults to NULL. See the inputs to
+#' NEONprocIS.cal::def.cal.conv.nmnl for what this input is.
#'
#' @param log A logger object as produced by NEONprocIS.base::def.log.init to produce structured log
#' output in addition to standard R error messaging. Defaults to NULL, in which the logger will be
@@ -56,11 +62,15 @@
# changelog and author contributions / copyrights
# Cove Sturtevant (2025-10-08)
# original creation
+# Nora Catolico (2026-05-05)
+# Add nomVal and nomCalID - unused in this function
##############################################################################################
def.cal.conv.test.multi.out <- function(data = data.frame(data=base::numeric(0)),
varConv = setdiff(base::names(data),c('source_id','site_id','readout_time')),
calSlct=NULL,
Meta=list(),
+ nomVal=NULL,
+ nomCalID=NULL,
log = NULL) {
# Intialize logging if needed
if (base::is.null(log)) {
diff --git a/pack/NEONprocIS.cal/R/wrap.cal.conv.dp0p.R b/pack/NEONprocIS.cal/R/wrap.cal.conv.dp0p.R
index a7840f672..1873d81cb 100644
--- a/pack/NEONprocIS.cal/R/wrap.cal.conv.dp0p.R
+++ b/pack/NEONprocIS.cal/R/wrap.cal.conv.dp0p.R
@@ -26,6 +26,15 @@
#' as the calibration conversion function is able to handle the case, for example if multiple L0 terms are used to create
#' a single calibrated output. \cr
#'
+#' @param nomVal (optional). Parsed nominal calibration value table forwarded from
+#' \code{flow.cal.conv}. This is not limited to a single numeric value; it should be
+#' provided in the tabular form expected by downstream nominal calibration functions.
+#'
+#' @param nomCalID (optional). Parsed nominal calibration identifier table forwarded
+#' from \code{flow.cal.conv}. This is not limited to a single character string; it
+#' should be provided in the tabular form expected by downstream nominal calibration
+#' functions.
+#'
#' @param Meta (optional). A named list (default is an empty list) containing additional metadata to pass to
#' calibration and uncertainty functions. This can contain whatever information might be needed in the
#' calibration and/or uncertainty functions in addition to calibration and uncertainty information.
@@ -68,6 +77,8 @@
wrap.cal.conv.dp0p <- function(data,
calSlct,
FuncConv,
+ nomVal=NULL,
+ nomCalID=NULL,
Meta=list(),
log=NULL){
# initialize logging if necessary
@@ -89,6 +100,8 @@ wrap.cal.conv.dp0p <- function(data,
data <- base::do.call(FuncConvIdx,args=base::list(data=data,
varConv=varConvIdx,
calSlct=calSlct,
+ nomVal=nomVal,
+ nomCalID=nomCalID,
Meta=Meta,
log=log)
)
diff --git a/pack/NEONprocIS.cal/data-raw/DATASET.R b/pack/NEONprocIS.cal/data-raw/DATASET.R
new file mode 100644
index 000000000..6514a9443
--- /dev/null
+++ b/pack/NEONprocIS.cal/data-raw/DATASET.R
@@ -0,0 +1,3 @@
+## code to prepare `DATASET` dataset goes here
+## NOTE(review): `DATASET` is never created in this script, so the call below has nothing to save as written - TODO confirm this placeholder is intended
+usethis::use_data(DATASET, overwrite = TRUE)
diff --git a/pack/NEONprocIS.cal/man/def.cal.conv.enviro.multi.out.Rd b/pack/NEONprocIS.cal/man/def.cal.conv.enviro.multi.out.Rd
index 03806d1a0..eb89a4b7f 100644
--- a/pack/NEONprocIS.cal/man/def.cal.conv.enviro.multi.out.Rd
+++ b/pack/NEONprocIS.cal/man/def.cal.conv.enviro.multi.out.Rd
@@ -9,6 +9,8 @@ def.cal.conv.enviro.multi.out(
varConv = setdiff(base::names(data), c("source_id", "site_id", "readout_time")),
calSlct = NULL,
Meta = list(),
+ nomVal = NULL,
+ nomCalID = NULL,
log = NULL
)
}
@@ -29,6 +31,12 @@ as returned from NEONprocIS.cal::def.cal.slct. See documentation for that functi
this function as needed. Defaults to an empty list, but this example requires that the list
item Meta$Locations is input to work properly.}
+\item{nomVal}{Unused in this function. Defaults to NULL. See the inputs to
+NEONprocIS.cal::def.cal.conv.nmnl for what this input is.}
+
+\item{nomCalID}{Unused in this function. Defaults to NULL. See the inputs to
+NEONprocIS.cal::def.cal.conv.nmnl for what this input is.}
+
\item{log}{A logger object as produced by NEONprocIS.base::def.log.init to produce structured log
output in addition to standard R error messaging. Defaults to NULL, in which the logger will be
created and used within the function.}
diff --git a/pack/NEONprocIS.cal/man/def.cal.conv.nmnl.Rd b/pack/NEONprocIS.cal/man/def.cal.conv.nmnl.Rd
new file mode 100644
index 000000000..fe420e590
--- /dev/null
+++ b/pack/NEONprocIS.cal/man/def.cal.conv.nmnl.Rd
@@ -0,0 +1,76 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/def.cal.conv.nmnl.R
+\name{def.cal.conv.nmnl}
+\alias{def.cal.conv.nmnl}
+\title{Convert nominal cal to calibrated data}
+\usage{
+def.cal.conv.nmnl(
+ data = data.frame(data = base::numeric(0)),
+ nomVal,
+ nomCalID,
+ varConv = base::names(data)[1],
+ calSlct = NULL,
+ Meta = list(),
+ log = NULL
+)
+}
+\arguments{
+\item{data}{Data frame of nominally calibrated sensor readings. This data frame must have
+a column called "readout_time" with POSIXct timestamps}
+
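+\item{nomVal}{A data frame of nominal calibration values with columns \code{term} (the variable name) and \code{value} (the numeric nominal value for that variable).}
+
+\item{nomCalID}{A data frame of calibration identifiers with columns \code{term} (the variable name) and \code{ID} (a character string that identifies the calibration value that should be used, e.g. CVAL_B1).}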
+
+\item{varConv}{A character string of the target variables (columns) in the data frame \code{data} for
+which calibrated output will be computed (all other columns will be ignored). Defaults to the first
+column in \code{data}.}
+
+\item{calSlct}{A named list of data frames, each list element corresponding to a
+variable (column) to calibrate. The data frame in each list element holds
+information about the calibration files and time periods that apply to the variable,
+as returned from NEONprocIS.cal::def.cal.slct. See documentation for that function.}
+
+\item{Meta}{Unused in this function. Defaults to an empty list. See the inputs to
+NEONprocIS.cal::wrap.cal.conv.dp0p for what this input is.}
+
+\item{log}{A logger object as produced by NEONprocIS.base::def.log.init to produce structured log
+output in addition to standard R error messaging. Defaults to NULL, in which the logger will be
+created and used within the function.}
+}
+\value{
+The input data frame, with the columns specified in input \code{varConv} updated with
+calibrations applied.
+}
+\description{
+Definition function. Apply NEON calibration CVALB1 to convert nominal data to calibrated data.
+}
+\examples{
+# Not run
+data=data.frame(readout_time=as.POSIXct(c('2025-01-01','2025-01-02','2025-01-03'),tz='GMT'),var1=c(1,2,3),var2=c(4,5,6))
+calSlct <- NEONprocIS.cal::wrap.cal.slct(
+ DirCal = '/path/to/calibration/files',
+ NameVarExpc = c('var1','var2'),
+ TimeBgn = as.POSIXct('2025-01-01',tz='GMT'),
+ TimeEnd = as.POSIXct('2025-01-04',tz='GMT')
+ )
+dataCal <- def.cal.conv.nmnl(data=data,nomVal=data.frame(term=c('var1','var2'),value=c(15/90,355)),nomCalID=data.frame(term=c('var1','var2'),ID=c('CVAL_A1','CVAL_B1')),varConv=c('var1','var2'),calSlct=calSlct)
+}
+\references{
+License: (example) GNU AFFERO GENERAL PUBLIC LICENSE Version 3, 19 November 2007
+NEON.DOC.000785 TIS Calibrated Measurements and Level 1 Data Products Uncertainty Budget Plan
+}
+\seealso{
+\link[NEONprocIS.cal]{def.read.cal.xml}
+
+\link[NEONprocIS.cal]{def.cal.conv.poly}
+
+\link[NEONprocIS.cal]{def.cal.conv.poly.m}
+
+\link[NEONprocIS.cal]{wrap.cal.conv.dp0p}
+}
+\author{
+Kaelin Cawley \email{kcawley@battelleecology.org}
+}
+\keyword{Currently}
+\keyword{none}
diff --git a/pack/NEONprocIS.cal/man/def.cal.conv.poly.Rd b/pack/NEONprocIS.cal/man/def.cal.conv.poly.Rd
index fd4fa5e8f..0ba4eb1d5 100644
--- a/pack/NEONprocIS.cal/man/def.cal.conv.poly.Rd
+++ b/pack/NEONprocIS.cal/man/def.cal.conv.poly.Rd
@@ -9,6 +9,8 @@ def.cal.conv.poly(
varConv = base::names(data)[1],
calSlct = NULL,
Meta = list(),
+ nomVal = NULL,
+ nomCalID = NULL,
log = NULL
)
}
@@ -28,6 +30,12 @@ as returned from NEONprocIS.cal::def.cal.slct. See documentation for that functi
\item{Meta}{Unused in this function. Defaults to an empty list. See the inputs to
NEONprocIS.cal::wrap.cal.conv.dp0p for what this input is.}
+\item{nomVal}{Unused in this function. Defaults to NULL. See the inputs to
+NEONprocIS.cal::def.cal.conv.nmnl for what this input is.}
+
+\item{nomCalID}{Unused in this function. Defaults to NULL. See the inputs to
+NEONprocIS.cal::def.cal.conv.nmnl for what this input is.}
+
\item{log}{A logger object as produced by NEONprocIS.base::def.log.init to produce structured log
output in addition to standard R error messaging. Defaults to NULL, in which the logger will be
created and used within the function.}
diff --git a/pack/NEONprocIS.cal/man/def.cal.conv.poly.a0.as.a1.Rd b/pack/NEONprocIS.cal/man/def.cal.conv.poly.a0.as.a1.Rd
index 55d0fd36f..6f34fc4bc 100644
--- a/pack/NEONprocIS.cal/man/def.cal.conv.poly.a0.as.a1.Rd
+++ b/pack/NEONprocIS.cal/man/def.cal.conv.poly.a0.as.a1.Rd
@@ -9,6 +9,8 @@ def.cal.conv.poly.a0.as.a1(
varConv = base::names(data)[1],
calSlct = NULL,
Meta = list(),
+ nomVal = NULL,
+ nomCalID = NULL,
log = NULL
)
}
@@ -28,6 +30,12 @@ as returned from NEONprocIS.cal::def.cal.slct. See documentation for that functi
\item{Meta}{Unused in this function. Defaults to an empty list. See the inputs to
NEONprocIS.cal::wrap.cal.conv.dp0p for what this input is.}
+\item{nomVal}{Unused in this function. Defaults to NULL. See the inputs to
+NEONprocIS.cal::def.cal.conv.nmnl for what this input is.}
+
+\item{nomCalID}{Unused in this function. Defaults to NULL. See the inputs to
+NEONprocIS.cal::def.cal.conv.nmnl for what this input is.}
+
\item{log}{A logger object as produced by NEONprocIS.base::def.log.init to produce structured log
output in addition to standard R error messaging. Defaults to NULL, in which the logger will be
created and used within the function.}
diff --git a/pack/NEONprocIS.cal/man/def.cal.conv.poly.aepg600m.Rd b/pack/NEONprocIS.cal/man/def.cal.conv.poly.aepg600m.Rd
index 93ff5f3ba..e6c4263c2 100644
--- a/pack/NEONprocIS.cal/man/def.cal.conv.poly.aepg600m.Rd
+++ b/pack/NEONprocIS.cal/man/def.cal.conv.poly.aepg600m.Rd
@@ -10,6 +10,8 @@ def.cal.conv.poly.aepg600m(
varConv = base::names(data)[1],
calSlct = NULL,
Meta = list(),
+ nomVal = NULL,
+ nomCalID = NULL,
log = NULL
)
}
@@ -29,6 +31,12 @@ as returned from NEONprocIS.cal::def.cal.slct. See documentation for that functi
\item{Meta}{Unused in this function. Defaults to an empty list. See the inputs to
NEONprocIS.cal::wrap.cal.conv.dp0p for what this input is.}
+\item{nomVal}{Unused in this function. Defaults to NULL. See the inputs to
+NEONprocIS.cal::def.cal.conv.nmnl for what this input is.}
+
+\item{nomCalID}{Unused in this function. Defaults to NULL. See the inputs to
+NEONprocIS.cal::def.cal.conv.nmnl for what this input is.}
+
\item{log}{A logger object as produced by NEONprocIS.base::def.log.init to produce structured log
output in addition to standard R error messaging. Defaults to NULL, in which the logger will be
created and used within the function.}
diff --git a/pack/NEONprocIS.cal/man/def.cal.conv.poly.b.Rd b/pack/NEONprocIS.cal/man/def.cal.conv.poly.b.Rd
index e7f2306c9..eb81c8ea7 100644
--- a/pack/NEONprocIS.cal/man/def.cal.conv.poly.b.Rd
+++ b/pack/NEONprocIS.cal/man/def.cal.conv.poly.b.Rd
@@ -9,6 +9,8 @@ def.cal.conv.poly.b(
varConv = base::names(data)[1],
calSlct = NULL,
Meta = list(),
+ nomVal = NULL,
+ nomCalID = NULL,
log = NULL
)
}
@@ -28,6 +30,12 @@ as returned from NEONprocIS.cal::def.cal.slct. See documentation for that functi
\item{Meta}{Unused in this function. Defaults to an empty list. See the inputs to
NEONprocIS.cal::wrap.cal.conv.dp0p for what this input is.}
+\item{nomVal}{Unused in this function. Defaults to NULL. See the inputs to
+NEONprocIS.cal::def.cal.conv.nmnl for what this input is.}
+
+\item{nomCalID}{Unused in this function. Defaults to NULL. See the inputs to
+NEONprocIS.cal::def.cal.conv.nmnl for what this input is.}
+
\item{log}{A logger object as produced by NEONprocIS.base::def.log.init to produce structured log
output in addition to standard R error messaging. Defaults to NULL, in which the logger will be
created and used within the function.}
diff --git a/pack/NEONprocIS.cal/man/def.cal.conv.poly.h.Rd b/pack/NEONprocIS.cal/man/def.cal.conv.poly.h.Rd
index d4e63140a..770eccff5 100644
--- a/pack/NEONprocIS.cal/man/def.cal.conv.poly.h.Rd
+++ b/pack/NEONprocIS.cal/man/def.cal.conv.poly.h.Rd
@@ -9,6 +9,8 @@ def.cal.conv.poly.h(
varConv = base::names(data)[1],
calSlct = NULL,
Meta = list(),
+ nomVal = NULL,
+ nomCalID = NULL,
log = NULL
)
}
@@ -28,6 +30,12 @@ as returned from NEONprocIS.cal::def.cal.slct. See documentation for that functi
\item{Meta}{Unused in this function. Defaults to an empty list. See the inputs to
NEONprocIS.cal::wrap.cal.conv.dp0p for what this input is.}
+\item{nomVal}{Unused in this function. Defaults to NULL. See the inputs to
+NEONprocIS.cal::def.cal.conv.nmnl for what this input is.}
+
+\item{nomCalID}{Unused in this function. Defaults to NULL. See the inputs to
+NEONprocIS.cal::def.cal.conv.nmnl for what this input is.}
+
\item{log}{A logger object as produced by NEONprocIS.base::def.log.init to produce structured log
output in addition to standard R error messaging. Defaults to NULL, in which the logger will be
created and used within the function.}
diff --git a/pack/NEONprocIS.cal/man/def.cal.conv.poly.l.Rd b/pack/NEONprocIS.cal/man/def.cal.conv.poly.l.Rd
index 0760badd4..57517ee08 100644
--- a/pack/NEONprocIS.cal/man/def.cal.conv.poly.l.Rd
+++ b/pack/NEONprocIS.cal/man/def.cal.conv.poly.l.Rd
@@ -9,6 +9,8 @@ def.cal.conv.poly.l(
varConv = base::names(data)[1],
calSlct = NULL,
Meta = list(),
+ nomVal = NULL,
+ nomCalID = NULL,
log = NULL
)
}
@@ -28,6 +30,12 @@ as returned from NEONprocIS.cal::def.cal.slct. See documentation for that functi
\item{Meta}{Unused in this function. Defaults to an empty list. See the inputs to
NEONprocIS.cal::wrap.cal.conv.dp0p for what this input is.}
+\item{nomVal}{Unused in this function. Defaults to NULL. See the inputs to
+NEONprocIS.cal::def.cal.conv.nmnl for what this input is.}
+
+\item{nomCalID}{Unused in this function. Defaults to NULL. See the inputs to
+NEONprocIS.cal::def.cal.conv.nmnl for what this input is.}
+
\item{log}{A logger object as produced by NEONprocIS.base::def.log.init to produce structured log
output in addition to standard R error messaging. Defaults to NULL, in which the logger will be
created and used within the function.}
diff --git a/pack/NEONprocIS.cal/man/def.cal.conv.poly.m.Rd b/pack/NEONprocIS.cal/man/def.cal.conv.poly.m.Rd
index f0a9e2eba..0a182900d 100644
--- a/pack/NEONprocIS.cal/man/def.cal.conv.poly.m.Rd
+++ b/pack/NEONprocIS.cal/man/def.cal.conv.poly.m.Rd
@@ -9,6 +9,8 @@ def.cal.conv.poly.m(
varConv = base::names(data)[1],
calSlct = NULL,
Meta = list(),
+ nomVal = NULL,
+ nomCalID = NULL,
log = NULL
)
}
@@ -28,6 +30,12 @@ as returned from NEONprocIS.cal::def.cal.slct. See documentation for that functi
\item{Meta}{Unused in this function. Defaults to an empty list. See the inputs to
NEONprocIS.cal::wrap.cal.conv.dp0p for what this input is.}
+\item{nomVal}{Unused in this function. Defaults to NULL. See the inputs to
+NEONprocIS.cal::def.cal.conv.nmnl for what this input is.}
+
+\item{nomCalID}{Unused in this function. Defaults to NULL. See the inputs to
+NEONprocIS.cal::def.cal.conv.nmnl for what this input is.}
+
\item{log}{A logger object as produced by NEONprocIS.base::def.log.init to produce structured log
output in addition to standard R error messaging. Defaults to NULL, in which the logger will be
created and used within the function.}
diff --git a/pack/NEONprocIS.cal/man/def.cal.conv.poly.split.Rd b/pack/NEONprocIS.cal/man/def.cal.conv.poly.split.Rd
index aa4627b2d..dc57c38d7 100644
--- a/pack/NEONprocIS.cal/man/def.cal.conv.poly.split.Rd
+++ b/pack/NEONprocIS.cal/man/def.cal.conv.poly.split.Rd
@@ -9,6 +9,8 @@ def.cal.conv.poly.split(
varConv = base::names(data)[1],
calSlct = NULL,
Meta = list(),
+ nomVal = NULL,
+ nomCalID = NULL,
log = NULL
)
}
@@ -28,6 +30,12 @@ as returned from NEONprocIS.cal::def.cal.slct. See documentation for that functi
\item{Meta}{Unused in this function. Defaults to an empty list. See the inputs to
NEONprocIS.cal::wrap.cal.conv.dp0p for what this input is.}
+\item{nomVal}{Unused in this function. Defaults to NULL. See the inputs to
+NEONprocIS.cal::def.cal.conv.nmnl for what this input is.}
+
+\item{nomCalID}{Unused in this function. Defaults to NULL. See the inputs to
+NEONprocIS.cal::def.cal.conv.nmnl for what this input is.}
+
\item{log}{A logger object as produced by NEONprocIS.base::def.log.init to produce structured log
output in addition to standard R error messaging. Defaults to NULL, in which the logger will be
created and used within the function.}
diff --git a/pack/NEONprocIS.cal/man/def.cal.conv.swc.test.Rd b/pack/NEONprocIS.cal/man/def.cal.conv.swc.test.Rd
index 23d76c1e3..d02c220e7 100644
--- a/pack/NEONprocIS.cal/man/def.cal.conv.swc.test.Rd
+++ b/pack/NEONprocIS.cal/man/def.cal.conv.swc.test.Rd
@@ -9,6 +9,8 @@ def.cal.conv.swc.test(
varConv = base::names(data)[1],
calSlct = NULL,
Meta = list(),
+ nomVal = NULL,
+ nomCalID = NULL,
log = NULL
)
}
@@ -25,8 +27,14 @@ variable (column) to calibrate. The data frame in each list element holds
information about the calibration files and time periods that apply to the variable,
as returned from NEONprocIS.cal::def.cal.slct. See documentation for that function.}
-\item{Meta}{(Optional) List object containing additional metadata for use in
-this function as needed. Defaults to an empty list.}
+\item{Meta}{Unused in this function. Defaults to an empty list. See the inputs to
+NEONprocIS.cal::wrap.cal.conv.dp0p for what this input is.}
+
+\item{nomVal}{Unused in this function. Defaults to NULL. See the inputs to
+NEONprocIS.cal::def.cal.conv.nmnl for what this input is.}
+
+\item{nomCalID}{Unused in this function. Defaults to NULL. See the inputs to
+NEONprocIS.cal::def.cal.conv.nmnl for what this input is.}
\item{log}{A logger object as produced by NEONprocIS.base::def.log.init to produce structured log
output in addition to standard R error messaging. Defaults to NULL, in which the logger will be
diff --git a/pack/NEONprocIS.cal/man/def.cal.conv.test.multi.out.Rd b/pack/NEONprocIS.cal/man/def.cal.conv.test.multi.out.Rd
index e8ff53b96..ef6ce9577 100644
--- a/pack/NEONprocIS.cal/man/def.cal.conv.test.multi.out.Rd
+++ b/pack/NEONprocIS.cal/man/def.cal.conv.test.multi.out.Rd
@@ -9,6 +9,8 @@ def.cal.conv.test.multi.out(
varConv = setdiff(base::names(data), c("source_id", "site_id", "readout_time")),
calSlct = NULL,
Meta = list(),
+ nomVal = NULL,
+ nomCalID = NULL,
log = NULL
)
}
@@ -29,6 +31,12 @@ as returned from NEONprocIS.cal::def.cal.slct. See documentation for that functi
this function as needed. Defaults to an empty list, but this example requires that the list
item Meta$Locations is input to work properly.}
+\item{nomVal}{Unused in this function. Defaults to NULL. See the inputs to
+NEONprocIS.cal::def.cal.conv.nmnl for what this input is.}
+
+\item{nomCalID}{Unused in this function. Defaults to NULL. See the inputs to
+NEONprocIS.cal::def.cal.conv.nmnl for what this input is.}
+
\item{log}{A logger object as produced by NEONprocIS.base::def.log.init to produce structured log
output in addition to standard R error messaging. Defaults to NULL, in which the logger will be
created and used within the function.}
diff --git a/pack/NEONprocIS.cal/man/wrap.cal.conv.dp0p.Rd b/pack/NEONprocIS.cal/man/wrap.cal.conv.dp0p.Rd
index 62ef0701d..dbba19598 100644
--- a/pack/NEONprocIS.cal/man/wrap.cal.conv.dp0p.Rd
+++ b/pack/NEONprocIS.cal/man/wrap.cal.conv.dp0p.Rd
@@ -4,7 +4,15 @@
\alias{wrap.cal.conv.dp0p}
\title{Wrapper for applying calibration conversion to NEON L0 data}
\usage{
-wrap.cal.conv.dp0p(data, calSlct, FuncConv, Meta = list(), log = NULL)
+wrap.cal.conv.dp0p(
+ data,
+ calSlct,
+ FuncConv,
+ nomVal = NULL,
+ nomCalID = NULL,
+ Meta = list(),
+ log = NULL
+)
}
\arguments{
\item{data}{Data frame of L0 data. Must include POSIXct time variable readout_time.}
@@ -25,6 +33,15 @@ by pipes (e.g. "resistance|voltage") or no term at all (indicated by an NA). The
as the calibration conversion function is able to handle the case, for example if multiple L0 terms are used to create
a single calibrated output. \cr}
+\item{nomVal}{(optional). Parsed nominal calibration value table forwarded from
+\code{flow.cal.conv}. This is not limited to a single numeric value; it should be
+provided in the tabular form expected by downstream nominal calibration functions.}
+
+\item{nomCalID}{(optional). Parsed nominal calibration identifier table forwarded
+from \code{flow.cal.conv}. This is not limited to a single character string; it
+should be provided in the tabular form expected by downstream nominal calibration
+functions.}
+
\item{Meta}{(optional). A named list (default is an empty list) containing additional metadata to pass to
calibration and uncertainty functions. This can contain whatever information might be needed in the
calibration and/or uncertainty functions in addition to calibration and uncertainty information.}
diff --git a/pack/NEONprocIS.cal/tests/testthat/calibrations/nominal/calibration_nominal.xml b/pack/NEONprocIS.cal/tests/testthat/calibrations/nominal/calibration_nominal.xml
new file mode 100644
index 000000000..6239f3cd2
--- /dev/null
+++ b/pack/NEONprocIS.cal/tests/testthat/calibrations/nominal/calibration_nominal.xml
@@ -0,0 +1,135 @@
+
+
+ 32356
+
+
+
+
+ 21000000000084
+ 10000000100349
+ 1.0.0
+ 1.0
+ 0
+
+
+
+ 22.70
+ 22.90
+ 22.80
+ 0.07
+
+
+ 845.80
+ 846.10
+ 845.92
+ 0.06
+
+
+ 39.30
+ 41.00
+ 40.51
+ 0.30
+
+
+
+ CVALB1
+ 0.166635
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 100000000025811000000000254130000000012412
+ 0
+
+ U_CVALD3
+ 26
+
+ U_CVALD2
+ 60
+
+ U_CVALD1
+ 74
+
+ U_CVALA3
+ 0.061643
+
+ U_CVALA2
+ 0.071085
+
+ U_CVALA1
+ 0.094090
+
+ 0.060532
+
+ 2025-11-12T19:07:03.7630
+ 2025-11-12T19:29:07.3845
+
+ 411595
+
+
+ 2025-11-12T19:29:07.3845
+ 2027-01-06T19:29:07.3845
+
+
+
+
+
+
+
+
+ 30000000001133
+ smatthews
+ WO86473
+ 32356
+ 30000000016532
+
+ L1B300
+ 0348380000
+
+
+
+ L1B300
+
+
+ 200.000000,9.418275,0.088164,0.000000,0.000000,0.000000
+300.000000,14.529920,0.029079,0.000000,0.000000,0.000000
+400.000000,20.493370,0.374879,0.000000,0.000000,0.000000
+500.000000,25.317856,0.426593,0.000000,0.000000,0.000000
+600.000000,29.989468,0.078107,0.000000,0.000000,0.000000
+700.000000,34.995853,0.169558,0.000000,0.000000,0.000000
+800.000000,39.887359,0.269881,0.000000,0.000000,0.000000
+900.000000,44.985617,0.279626,0.000000,0.000000,0.000000
+1000.000000,49.995250,0.057281,0.000000,0.000000,0.000000
+2000.000000,99.555131,0.064226,0.000000,0.000000,0.000000
+3000.000000,150.013196,0.096637,0.000000,0.000000,0.000000
+4000.000000,200.061832,0.204746,0.000000,0.000000,0.000000
+5000.000000,249.998423,0.195165,0.000000,0.000000,0.000000
+NaN,0.000000,0.000000,0.000000,0.000000,0.000000
+3845819310.259780,22.800000,40.300000,846.000000,200.000000,10.072919
+3845819311.496517,22.800000,40.300000,846.000000,200.000000,9.425051
+
+ 2.060000,0.011650,0.400000,0.364000,0.000000,0.000000,5,m/s
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/pack/NEONprocIS.cal/tests/testthat/test-cal-conv-nmnl.R b/pack/NEONprocIS.cal/tests/testthat/test-cal-conv-nmnl.R
new file mode 100644
index 000000000..5e9bb3b19
--- /dev/null
+++ b/pack/NEONprocIS.cal/tests/testthat/test-cal-conv-nmnl.R
@@ -0,0 +1,167 @@
+##############################################################################################
+#' @title Unit test of NEON nominal calibration conversion
+
+#' @author
+#' Nora Catolico \email{ncatolico@battelleecology.org}
+
+#' @description
+#' Run unit tests for nominal calibration conversion function. The unit tests include positive and negative scenarios.
+#' The positive test is for a case when all the params to the function are valid
+#' The negative tests are when a param(s) is empty or does not have valid values
+
+#' @param data Data frame of nominally calibrated sensor readings. This data frame must have
+#' a column called "readout_time" with POSIXct timestamps
+#' @param nomVal A data frame of nominal calibration values with columns \code{term} and \code{value}.
+#' @param nomCalID A data frame with columns \code{term} and \code{ID}, where ID identifies the calibration value that should be used, e.g. CVAL_B1
+#' @param varConv A character string of the target variables (columns) in the data frame \code{data} for
+#' which calibrated output will be computed (all other columns will be ignored). Defaults to the first
+#' column in \code{data}.
+#' @param calSlct A named list of data frames, each list element corresponding to a
+#' variable (column) to calibrate. The data frame in each list element holds
+#' information about the calibration files and time periods that apply to the variable,
+#' as returned from NEONprocIS.cal::def.cal.slct. See documentation for that function.
+
+#' @return TRUE when a test passes. Log errors when fails and moves on to the next test. \cr
+
+#' @references
+#' License: (example) GNU AFFERO GENERAL PUBLIC LICENSE Version 3, 19 November 2007
+#' NEON.DOC.000785 TIS Calibrated Measurements and Level 1 Data Products Uncertainty Budget Plan
+
+#' @keywords Currently none
+
+#' @examples
+#' To run with testthat:
+#' devtools::test(pkg="/NEON-IS-data-processing/pack/NEONprocIS.cal")
+#' an example, devtools::test(pkg="C:/projects/NEON-IS-data-processing/pack/NEONprocIS.cal")
+
+#' @seealso \link[NEONprocIS.cal]{def.read.cal.xml}
+#'
+#' @export
+
+# changelog and author contributions / copyrights
+# Nora Catolico (2026-05-05)
+# original creation based on test-cal-conv-poly.R
+##############################################################################################
+# Define test context
+context("\n nominal calibration conversion\n")
+
+# Test nominal calibration conversion
+test_that("testing nominal calibration conversion", {
+  # Exercises def.cal.conv.nmnl with in-range, out-of-range, uncalibrated and malformed inputs
+  testDir <- "calibrations/nominal/"
+  testFileCal <- "calibration_nominal.xml"
+  testFileCalPath <- fs::path(testDir, testFileCal)
+
+  # Create data to calibrate - nominal wind speed data
+  # that will be converted to actual speeds using nominal calibration
+  data <- c(15, 30, 45, 60, 75, 90)
+  readout_time <- as.POSIXct(c('2025-12-13 00:00:00','2025-12-13 06:00:00','2025-12-13 12:00:00',
+                               '2025-12-14 00:00:00','2025-12-14 06:00:00','2025-12-15 00:00:00'),tz='GMT')
+  data <- data.frame(readout_time=readout_time, speed=data)
+
+  metaCal <- NEONprocIS.cal::def.cal.meta(fileCal=testFileCalPath)
+  TimeBgn <- base::as.POSIXct('2025-12-13',tz='GMT')
+  TimeEnd <- base::as.POSIXct('2025-12-15',tz='GMT')
+  calSlct <- list(speed=NEONprocIS.cal::def.cal.slct(metaCal=metaCal,TimeBgn=TimeBgn,TimeEnd=TimeEnd))
+
+  # Create nominal value and calibration ID data frames as expected by the function
+  nomVal <- data.frame(term=c('speed'), value=c(0.16667))
+  nomCalID <- data.frame(term=c('speed'), ID=c('CVALB1'))
+
+  ##########
+  ########## Happy paths:::: data and cal not empty and have valid values
+  ##########
+
+  cat("\n |====== Positive test:: ==========|\n")
+  cat("\n |------ data and cal are not empty and have valid values |\n")
+
+  calibrated <-
+    NEONprocIS.cal::def.cal.conv.nmnl(data = data,
+                                      nomVal = nomVal,
+                                      nomCalID = nomCalID,
+                                      varConv='speed',
+                                      calSlct=calSlct)
+
+  # Check that calibrated data is created with appropriate column name
+  testthat::expect_true('speedCalibrated' %in% names(calibrated))
+
+  # Check the data inside the valid date range are calibrated correctly
+  # Expected: (15/90)/(15/90) * CVAL_B1, (30/90)/(15/90) * CVAL_B1, etc.
+  # Where CVAL_B1 is retrieved from the calibration file; every interior point must be calibrated, not just one
+  testthat::expect_true(all(!is.na(calibrated$speedCalibrated[2:5])))
+
+
+  cat("\n |====== Positive test:: ==========|\n")
+  cat("\n |------ valid calibration date range inclusive of start date, exclusive of end date |\n")
+
+  # Check the first and last dates, which fall on the boundaries of the valid cal periods
+  testthat::expect_true(!is.na(calibrated$speedCalibrated[1]) || !is.na(calibrated$speedCalibrated[6]))
+  # NOTE(review): `||` passes if either boundary is calibrated; the stated inclusive-start/exclusive-end rule suggests `!is.na([1]) && is.na([6])` - TODO confirm
+
+  cat("\n |======= Positive test:: ============|\n")
+  cat("\n |------ data is before the valid date range of the cal. Return NA values. |\n\n")
+
+  data$readout_time <- as.POSIXct(c('2025-10-13','2025-10-14','2025-10-15','2025-10-16','2025-10-17','2025-10-18'),tz='GMT')
+
+  calibrated <- NEONprocIS.cal::def.cal.conv.nmnl(data = data,
+                                                  nomVal = nomVal,
+                                                  nomCalID = nomCalID,
+                                                  varConv='speed',
+                                                  calSlct=calSlct)
+
+  testthat::expect_true(all(is.na(calibrated$speedCalibrated)))
+
+  # NOTE(review): data still carries the out-of-range October timestamps here, so NA is expected even with a matching cal - TODO confirm intent
+  cat("\n |======= Positive test:: ============|\n")
+  cat("\n |------ No cals specified for 'speed'. Returns NA |\n\n")
+  calSlctNoVar <- list(temp=NEONprocIS.cal::def.cal.slct(metaCal=metaCal,TimeBgn=TimeBgn,TimeEnd=TimeEnd))
+  calibrated <- NEONprocIS.cal::def.cal.conv.nmnl(data = data,
+                                                  nomVal = nomVal,
+                                                  nomCalID = nomCalID,
+                                                  varConv='speed',
+                                                  calSlct=calSlctNoVar)
+  testthat::expect_true(all(is.na(calibrated$speedCalibrated)))
+
+
+  # Negative scenarios: each call is wrapped in try() and must yield a try-error
+  cat("\n |======= Negative test:: ============|\n")
+  cat("\n |------ Cannot calibrate character variable |\n\n")
+  # The target column is converted to character so the conversion has non-numeric input
+  # Restore in-range timestamps BEFORE copying, so the character-typed copy falls inside the selected cal period
+  data$readout_time <- as.POSIXct(c('2025-12-13 00:00:00','2025-12-13 06:00:00','2025-12-13 12:00:00',
+                                    '2025-12-14 00:00:00','2025-12-14 06:00:00','2025-12-15 00:00:00'),tz='GMT')
+  data_char <- data
+  data_char$speed <- as.character(data_char$speed)
+
+  calibrated <- try(NEONprocIS.cal::def.cal.conv.nmnl(data = data_char,
+                                                      nomVal = nomVal,
+                                                      nomCalID = nomCalID,
+                                                      varConv='speed',
+                                                      calSlct=calSlct), silent = TRUE)
+  testthat::expect_true(inherits(calibrated, "try-error"))
+
+  #
+  cat("\n |======= Negative test:: ============|\n")
+  cat("\n |------ data missing readout_time variable |\n\n")
+  # drop = FALSE keeps a data frame, so the input really is a data frame lacking readout_time rather than a bare vector
+  calibrated <- try(NEONprocIS.cal::def.cal.conv.nmnl(data = data[, 'speed', drop = FALSE],
+                                                      nomVal = nomVal,
+                                                      nomCalID = nomCalID,
+                                                      varConv='speed',
+                                                      calSlct=calSlct), silent = TRUE)
+
+  testthat::expect_true(inherits(calibrated, "try-error"))
+
+  #
+  cat("\n |======= Negative test:: ============|\n")
+  cat("\n |------ readout_time not POSIXt |\n\n")
+  data$readout_time <- as.character(data$readout_time)
+  calibrated <- try(NEONprocIS.cal::def.cal.conv.nmnl(data = data,
+                                                      nomVal = nomVal,
+                                                      nomCalID = nomCalID,
+                                                      varConv='speed',
+                                                      calSlct=calSlct),
+                    silent=TRUE)
+  testthat::expect_true(inherits(calibrated, "try-error"))
+
+})
diff --git a/pipe/hmr3300/hmr3300_data_source_gcs.yaml b/pipe/hmr3300/hmr3300_data_source_gcs.yaml
new file mode 100644
index 000000000..c83863ffb
--- /dev/null
+++ b/pipe/hmr3300/hmr3300_data_source_gcs.yaml
@@ -0,0 +1,76 @@
+---
+pipeline:
+ name: hmr3300_data_source_gcs
+transform:
+ image_pull_secrets:
+ - battelleecology-quay-read-all-pull-secret
+ image: us-central1-docker.pkg.dev/neon-shared-service/neonscience/neon-is-l0-gcs-loader:v2.1.0
+ cmd:
+ - sh
+ - "-c"
+ - |-
+ /bin/bash <<'EOF'
+ # Use bash-strict mode. See http://redsymbol.net/articles/unofficial-bash-strict-mode/
+ set -euo pipefail
+ IFS=$'\n\t'
+
+ # Get today's date for evaluating kafka data retention period
+ date_today=$(date -u +%Y-%m-%d)
+ kafka_min_date=$(date -u -d "$KAFKA_RETENTION_DAYS days ago" +%Y-%m-%d)
+
+ # Get data from GCS bucket
+ echo "Processing $import_trigger"
+ p=${import_trigger#/pfs} # Strip the leading /pfs from the datum path
+ IFS="/"; arr=($p); unset IFS;
+ source_type=${arr[2]}
+ year=${arr[3]}
+ month=${arr[4]}
+ day=${arr[5]}
+ if [ "$(date -u +%s -d "$year-$month-$day")" -lt "$(date -u +%s -d "$kafka_min_date")" ]
+ then
+ echo "Extracting $year-$month-$day for $source_type from GCS"
+ python3 -m l0_gcs_loader.l0_gcs_loader
+ else
+ echo "$year/$month/$day is within the Kafka retention period and should be loaded from Kafka. Skipping..."
+ fi
+
+ EOF
+ env:
+ LOG_LEVEL: INFO
+ OUT_PATH: /pfs/out
+ KAFKA_RETENTION_DAYS: "15"
+ BUCKET_VERSION_PATH: "v2" # The root path of the bucket, indicative of the version (e.g. v2)
+ SOURCE_TYPE_INDEX: "3"
+ YEAR_INDEX: "4"
+ MONTH_INDEX: "5"
+ DAY_INDEX: "6"
+ # BUCKET_NAME: neon-l0-ingest # Always pull from prod bucket
+ secrets:
+ - name: l0-bucket # Using this secret will use the dev/cert/prod bucket linked to the Pachyderm environment
+ env_var: BUCKET_NAME
+ key: LO_BUCKET
+
+input:
+ pfs:
+ name: import_trigger
+ repo: hmr3300_cron_daily_and_date_control
+ # Glob must be daily: the script above parses source type/year/month/day from the datum path
+ glob: "/hmr3300/*/*/*"
+output_branch: master
+parallelism_spec:
+ constant: 5
+autoscaling: true
+resource_requests:
+ memory: 400M
+ cpu: 0.5
+resource_limits:
+ memory: 800M
+ cpu: 1.5
+sidecar_resource_requests:
+ memory: 2.4G
+ cpu: 0.5
+datum_set_spec:
+ number: 1
+scheduling_spec:
+ node_selector:
+ cloud.google.com/compute-class: pach-pipeline-class
diff --git a/pipe/hmr3300/hmr3300_data_source_kafka.yaml b/pipe/hmr3300/hmr3300_data_source_kafka.yaml
index ae9022afe..019f9340f 100644
--- a/pipe/hmr3300/hmr3300_data_source_kafka.yaml
+++ b/pipe/hmr3300/hmr3300_data_source_kafka.yaml
@@ -49,7 +49,7 @@ transform:
- "-c"
- |-
/bin/bash <<'EOF'
- # Use bash-scrict mode. See http://redsymbol.net/articles/unofficial-bash-strict-mode/
+ # Use bash-strict mode. See http://redsymbol.net/articles/unofficial-bash-strict-mode/
set -euo pipefail
IFS=$'\n\t'
diff --git a/pipe/hmr3300/hmr3300_data_source_trino.yaml b/pipe/hmr3300/hmr3300_data_source_trino.yaml
index bf285f208..d0152da9a 100644
--- a/pipe/hmr3300/hmr3300_data_source_trino.yaml
+++ b/pipe/hmr3300/hmr3300_data_source_trino.yaml
@@ -8,7 +8,7 @@ transform:
- "-c"
- |-
/bin/bash <<'EOF'
- # Use bash-scrict mode. See http://redsymbol.net/articles/unofficial-bash-strict-mode/
+ # Use bash-strict mode. See http://redsymbol.net/articles/unofficial-bash-strict-mode/
set -euo pipefail
IFS=$'\n\t'
diff --git a/pipe/hmr3300/hmr3300_fill_date_gaps_and_regularize.yaml b/pipe/hmr3300/hmr3300_fill_date_gaps_and_regularize.yaml
new file mode 100644
index 000000000..9ace2e033
--- /dev/null
+++ b/pipe/hmr3300/hmr3300_fill_date_gaps_and_regularize.yaml
@@ -0,0 +1,100 @@
+---
+pipeline:
+ name: hmr3300_fill_date_gaps_and_regularize
+transform:
+ image: us-central1-docker.pkg.dev/neon-shared-service/neonscience/neon-is-gf-rglr:v1.2.0
+ cmd:
+ - sh
+ - "-c"
+ - |-
+ /bin/bash <<'EOF'
+ # Use bash-strict mode. See http://redsymbol.net/articles/unofficial-bash-strict-mode/
+ set -euo pipefail
+ IFS=$'\n\t'
+
+ # Refresh interim directories with each datum (otherwise they persist and cause probs)
+ rm -rf $OUT_PATH
+ mkdir -p $OUT_PATH
+
+ # Run first module - date-gap-filler (using environment variables below as input parameters)
+ python3 -m date_gap_filler.date_gap_filler_main
+
+ # Run second module - regularize
+ Rscript ./flow.rglr.R \
+ DirIn=/tmp/pfs/date_filled \
+ DirOut=/pfs/out \
+ DirErr=/pfs/out/errored_datums \
+ "DirRglr=data|uncertainty_data|flags" \
+ MethRglr=CybiEc \
+ WndwRglr=Trlg \
+ IdxWndw=IdxWndwMin \
+ RptTimeWndw=FALSE \
+ DropNotNumc=FALSE \
+ "DirSubCopy=location|uncertainty_coef"
+ EOF
+ env:
+ # Environment variables for date gap filler
+ LOG_LEVEL: INFO
+ OUT_PATH: /tmp/pfs/date_filled
+ OUTPUT_DIRECTORIES: data,location,uncertainty_data,uncertainty_coef,flags
+ DATA_SOURCE_TYPE_INDEX: '3'
+ DATA_YEAR_INDEX: '4'
+ DATA_MONTH_INDEX: '5'
+ DATA_DAY_INDEX: '6'
+ DATA_LOCATION_INDEX: '7'
+ DATA_TYPE_INDEX: '8'
+ LOCATION_SOURCE_TYPE_INDEX: '3'
+ LOCATION_YEAR_INDEX: '4'
+ LOCATION_MONTH_INDEX: '5'
+ LOCATION_DAY_INDEX: '6'
+ LOCATION_INDEX: '7'
+ EMPTY_FILE_TYPE_INDEX: '4'
+ LINK_TYPE: COPY # options are COPY or SYMLINK. Use COPY for combined modules.
+ # Environment variables for regularizer
+ PARALLELIZATION_INTERNAL: '3' # Parallelization within R. If increased, adjust resource requests appropriately.
+input:
+ cross:
+ - pfs:
+ name: EMPTY_FILE_PATH
+ repo: hmr3300_empty_files
+ glob: /hmr3300
+ empty_files: false # Make sure to use false if LINK_TYPE=COPY. Can also be set to false for LINK_TYPE=SYMLINK.
+ - group:
+ - pfs:
+ name: DATA_PATH
+ repo: hmr3300_location_group_and_restructure
+ glob: /(*/*/*/*)
+ group_by: $1
+ empty_files: false # Make sure to use false if LINK_TYPE=COPY. Can also be set to false for LINK_TYPE=SYMLINK.
+ - join:
+ - pfs:
+ name: LOCATION_PATH
+ repo: hmr3300_location_active_dates_assignment
+ glob: /(*/*/*/*)
+ joinOn: $1
+ group_by: $1
+ empty_files: false # Make sure to use false if LINK_TYPE=COPY. Can also be set to false for LINK_TYPE=SYMLINK.
+ - pfs:
+ name: DATE_LIMITER_PATH
+ repo: hmr3300_cron_daily_and_date_control
+ glob: /(*/*/*/*)
+ joinOn: $1
+ group_by: $1
+ empty_files: true # This can remain true even if LINK_TYPE=COPY
+parallelism_spec:
+ constant: 5
+autoscaling: true
+resource_requests:
+ memory: 2G
+ cpu: 3.3
+resource_limits:
+ memory: 3G
+ cpu: 4.5
+sidecar_resource_requests:
+ memory: 3G
+ cpu: 0.5
+datum_set_spec:
+ number: 1
+scheduling_spec:
+ node_selector:
+ cloud.google.com/compute-class: pach-pipeline-class
diff --git a/pipe/hmr3300/hmr3300_location_active_dates_assignment.yaml b/pipe/hmr3300/hmr3300_location_active_dates_assignment.yaml
new file mode 100644
index 000000000..c8fa6edfb
--- /dev/null
+++ b/pipe/hmr3300/hmr3300_location_active_dates_assignment.yaml
@@ -0,0 +1,48 @@
+---
+pipeline:
+ name: hmr3300_location_active_dates_assignment
+transform:
+ cmd: ["/bin/bash"]
+ stdin:
+ - "#!/bin/bash"
+ - export ERR_PATH="/pfs/out/errored_datums$FILE_YEAR"
+ - Rscript
+ ./flow.loc.grp.asgn.R
+ DirIn=$DIR_IN
+ DirOut=/pfs/out
+ DirErr=$ERR_PATH
+ FileYear=$FILE_YEAR
+ TypeFile=namedLocation
+ "Prop=HOR|VER|name|description|site|Data Rate|active_periods|ThermistorDepth501|ThermistorDepth502|ThermistorDepth503|ThermistorDepth504|ThermistorDepth505|ThermistorDepth506|ThermistorDepth507|ThermistorDepth508|ThermistorDepth509|ThermistorDepth510|ThermistorDepth511"
+ image: us-central1-docker.pkg.dev/neon-shared-service/neonscience/neon-is-loc-grp-asgn:v1.3.0
+ # image_pull_secrets:
+ # - battelleecology-quay-read-all-pull-secret
+ env:
+ LOG_LEVEL: INFO
+input:
+ cross:
+ - pfs:
+ name: DIR_IN
+ repo: hmr3300_location_loader
+ glob: /hmr3300/*
+ - pfs:
+ name: FILE_YEAR
+ repo: hmr3300_cron_daily_and_date_control
+ glob: /data_year*.txt
+parallelism_spec:
+ constant: 4
+autoscaling: true
+resource_requests:
+ memory: 210M
+ cpu: 1.2
+resource_limits:
+ memory: 500M
+ cpu: 1.6
+sidecar_resource_requests:
+ memory: 2G
+ cpu: 0.3
+datum_set_spec:
+ number: 5
+scheduling_spec:
+ node_selector:
+ cloud.google.com/compute-class: pach-pipeline-class
diff --git a/pipe/hmr3300/hmr3300_location_asset.yaml b/pipe/hmr3300/hmr3300_location_asset.yaml
new file mode 100644
index 000000000..22cff181a
--- /dev/null
+++ b/pipe/hmr3300/hmr3300_location_asset.yaml
@@ -0,0 +1,39 @@
+---
+pipeline:
+ name: hmr3300_location_asset
+transform:
+ # image_pull_secrets:
+ # - battelleecology-quay-read-all-pull-secret
+ image: us-central1-docker.pkg.dev/neon-shared-service/neonscience/neon-is-loc-asset-loader:v1.1.0
+ cmd:
+ - /bin/bash
+ stdin:
+ - '#!/bin/bash'
+ - python3 -m location_asset_loader.location_asset_loader_main
+ env:
+ OUT_PATH: /pfs/out
+ # ERR_PATH can be changed, it is user specified
+ ERR_PATH: /pfs/out/errored_datums
+ LOG_LEVEL: INFO
+ SOURCE_TYPE: hmr3300
+ secrets:
+ - name: pdr-secret
+ mount_path: /var/db_secret
+input:
+ pfs:
+ repo: hmr3300_cron_daily_and_date_control_tick
+ glob: /*
+ empty_files: true
+autoscaling: true
+resource_requests:
+ memory: 100M
+ cpu: 0.15
+resource_limits:
+ memory: 300M
+ cpu: 0.5
+sidecar_resource_requests:
+ memory: 500M
+ cpu: 0.2
+scheduling_spec:
+ node_selector:
+ cloud.google.com/compute-class: pach-pipeline-class
\ No newline at end of file
diff --git a/pipe/hmr3300/hmr3300_location_asset_assignment.yaml b/pipe/hmr3300/hmr3300_location_asset_assignment.yaml
new file mode 100644
index 000000000..51ef1bb7f
--- /dev/null
+++ b/pipe/hmr3300/hmr3300_location_asset_assignment.yaml
@@ -0,0 +1,48 @@
+---
+pipeline:
+ name: hmr3300_location_asset_assignment
+transform:
+ cmd: ["/bin/bash"]
+ stdin:
+ - "#!/bin/bash"
+ - export ERR_PATH="/pfs/out/errored_datums$FILE_YEAR"
+ - Rscript
+ ./flow.loc.grp.asgn.R
+ DirIn=$DIR_IN
+ DirOut=/pfs/out
+ DirErr=$ERR_PATH
+ FileYear=$FILE_YEAR
+ TypeFile=asset
+ "Prop=HOR|VER|install_date|remove_date|name|site|Data Rate|locations|ThermistorDepth501|ThermistorDepth502|ThermistorDepth503|ThermistorDepth504|ThermistorDepth505|ThermistorDepth506|ThermistorDepth507|ThermistorDepth508|ThermistorDepth509|ThermistorDepth510|ThermistorDepth511"
+ image: us-central1-docker.pkg.dev/neon-shared-service/neonscience/neon-is-loc-grp-asgn:v1.3.0
+ # image_pull_secrets:
+ # - battelleecology-quay-read-all-pull-secret
+ env:
+ LOG_LEVEL: INFO
+input:
+ cross:
+ - pfs:
+ name: DIR_IN
+ repo: hmr3300_location_asset
+ glob: /hmr3300/*
+ - pfs:
+ name: FILE_YEAR
+ repo: hmr3300_cron_daily_and_date_control
+ glob: /data_year*.txt
+parallelism_spec:
+ constant: 5
+autoscaling: true
+resource_requests:
+ memory: 400M
+ cpu: 1.5
+resource_limits:
+ memory: 800M
+ cpu: 2
+sidecar_resource_requests:
+ memory: 2G
+ cpu: 0.3
+datum_set_spec:
+ number: 5
+scheduling_spec:
+ node_selector:
+ cloud.google.com/compute-class: pach-pipeline-class
diff --git a/pipe/hmr3300/hmr3300_location_group_and_restructure.yaml b/pipe/hmr3300/hmr3300_location_group_and_restructure.yaml
new file mode 100644
index 000000000..bdebcc22c
--- /dev/null
+++ b/pipe/hmr3300/hmr3300_location_group_and_restructure.yaml
@@ -0,0 +1,97 @@
+---
+pipeline:
+ name: hmr3300_location_group_and_restructure
+transform:
+ image: us-central1-docker.pkg.dev/neon-shared-service/neonscience/neon-is-loc-grp-strc-comb:v1.2.1
+ cmd:
+ - sh
+ - "-c"
+ - |-
+ /bin/bash <<'EOF'
+ # Use bash-strict mode. See http://redsymbol.net/articles/unofficial-bash-strict-mode/
+ set -euo pipefail
+ IFS=$'\n\t'
+ # Refresh interim directories with each datum (otherwise they persist and cause probs)
+ rm -rf /tmp/pfs/filter_joined
+ rm -rf /tmp/pfs/structured
+ rm -rf /tmp/pfs/structuredCopy
+ mkdir -p /tmp/pfs/filter_joined
+ # Run first module - filter-joiner (using environment variables below as input parameters)
+ python3 -m filter_joiner.filter_joiner_main
+ # Run second module - structure repo by location
+ Rscript ./flow.loc.repo.strc.R \
+ DirIn=/tmp/pfs/filter_joined \
+ DirOut=/tmp/pfs/structured \
+ DirErr=/pfs/out/errored_datums \
+ Comb=TRUE
+ # Copy output to another interim folder to destroy links (cannot daisy chain links from pfs input to output)
+ cp -rL /tmp/pfs/structured /tmp/pfs/structuredCopy || : # Allow to fail without exit code (happens if step above produced no output)
+ rm -rf /tmp/pfs/filter_joined
+ rm -rf /tmp/pfs/structured
+ # Run third module - merge data by location
+ Rscript ./flow.loc.data.trnc.comb.R \
+ DirIn=/tmp/pfs/structuredCopy \
+ DirOut=/pfs/out \
+ DirErr=/pfs/out/errored_datums \
+ "DirSubCombData=data|flags|uncertainty_data" \
+ DirSubCombUcrt=uncertainty_coef \
+ DirSubCopy=location
+ EOF
+ env:
+ # Environment variables for filter-joiner
+ CONFIG: |
+ ---
+ # In Pachyderm root will be index 0, 'pfs' index 1, and the repo name index 2.
+ # Metadata indices will typically begin at index 3.
+ input_paths:
+ - path:
+ name: DATA_PATH
+ # Filter for data directory
+ glob_pattern: /pfs/DATA_PATH/hmr3300/*/*/*/*/**
+ # Join on named location (already joined below by source type and day)
+ join_indices: [7]
+ outer_join: true
+ - path:
+ name: LOCATION_PATH
+ # Filter for data directory
+ glob_pattern: /pfs/LOCATION_PATH/hmr3300/*/*/*/*/**
+ # Join on named location (already joined below by source type and day)
+ join_indices: [7]
+ OUT_PATH: /tmp/pfs/filter_joined
+ LOG_LEVEL: INFO
+ RELATIVE_PATH_INDEX: "3"
+ LINK_TYPE: COPY # options are COPY or SYMLINK. Use COPY for combined module.
+ # Environment variables for R modules
+ PARALLELIZATION_INTERNAL: '3'
+input:
+ join:
+ - pfs:
+ name: DATA_PATH
+ repo: hmr3300_calibration_group_and_convert
+ glob: /hmr3300/(*)/(*)/(*)
+ joinOn: $1/$2/$3
+ outer_join: true
+ empty_files: false # Make sure to use false if LINK_TYPE=COPY. Can also be set to false for LINK_TYPE=SYMLINK.
+ - pfs:
+ name: LOCATION_PATH
+ repo: hmr3300_location_asset_assignment
+ glob: /hmr3300/(*)/(*)/(*)
+ joinOn: $1/$2/$3
+ empty_files: false # Make sure to use false if LINK_TYPE=COPY. Can also be set to false for LINK_TYPE=SYMLINK.
+parallelism_spec:
+ constant: 5
+autoscaling: true
+resource_requests:
+ memory: 1G
+ cpu: 3.5
+resource_limits:
+ memory: 2G
+ cpu: 4.5
+sidecar_resource_requests:
+ memory: 2G
+ cpu: 0.5
+datum_set_spec:
+ number: 1
+scheduling_spec:
+ node_selector:
+ cloud.google.com/compute-class: pach-pipeline-class
diff --git a/pipe/hmr3300/hmr3300_location_loader.yaml b/pipe/hmr3300/hmr3300_location_loader.yaml
new file mode 100644
index 000000000..6fbc7d11a
--- /dev/null
+++ b/pipe/hmr3300/hmr3300_location_loader.yaml
@@ -0,0 +1,43 @@
+---
+pipeline:
+ name: hmr3300_location_loader
+transform:
+ # image_pull_secrets:
+ # - battelleecology-quay-read-all-pull-secret
+ image: us-central1-docker.pkg.dev/neon-shared-service/neonscience/neon-is-loc-loader:v1.0.0
+ cmd:
+ - /bin/bash
+ stdin:
+ - '#!/bin/bash'
+ - python3 -m location_loader.location_loader_main
+ env:
+ LOCATION_TYPE: CONFIG
+ SOURCE_TYPE: hmr3300
+ OUT_PATH: /pfs/out
+ # ERR_PATH can be changed, it is user specified
+ ERR_PATH: /pfs/out/errored_datums
+ LOG_LEVEL: INFO
+ secrets:
+ - name: pdr-secret
+ mount_path: /var/db_secret
+input:
+ pfs:
+ repo: hmr3300_cron_daily_and_date_control_tick
+ glob: /*
+ empty_files: true
+autoscaling: true
+resource_requests:
+ memory: 100M
+ cpu: 0.1
+resource_limits:
+ memory: 300M
+ cpu: 0.5
+sidecar_resource_requests:
+ memory: 500M
+ cpu: 0.1
+sidecar_resource_limits:
+ memory: 2Gi
+ cpu: 1.2
+scheduling_spec:
+ node_selector:
+ cloud.google.com/compute-class: pach-pipeline-class
\ No newline at end of file
diff --git a/pipe/hmr3300/pipe_list_hmr3300.txt b/pipe/hmr3300/pipe_list_hmr3300.txt
index daa4f9f4f..92bcc2b08 100644
--- a/pipe/hmr3300/pipe_list_hmr3300.txt
+++ b/pipe/hmr3300/pipe_list_hmr3300.txt
@@ -1,2 +1,10 @@
hmr3300_cron_daily_and_date_control.yaml
+hmr3300_data_source_gcs.yaml
hmr3300_data_source_kafka.yaml
+hmr3300_location_asset.yaml
+hmr3300_location_loader.yaml
+hmr3300_location_asset_assignment.yaml
+hmr3300_location_active_dates_assignment.yaml
+hmr3300_location_group_and_restructure.yaml
+hmr3300_fill_date_gaps_and_regularize.yaml
+
diff --git a/pipe/hmr3300/pipe_list_hmr3300_initial_load.txt b/pipe/hmr3300/pipe_list_hmr3300_initial_load.txt
new file mode 100644
index 000000000..cfa2b5d96
--- /dev/null
+++ b/pipe/hmr3300/pipe_list_hmr3300_initial_load.txt
@@ -0,0 +1,3 @@
+hmr3300_cron_daily_and_date_control.yaml
+hmr3300_data_source_trino.yaml
+hmr3300_data_source_kafka.yaml
\ No newline at end of file
diff --git a/pipe/hmr3300/site-list.json b/pipe/hmr3300/site-list.json
index 746ef3f81..25c42adc1 100644
--- a/pipe/hmr3300/site-list.json
+++ b/pipe/hmr3300/site-list.json
@@ -2,45 +2,5 @@
{
"site" : "BARC",
"kafka_start_date" : "2024-08-11"
- },
- {
- "site" : "BLWA",
- "kafka_start_date" : "2024-08-22"
- },
- {
- "site" : "CRAM",
- "kafka_start_date" : "2024-07-20"
- },
- {
- "site" : "FLNT",
- "kafka_start_date" : "2024-08-11"
- },
- {
- "site" : "HQTW",
- "kafka_start_date" : "2023-03-03"
- },
- {
- "site" : "LIRO",
- "kafka_start_date" : "2024-08-10"
- },
- {
- "site" : "PRLA",
- "kafka_start_date" : "2024-08-10"
- },
- {
- "site" : "PRPO",
- "kafka_start_date" : "2024-08-10"
- },
- {
- "site" : "SUGG",
- "kafka_start_date" : "2024-08-11"
- },
- {
- "site" : "TOMB",
- "kafka_start_date" : "2024-08-10"
- },
- {
- "site" : "TOOK",
- "kafka_start_date" : "2024-08-10"
}
]
\ No newline at end of file
diff --git a/pipe/rmyoung/pipe_list_rmyoung.txt b/pipe/rmyoung/pipe_list_rmyoung.txt
deleted file mode 100644
index 4416bb05a..000000000
--- a/pipe/rmyoung/pipe_list_rmyoung.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-rmyoung_cron_daily_and_date_control.yaml
-rmyoung_data_source_kafka.yaml
diff --git a/pipe/rmyoung/pipe_list_rmyoung_development.txt b/pipe/rmyoung/pipe_list_rmyoung_development.txt
deleted file mode 100644
index 6726ca03a..000000000
--- a/pipe/rmyoung/pipe_list_rmyoung_development.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-rmyoung_cron_daily_and_date_control.yaml
-rmyoung_data_source_kafka.yaml
-rmyoung_data_source_trino.yaml
\ No newline at end of file
diff --git a/pipe/rmyoung/rmyoung_data_source_kafka.yaml b/pipe/rmyoung/rmyoung_data_source_kafka.yaml
deleted file mode 100644
index 1c73d05e6..000000000
--- a/pipe/rmyoung/rmyoung_data_source_kafka.yaml
+++ /dev/null
@@ -1,186 +0,0 @@
----
-pipeline:
- name: rmyoung_data_source_kafka
-transform:
- image: us-central1-docker.pkg.dev/neon-shared-service/bei/neon-avro-kafka-loader:v6.0.5
- image_pull_secrets:
- - battelleecology-quay-read-all-pull-secret
- env:
- OUT_PATH: /pfs/out
- SOURCE_TYPE: "rmyoung"
- LOG_LEVEL: INFO
- YEAR_INDEX: "5"
- MONTH_INDEX: "6"
- DAY_INDEX: "7"
- KAFKA_RETENTION_DAYS: "15"
- secrets:
- - name: pachyderm-kafka-auth
- env_var: KAFKA_USER
- key: KAFKA_USER
- - name: pachyderm-kafka-auth
- env_var: KAFKA_PASSWORD
- key: KAFKA_PASSWORD
- - name: pachyderm-kafka-auth
- env_var: KAFKA_BROKER
- key: KAFKA_BROKER
- - name: pachyderm-kafka-auth
- env_var: KAFKA_LOG_TOPIC
- key: KAFKA_LOG_TOPIC
- - name: pachyderm-kafka-auth
- env_var: KAFKA_COMPACT_TOPIC
- key: KAFKA_COMPACT_TOPIC
- - name: l0-bucket
- env_var: BUCKET_NAME
- key: LO_BUCKET
- - name: pdr-secret
- env_var: PDR_HOST
- key: hostname
- - name: pdr-secret
- env_var: PDR_DBNAME
- key: database
- - name: pdr-secret
- env_var: PDR_USER
- key: username
- - name: pdr-secret
- env_var: PDR_PASSWORD
- key: password
- cmd:
- - sh
- - "-c"
- - |-
- /bin/bash <<'EOF'
- # Use bash-scrict mode. See http://redsymbol.net/articles/unofficial-bash-strict-mode/
- set -euo pipefail
- IFS=$'\n\t'
-
- # Get GCP zone
- meta=$(curl -sH "Metadata-Flavor: Google" "http://metadata/computeMetadata/v1/instance/zone")
- zone=$(echo "${meta}" | cut -d "/" -f 4)
- echo "GCP Zone: ${zone}"
-
- # Get today's date for evaluating kafka data retention period
- date_today=$(date -u +%Y-%m-%d)
- kafka_min_date=$(date -u -d "${KAFKA_RETENTION_DAYS} days ago" +%Y-%m-%d)
-
- # Get date from input path. Terminal path structure must be /SOURCE_TYPE/YYYY/MM/DD/SITE_FILE
- # Datum must be set at /SOURCE_TYPE/YYYY/MM/DD or /SOURCE_TYPE/YYYY/MM/DD/SITE_FILE
- date_path=$(echo "${import_trigger}" | cut -f "${YEAR_INDEX},${MONTH_INDEX},${DAY_INDEX}" -d "/")
- echo "${date_path}"
- date_str=$(date -u +%Y-%m-%d -d "${date_path}")
-
- # Get each site to run
- if [[ -f ${import_trigger} ]]; then
- import_trigger_glob="${import_trigger}"
- else
- import_trigger_glob="${import_trigger}/*"
- fi
-
- mkdir -p "${OUT_PATH}/${SOURCE_TYPE}"
-
- for site_kafka in ${import_trigger_glob}; do
- site_file=$(basename "${site_kafka}") # Strip off any path prefix
- site=$(echo "${site_file}" | cut -f 1 -d "." --only-delimited) # Extract the site from site.kafka. Ignore site-only files (e.g. CPER vs. CPER.kafka)
- type=$(echo "${site_file}" | cut -f 2 -d "." --only-delimited) # Extract the 'kafka' from site.kafka
- if [[ "${type}" != "kafka" ]]
- then
- echo "${site_file} is not indicated to be streaming from Kafka. Skipping..."
- continue
- elif [[ "$(date -u +%s -d "${date_str}")" -lt "$(date -u +%s -d "${kafka_min_date}")" ]]
- then
- echo -n "Cannot extract ${date_str} Kafka data for ${site}. "
- echo -n "Today's date (${date_today}) is beyond the Kafka retention period (${KAFKA_RETENTION_DAYS} days). Skipping..."
- continue
- fi
-
- # We are ok to run
- echo "Extracting ${date_str} kafka data for ${site}"
-
- # Make a directory to store the output files by site
- siteoutpath="${OUT_PATH}/${site}"
- mkdir -p "${siteoutpath}"
-
- # Get "current data" - data that came in on the specified day, which is the same day it was measured
- # Note: We cannot use the --removeoffset flag on the kafka loader (which removes the offsets from the filenames. This will often violate the Pachyderm requirement that different datums cannot write the same file)
- ./extract-kafka-sensor.py -s "${site}" -S "${SOURCE_TYPE}" -D "${siteoutpath}/${SOURCE_TYPE}" -d "${date_str}" --only current --consumer "client.rack=${zone}"
-
- # Get "non-current data" - data that came in on the specified day, which is NOT the same day it was measured
- date_str_1=$(date +%Y-%m-%d -d "${date_str} + 1 day")
- ./extract-kafka-sensor.py -s "${site}" -S "${SOURCE_TYPE}" -D "${siteoutpath}/${SOURCE_TYPE}" -d "${date_str_1}" --only noncurrent --consumer client.rack="${zone}"
-
- # Upload L0 files to bucket, compacting with any existing file with the same name
- if [[ -d "${siteoutpath}/${SOURCE_TYPE}" ]]; then
- linkdir=$(mktemp -d)
- shopt -s globstar
- out_parquet_glob="${siteoutpath}/**/*.parquet"
- # /pfs/out/ABBY/rmyoung/2023/01/01/12345/data/file.parquet
- echo "Linking output files to ${linkdir}"
- # set -x # Uncomment for debugging
- for f in ${out_parquet_glob}; do
- # Parse the path
- [[ "${f}" =~ ^${siteoutpath}/(.*)/([0-9]+)/([0-9]+)/([0-9]+)/(.*)/data/(.*)$ ]]
- fsourcetype="${BASH_REMATCH[1]}"
- fyear="${BASH_REMATCH[2]}"
- fmonth="${BASH_REMATCH[3]}"
- fday="${BASH_REMATCH[4]}"
- fsourceid="${BASH_REMATCH[5]}"
- fname="${BASH_REMATCH[6]}"
- # fname_out="${fsourcetype}_${fsourceid}_${fyear}-${fmonth}-${fday}.parquet" # Remove offsets from the filename
- outdir="${linkdir}/v2/${fsourcetype}/ms=${fyear}-${fmonth}/source_id=${fsourceid}"
- mkdir -p "${outdir}"
- ln -s "${f}" "${outdir}/${fname}"
- done
-
- # Upload to bucket, compacting with any existing file
- echo "Uploading and compacting for ${site} ${date_str}"
- linkdir2=$(mktemp -d)
- ./compact-bucket-copy.py --sourcepath "${linkdir}" -l "${linkdir2}" --stripoffset
- ./compact-bucket-copy.py --sourcepath "${linkdir2}" --destbucket "${BUCKET_NAME}"
-
- # Update the airflow triggering table
- echo "Updating airflow trigger table for ${site} ${date_str}"
- ./update-trigger-table.py -s "${site}" -S "${SOURCE_TYPE}" -D "${linkdir2}"
-
- # set +x # Uncomment for debugging
- rm -rf "${linkdir}"
- rm -rf "${linkdir2}"
-
- # Move the per site files to the structure expected by pachyderm
- cd "${siteoutpath}/${SOURCE_TYPE}/"
- dest="${OUT_PATH}/${SOURCE_TYPE}/"
- find . -type f | while read f; do
- d=$(dirname "$f")
- file=$(basename "$f")
- mkdir -p "$dest/$d"
- mv -f "$f" "$dest/$d/"
- done
- cd -
- # Remove the site directory
- rm -rf "${siteoutpath}"
- fi
-
- done
- EOF
-input:
- pfs:
- name: import_trigger
- repo: rmyoung_cron_daily_and_date_control
- # Must be datum by day (e.g. /SOURCE_TYPE/*/*/*) or by day/site (e.g. /SOURCE_TYPE/*/*/*/*)
- glob: "/rmyoung/*/*/*"
-parallelism_spec:
- constant: 3
-autoscaling: true
-resource_requests:
- memory: 300M
- cpu: 1.6
-resource_limits:
- memory: 1.5G
- cpu: 2
-sidecar_resource_requests:
- memory: 2G
- cpu: 0.5
-datum_set_spec:
- number: 1
-scheduling_spec:
- node_selector:
- cloud.google.com/compute-class: pach-pipeline-class
-
diff --git a/pipe/rmyoung/rmyoung_fill_date_gaps.yaml b/pipe/rmyoung/rmyoung_fill_date_gaps.yaml
new file mode 100644
index 000000000..ecc102051
--- /dev/null
+++ b/pipe/rmyoung/rmyoung_fill_date_gaps.yaml
@@ -0,0 +1,108 @@
+---
+pipeline:
+ name: rmyoung_fill_date_gaps
+transform:
+ image: us-central1-docker.pkg.dev/neon-shared-service/neonscience/neon-is-gap-fill-nonrglr:v1.0.3
+ cmd:
+ - sh
+ - "-c"
+ - |-
+ /bin/bash <<'EOF'
+ # Use bash-strict mode. See http://redsymbol.net/articles/unofficial-bash-strict-mode/
+ set -euo pipefail
+ IFS=$'\n\t'
+
+ # Refresh interim directories with each datum (otherwise they persist and cause probs)
+ rm -r -f /tmp/pfs/interim
+ rm -rf $OUT_PATH
+ mkdir -p /tmp/pfs/interim
+ mkdir -p $OUT_PATH # R modules must have pfs in the repo structure
+
+ # Run first module - date-gap-filler (using environment variables below as input parameters)
+ python3 -m date_gap_filler.date_gap_filler_main
+
+ # Run second module - gap filler for non-regularized data
+ Rscript ./flow.gap.fill.nonrglr.R \
+ DirIn=/tmp/pfs/interim \
+ DirOut=/pfs/out \
+ DirErr=/pfs/out/errored_datums \
+ "DirFill=data|flags" \
+ WndwFill="02" \
+ "DirSubCopy=location|uncertainty_coef" \
+ "FileSchm=data:$FILE_SCHEMA_DATA|flags:$FILE_SCHEMA_FLAGS"
+
+ EOF
+ env:
+ # Environment variables for date gap filler
+ LOG_LEVEL: INFO
+ OUT_PATH: /tmp/pfs/interim
+ OUTPUT_DIRECTORIES: data,location,uncertainty_coef,flags
+ DATA_SOURCE_TYPE_INDEX: '3'
+ DATA_YEAR_INDEX: '4'
+ DATA_MONTH_INDEX: '5'
+ DATA_DAY_INDEX: '6'
+ DATA_LOCATION_INDEX: '7'
+ DATA_TYPE_INDEX: '8'
+ LOCATION_SOURCE_TYPE_INDEX: '3'
+ LOCATION_YEAR_INDEX: '4'
+ LOCATION_MONTH_INDEX: '5'
+ LOCATION_DAY_INDEX: '6'
+ LOCATION_INDEX: '7'
+ EMPTY_FILE_TYPE_INDEX: '4'
+ LINK_TYPE: COPY # options are COPY or SYMLINK. Use COPY for combined modules.
+ # Environment variables for the R gap-filling module
+ PARALLELIZATION_INTERNAL: '3' # Parallelization within R. If increased, adjust resource requests appropriately.
+input:
+ cross:
+ - pfs:
+ name: EMPTY_FILE_PATH
+ repo: rmyoung_empty_files
+ glob: /rmyoung
+ empty_files: false # Make sure to use false if LINK_TYPE=COPY. Can also be set to false for LINK_TYPE=SYMLINK.
+ - pfs:
+ name: FILE_SCHEMA_DATA
+ repo: rmyoung_avro_schemas
+ glob: /rmyoung/rmyoung_calibrated.avsc
+ - pfs:
+ name: FILE_SCHEMA_FLAGS
+ repo: rmyoung_avro_schemas
+ glob: /rmyoung/rmyoung_calibration_flags.avsc
+ - group:
+ - pfs:
+ name: DATA_PATH
+ repo: rmyoung_location_group_and_restructure
+ glob: /rmyoung/(*/*/*)
+ group_by: $1
+ empty_files: false # Make sure to use false if LINK_TYPE=COPY. Can also be set to false for LINK_TYPE=SYMLINK.
+ - join:
+ - pfs:
+ name: LOCATION_PATH
+ repo: rmyoung_location_active_dates_assignment
+ glob: /rmyoung/(*/*/*)
+ joinOn: $1
+ group_by: $1
+ empty_files: false # Make sure to use false if LINK_TYPE=COPY. Can also be set to false for LINK_TYPE=SYMLINK.
+ - pfs:
+ name: DATE_LIMITER_PATH
+ repo: rmyoung_cron_daily_and_date_control
+ glob: /rmyoung/(*/*/*)
+ joinOn: $1
+ group_by: $1
+ empty_files: true # This can remain true even if LINK_TYPE=COPY
+parallelism_spec:
+ constant: 5
+autoscaling: true
+resource_requests:
+ memory: 2G
+ cpu: 3.3
+resource_limits:
+ memory: 3G
+ cpu: 4.5
+sidecar_resource_requests:
+ memory: 3G
+ cpu: 0.5
+datum_set_spec:
+ number: 1
+scheduling_spec:
+ node_selector:
+ cloud.google.com/compute-class: pach-pipeline-class
diff --git a/pipe/rmyoung/rmyoung_location_group_and_restructure.yaml b/pipe/rmyoung/rmyoung_location_group_and_restructure.yaml
new file mode 100644
index 000000000..21e4e6c2f
--- /dev/null
+++ b/pipe/rmyoung/rmyoung_location_group_and_restructure.yaml
@@ -0,0 +1,97 @@
+---
+pipeline:
+ name: rmyoung_location_group_and_restructure
+transform:
+ image: us-central1-docker.pkg.dev/neon-shared-service/neonscience/neon-is-loc-grp-strc-comb:v1.2.3
+ cmd:
+ - sh
+ - "-c"
+ - |-
+ /bin/bash <<'EOF'
+ # Use bash-strict mode. See http://redsymbol.net/articles/unofficial-bash-strict-mode/
+ set -euo pipefail
+ IFS=$'\n\t'
+ # Refresh interim directories with each datum (otherwise they persist and cause probs)
+ rm -rf /tmp/pfs/filter_joined
+ rm -rf /tmp/pfs/structured
+ rm -rf /tmp/pfs/structuredCopy
+ mkdir -p /tmp/pfs/filter_joined
+ # Run first module - filter-joiner (using environment variables below as input parameters)
+ python3 -m filter_joiner.filter_joiner_main
+ # Run second module - structure repo by location
+ Rscript ./flow.loc.repo.strc.R \
+ DirIn=/tmp/pfs/filter_joined \
+ DirOut=/tmp/pfs/structured \
+ DirErr=/pfs/out/errored_datums \
+ Comb=TRUE
+ # Copy output to another interim folder to destroy links (cannot daisy chain links from pfs input to output)
+ cp -rL /tmp/pfs/structured /tmp/pfs/structuredCopy || : # Allow to fail without exit code (happens if step above produced no output)
+ rm -rf /tmp/pfs/filter_joined
+ rm -rf /tmp/pfs/structured
+ # Run third module - merge data by location
+ Rscript ./flow.loc.data.trnc.comb.R \
+ DirIn=/tmp/pfs/structuredCopy \
+ DirOut=/pfs/out \
+ DirErr=/pfs/out/errored_datums \
+ "DirSubCombData=data|flags" \
+ DirSubCombUcrt=uncertainty_coef \
+ DirSubCopy=location
+ EOF
+ env:
+ # Environment variables for filter-joiner
+ CONFIG: |
+ ---
+ # In Pachyderm root will be index 0, 'pfs' index 1, and the repo name index 2.
+ # Metadata indices will typically begin at index 3.
+ input_paths:
+ - path:
+ name: DATA_PATH
+ # Filter for data directory
+ glob_pattern: /pfs/DATA_PATH/rmyoung/*/*/*/*/**
+ # Join on named location (already joined below by day)
+ join_indices: [7]
+ outer_join: true
+ - path:
+ name: LOCATION_PATH
+ # Filter for data directory
+ glob_pattern: /pfs/LOCATION_PATH/rmyoung/*/*/*/*/**
+ # Join on named location (already joined below by day)
+ join_indices: [7]
+ OUT_PATH: /tmp/pfs/filter_joined
+ LOG_LEVEL: INFO
+ RELATIVE_PATH_INDEX: "3"
+ LINK_TYPE: COPY # options are COPY or SYMLINK. Use COPY for combined module.
+ # Environment variables for R modules
+ PARALLELIZATION_INTERNAL: '3'
+input:
+ join:
+ - pfs:
+ name: DATA_PATH
+ repo: rmyoung_calibration_group_and_convert
+ glob: /rmyoung/(*)/(*)/(*)
+ joinOn: $1/$2/$3
+ outer_join: true
+ empty_files: false # Make sure to use false if LINK_TYPE=COPY. Can also be set to false for LINK_TYPE=SYMLINK.
+ - pfs:
+ name: LOCATION_PATH
+ repo: rmyoung_location_asset_assignment
+ glob: /rmyoung/(*)/(*)/(*)
+ joinOn: $1/$2/$3
+ empty_files: false # Make sure to use false if LINK_TYPE=COPY. Can also be set to false for LINK_TYPE=SYMLINK.
+parallelism_spec:
+ constant: 5
+autoscaling: true
+resource_requests:
+ memory: 2.2G
+ cpu: 3.3
+resource_limits:
+ memory: 4G
+ cpu: 4.5
+sidecar_resource_requests:
+ memory: 3G
+ cpu: 0.5
+datum_set_spec:
+ number: 1
+scheduling_spec:
+ node_selector:
+ cloud.google.com/compute-class: pach-pipeline-class
diff --git a/pipe/rmyoung/site-list.json b/pipe/rmyoung/site-list.json
deleted file mode 100644
index 3c32017bb..000000000
--- a/pipe/rmyoung/site-list.json
+++ /dev/null
@@ -1,46 +0,0 @@
-[
- {
- "site" : "BARC",
- "kafka_start_date" : "2024-08-11"
- },
- {
- "site" : "BLWA",
- "kafka_start_date" : "2024-08-22"
- },
- {
- "site" : "CRAM",
- "kafka_start_date" : "2024-07-20"
- },
- {
- "site" : "HQTW",
- "kafka_start_date" : "2023-03-03"
- },
- {
- "site" : "FLNT",
- "kafka_start_date" : "2024-08-11"
- },
- {
- "site" : "LIRO",
- "kafka_start_date" : "2024-08-10"
- },
- {
- "site" : "PRLA",
- "kafka_start_date" : "2024-08-10"
- },
- {
- "site" : "PRPO",
- "kafka_start_date" : "2024-08-10"
- },
- {
- "site" : "SUGG",
- "kafka_start_date" : "2024-08-11"
- },
- {
- "site" : "TOMB",
- "kafka_start_date" : "2024-08-10"
- },
- {
- "site" : "TOOK",
- "kafka_start_date" : "2024-08-10"
- }
-]
\ No newline at end of file
diff --git a/pipe/rmyoung_hmr3300/pipe_list_rmyoung_hmr3300.txt b/pipe/rmyoung_hmr3300/pipe_list_rmyoung_hmr3300.txt
new file mode 100644
index 000000000..99635d023
--- /dev/null
+++ b/pipe/rmyoung_hmr3300/pipe_list_rmyoung_hmr3300.txt
@@ -0,0 +1,14 @@
+rmyoung_hmr3300_cron_daily_and_date_control.yaml
+rmyoung_cron_daily_and_date_control.yaml
+rmyoung_hmr3300_data_source_trino.yaml
+rmyoung_hmr3300_data_source_kafka.yaml
+rmyoung_calibration_list_files.yaml
+rmyoung_calibration_loader.yaml
+rmyoung_calibration_assignment.yaml
+rmyoung_hmr3300_calibration_group_and_convert.yaml
+rmyoung_hmr3300_location_asset.yaml
+rmyoung_hmr3300_location_asset_assignment.yaml
+rmyoung_hmr3300_location_loader.yaml
+rmyoung_hmr3300_location_active_dates_assignment.yaml
+rmyoung_hmr3300_location_group_and_restructure.yaml
+rmyoung_hmr3300_fill_date_gaps.yaml
\ No newline at end of file
diff --git a/pipe/rmyoung_hmr3300/pipe_list_rmyoung_hmr3300_development.txt b/pipe/rmyoung_hmr3300/pipe_list_rmyoung_hmr3300_development.txt
new file mode 100644
index 000000000..99635d023
--- /dev/null
+++ b/pipe/rmyoung_hmr3300/pipe_list_rmyoung_hmr3300_development.txt
@@ -0,0 +1,14 @@
+rmyoung_hmr3300_cron_daily_and_date_control.yaml
+rmyoung_cron_daily_and_date_control.yaml
+rmyoung_hmr3300_data_source_trino.yaml
+rmyoung_hmr3300_data_source_kafka.yaml
+rmyoung_calibration_list_files.yaml
+rmyoung_calibration_loader.yaml
+rmyoung_calibration_assignment.yaml
+rmyoung_hmr3300_calibration_group_and_convert.yaml
+rmyoung_hmr3300_location_asset.yaml
+rmyoung_hmr3300_location_asset_assignment.yaml
+rmyoung_hmr3300_location_loader.yaml
+rmyoung_hmr3300_location_active_dates_assignment.yaml
+rmyoung_hmr3300_location_group_and_restructure.yaml
+rmyoung_hmr3300_fill_date_gaps.yaml
\ No newline at end of file
diff --git a/pipe/rmyoung_hmr3300/rmyoung_calibration_assignment.yaml b/pipe/rmyoung_hmr3300/rmyoung_calibration_assignment.yaml
new file mode 100644
index 000000000..84991620c
--- /dev/null
+++ b/pipe/rmyoung_hmr3300/rmyoung_calibration_assignment.yaml
@@ -0,0 +1,47 @@
+---
+pipeline:
+ name: rmyoung_calibration_assignment
+transform:
+ cmd: ["/bin/bash"]
+ stdin:
+ - "#!/bin/bash"
+ - export ERR_PATH="/pfs/out/errored_datums$FILE_YEAR"
+ - Rscript
+ ./flow.cal.asgn.R
+ DirIn=$DIR_IN
+ DirOut=/pfs/out
+ DirErr=$ERR_PATH
+ FileYear=$FILE_YEAR
+ image: us-central1-docker.pkg.dev/neon-shared-service/neonscience/neon-is-cal-asgn:v2.0.3
+ # image_pull_secrets:
+ # - battelleecology-quay-read-all-pull-secret
+ env:
+ LOG_LEVEL: INFO
+input:
+ cross:
+ - pfs:
+ name: DIR_IN
+ repo: rmyoung_calibration_loader
+ glob: /*/*
+ - pfs:
+ name: FILE_YEAR
+ repo: rmyoung_cron_daily_and_date_control
+ glob: /data_year*.txt
+parallelism_spec:
+ constant: 5
+autoscaling: true
+resource_requests:
+ memory: 200M
+ cpu: 0.8
+resource_limits:
+ memory: 600M
+ cpu: 1.5
+sidecar_resource_requests:
+ memory: 5G
+ cpu: 1
+datum_set_spec:
+ number: 5
+scheduling_spec:
+ node_selector:
+ cloud.google.com/compute-class: pach-pipeline-class
+
diff --git a/pipe/rmyoung_hmr3300/rmyoung_calibration_list_files.yaml b/pipe/rmyoung_hmr3300/rmyoung_calibration_list_files.yaml
new file mode 100644
index 000000000..d64ea039f
--- /dev/null
+++ b/pipe/rmyoung_hmr3300/rmyoung_calibration_list_files.yaml
@@ -0,0 +1,30 @@
+---
+pipeline:
+ name: rmyoung_calibration_list_files
+transform:
+ image: us-central1-docker.pkg.dev/neon-shared-service/neonscience/neon-is-cval-loader:v3.0.0
+ cmd: ["/bin/bash"]
+ env:
+ CVAL_INGEST_BUCKET: neon-cval
+ OUT_PATH: /pfs/out
+ stdin:
+ - "#!/bin/bash"
+ - python3 -m calval_loader.calval_loader
+input:
+ pfs:
+ repo: rmyoung_cron_daily_and_date_control_tick
+ glob: /*
+ empty_files: true
+autoscaling: true
+resource_requests:
+ memory: 500M
+ cpu: 0.4
+resource_limits:
+ memory: 1G
+ cpu: 1.5
+sidecar_resource_requests:
+ memory: 1G
+ cpu: 0.2
+scheduling_spec:
+ node_selector:
+ cloud.google.com/compute-class: pach-pipeline-class
diff --git a/pipe/rmyoung_hmr3300/rmyoung_calibration_loader.yaml b/pipe/rmyoung_hmr3300/rmyoung_calibration_loader.yaml
new file mode 100644
index 000000000..defb8c57a
--- /dev/null
+++ b/pipe/rmyoung_hmr3300/rmyoung_calibration_loader.yaml
@@ -0,0 +1,44 @@
+---
+pipeline:
+ name: rmyoung_calibration_loader
+transform:
+ # image_pull_secrets:
+ # - battelleecology-quay-read-all-pull-secret
+ image: us-central1-docker.pkg.dev/neon-shared-service/neonscience/neon-is-cval-loader:v3.0.0
+ cmd:
+ - /bin/bash
+ stdin:
+ - '#!/bin/bash'
+ - python3 -m calval_loader.load_all_calval_files
+ env:
+ CVAL_INGEST_BUCKET: neon-cval
+ OUT_PATH: /pfs/out
+ LOG_LEVEL: INFO
+ SOURCE_TYPE: rmyoung
+ STARTING_PATH_INDEX: "5"
+ secrets:
+ - name: pdr-secret
+ mount_path: /var/db_secret
+input:
+ pfs:
+ name: IN_PATH
+ repo: rmyoung_calibration_list_files
+ glob: /*/*/*/*
+ empty_files: true
+parallelism_spec:
+ constant: 10
+autoscaling: true
+resource_requests:
+ memory: 500M
+ cpu: 0.5
+resource_limits:
+ memory: 1G
+ cpu: 1.5
+sidecar_resource_requests:
+ memory: 2G
+ cpu: 0.2
+datum_set_spec:
+ number: 1
+scheduling_spec:
+ node_selector:
+ cloud.google.com/compute-class: pach-pipeline-class
diff --git a/pipe/rmyoung/rmyoung_cron_daily_and_date_control.yaml b/pipe/rmyoung_hmr3300/rmyoung_cron_daily_and_date_control.yaml
similarity index 89%
rename from pipe/rmyoung/rmyoung_cron_daily_and_date_control.yaml
rename to pipe/rmyoung_hmr3300/rmyoung_cron_daily_and_date_control.yaml
index 68a702f1c..49a09782a 100644
--- a/pipe/rmyoung/rmyoung_cron_daily_and_date_control.yaml
+++ b/pipe/rmyoung_hmr3300/rmyoung_cron_daily_and_date_control.yaml
@@ -11,8 +11,8 @@ transform:
# kafka_start_date in the site-list file is the first full day from which data began streaming via Kafka
# END_DATE can be set or unset (remove line entirely to unset). If unset, end date will be yesterday.
OUT_PATH: /pfs/out
- START_DATE: "2025-07-29" # Inclusive
- END_DATE: "2025-09-02" # Inclusive
+ START_DATE: "2025-12-14" # Inclusive
+ END_DATE: "2025-12-21" # Inclusive
SOURCE_TYPE: "rmyoung"
stdin:
- "#!/bin/bash"
@@ -27,7 +27,7 @@ input:
overwrite: true
- pfs:
name: SITE_FILE
- repo: rmyoung_site_list
+ repo: rmyoung_hmr3300_site_list
glob: /site-list.json
resource_requests:
memory: 500M
@@ -41,4 +41,4 @@ sidecar_resource_requests:
autoscaling: true
scheduling_spec:
node_selector:
- cloud.google.com/compute-class: pach-pipeline-class
+ cloud.google.com/compute-class: pach-pipeline-class
\ No newline at end of file
diff --git a/pipe/rmyoung_hmr3300/rmyoung_hmr3300_calibration_group_and_convert.yaml b/pipe/rmyoung_hmr3300/rmyoung_hmr3300_calibration_group_and_convert.yaml
new file mode 100644
index 000000000..141e641e7
--- /dev/null
+++ b/pipe/rmyoung_hmr3300/rmyoung_hmr3300_calibration_group_and_convert.yaml
@@ -0,0 +1,164 @@
+---
+pipeline:
+ name: rmyoung_hmr3300_calibration_group_and_convert
+transform:
+ image: us-central1-docker.pkg.dev/neon-shared-service/neonscience/neon-is-cal-grp-conv:sha-1c9356d
+ cmd:
+ - sh
+ - "-c"
+ - |-
+ /bin/bash <<'EOF'
+      # Use bash-strict mode. See http://redsymbol.net/articles/unofficial-bash-strict-mode/
+ set -euo pipefail
+ IFS=$'\n\t'
+
+      # Refresh interim directories with each datum (otherwise they persist and cause problems)
+ rm -rf $OUT_PATH
+ rm -rf /tmp/pfs/kafka_merged
+ mkdir -p $OUT_PATH # R modules must have pfs in the repo structure
+ mkdir -p /tmp/pfs/kafka_merged # Filter joiner relies on the same path positions among inputs (i.e. repo name in 2nd position)
+
+ # Get source type from the data path that is present: /SOURCE_TYPE/YYYY/MM/DD
+ if [ ${DATA_PATH_TRINO+x} ]; then
+ source_type_path=$DATA_PATH_TRINO
+ elif [ ${DATA_PATH_KAFKA+x} ]; then
+ source_type_path=$DATA_PATH_KAFKA
+ else
+ echo "No Trino or Kafka data path present for this datum; skipping"
+ exit 0
+ fi
+
+ source_type=$(echo $source_type_path | cut -f $SOURCE_TYPE_INDEX -d "/")
+ echo "source_type = $source_type"
+
+ # Run filter-joiner for joining cal and data (using environment variables below as input parameters)
+ python3 -m filter_joiner.filter_joiner_main
+
+ # Combine data from Kafka and Trino
+ if [ $source_type = "rmyoung" ]; then
+ Rscript ./flow.kfka.comb.R \
+ DirIn=$OUT_PATH \
+ DirOut=/tmp/pfs/kafka_merged \
+ DirErr=/pfs/out/errored_datums \
+ FileSchmL0=$SCHEMA_L0_RMYOUNG \
+ DirSubCopy=calibration
+ elif [ $source_type = "hmr3300" ]; then
+ Rscript ./flow.kfka.comb.R \
+ DirIn=$OUT_PATH \
+ DirOut=/pfs/out \
+ DirErr=/pfs/out/errored_datums \
+ FileSchmL0=$SCHEMA_L0_HMR3300 \
+ DirSubCopy=calibration
+ fi
+
+      # Run calibration conversion module: nominal conversion of speed and direction, each with its own constant measurement uncertainty term
+      # hmr3300 is not calibrated, so only run for rmyoung
+      if [ "$source_type" = "rmyoung" ]; then
+        Rscript ./flow.cal.conv.R \
+          DirIn=/tmp/pfs/kafka_merged \
+          DirOut=/pfs/out \
+          DirErr=/pfs/out/errored_datums \
+          ConvFuncTerm1=def.cal.conv.nmnl:speed \
+          ConvFuncTerm2=def.cal.conv.nmnl:direction \
+          "nomVal=speed:0.1666667|direction:355" \
+          "nomCalID=speed:CVALB1|direction:CVALA1" \
+          "TermQf=speed|direction" \
+          UcrtFuncTerm1=def.ucrt.meas.cnst:speed \
+          UcrtFuncTerm2=def.ucrt.meas.cnst:direction \
+          FileSchmData=$SCHEMA_CAL_RMYOUNG \
+          FileSchmQf=$SCHEMA_FLAGS_RMYOUNG
+      fi
+ EOF
+ env:
+ # Environment variables for filter-joiner.
+ CONFIG: |
+ ---
+ # Configuration for filter-joiner module that will bring together the data and calibrations
+ # In Pachyderm root will be index 0, 'pfs' index 1, and the repo name index 2.
+ # Metadata indices will typically begin at index 3.
+ input_paths:
+ - path:
+ name: DATA_PATH_KAFKA
+ # Filter for data directory
+ glob_pattern: /pfs/DATA_PATH_KAFKA/*/*/*/*/*/**
+ # Join on named location (already joined below by source type and day)
+ join_indices: [7]
+ outer_join: true
+ - path:
+ name: DATA_PATH_TRINO
+ # Filter for data directory
+ glob_pattern: /pfs/DATA_PATH_TRINO/*/*/*/*/*/**
+ # Join on named location (already joined below by source type and day)
+ join_indices: [7]
+ outer_join: true
+ - path:
+ name: CALIBRATION_PATH
+ glob_pattern: /pfs/CALIBRATION_PATH/*/*/*/*/*/*/**
+ # Join on named location (already joined below by day)
+ join_indices: [7]
+ outer_join: true
+ LOG_LEVEL: INFO
+ OUT_PATH: /tmp/pfs/filter_joined
+ RELATIVE_PATH_INDEX: "3" # Must be consistent across inputs
+ LINK_TYPE: COPY # options are COPY or SYMLINK.
+ # Environment variables for calibration module
+ PARALLELIZATION_INTERNAL: '3' # Option for calibration conversion module
+ # Environment variables for bash code
+ SOURCE_TYPE_INDEX: '4'
+input:
+  cross:
+  - pfs:
+      name: SCHEMA_L0_RMYOUNG
+      repo: rmyoung_hmr3300_avro_schemas
+      glob: /rmyoung/rmyoung_l0.avsc
+  - pfs:
+      name: SCHEMA_L0_HMR3300
+      repo: rmyoung_hmr3300_avro_schemas
+      glob: /hmr3300/hmr3300_l0.avsc
+  - pfs:
+      name: SCHEMA_CAL_RMYOUNG
+      repo: rmyoung_hmr3300_avro_schemas
+      glob: /rmyoung/rmyoung_calibrated.avsc
+  - pfs:
+      name: SCHEMA_FLAGS_RMYOUNG
+      repo: rmyoung_hmr3300_avro_schemas
+      glob: /rmyoung/rmyoung_calibration_flags.avsc
+  - join:
+    - pfs:
+        name: CALIBRATION_PATH
+        repo: rmyoung_calibration_assignment
+        glob: /(rmyoung/*/*/*)
+        joinOn: $1 # Join key is SOURCE_TYPE/YYYY/MM/DD so it can match the data inputs below
+        outer_join: true
+        empty_files: false # Make sure to use false if LINK_TYPE=COPY. Can also be set to false for LINK_TYPE=SYMLINK.
+    - pfs:
+        name: DATA_PATH_TRINO
+        repo: rmyoung_hmr3300_data_source_trino
+        glob: /(*/*/*/*)
+        joinOn: $1 # Join key is SOURCE_TYPE/YYYY/MM/DD (one datum per source type and day)
+        outer_join: true
+        empty_files: false # Make sure to use false if LINK_TYPE=COPY. Can also be set to false for LINK_TYPE=SYMLINK.
+    - pfs:
+        name: DATA_PATH_KAFKA
+        repo: rmyoung_hmr3300_data_source_kafka
+        glob: /(*/*/*/*)
+        joinOn: $1 # Join key is SOURCE_TYPE/YYYY/MM/DD (one datum per source type and day)
+        outer_join: true
+        empty_files: false # Make sure to use false if LINK_TYPE=COPY. Can also be set to false for LINK_TYPE=SYMLINK.
+parallelism_spec:
+ constant: 5
+autoscaling: true
+resource_requests:
+ memory: 1.2G
+ cpu: 1.3
+resource_limits:
+ memory: 2G
+ cpu: 2
+sidecar_resource_requests:
+ memory: 1G
+ cpu: 0.5
+datum_set_spec:
+ number: 5
+scheduling_spec:
+ node_selector:
+ cloud.google.com/compute-class: pach-pipeline-class
diff --git a/pipe/rmyoung_hmr3300/rmyoung_hmr3300_cron_daily_and_date_control.yaml b/pipe/rmyoung_hmr3300/rmyoung_hmr3300_cron_daily_and_date_control.yaml
new file mode 100644
index 000000000..7e8b51f04
--- /dev/null
+++ b/pipe/rmyoung_hmr3300/rmyoung_hmr3300_cron_daily_and_date_control.yaml
@@ -0,0 +1,59 @@
+---
+pipeline:
+ name: rmyoung_hmr3300_cron_daily_and_date_control
+transform:
+ image: us-central1-docker.pkg.dev/neon-shared-service/neonscience/neon-is-date-cntl:v2.0.1
+ env:
+ # START_DATE ("YYYY-MM-DD") and END_DATE ("YYYY-MM-DD") indicate the max date range (inclusive) to create the /Y/M/D folder structure
+ # If START_DATE is not set (remove line entirely to unset), the start_date and/or the kafka_start_date for each site will be used, as indicated in the site-list json file
+ # start_date field in the site-list file is the earliest date to pull data from a site
+ # kafka_start_date in the site-list file is the first full day from which data began streaming via Kafka
+ # END_DATE can be set or unset (remove line entirely to unset). If unset, end date will be yesterday.
+ OUT_PATH: /pfs/out
+ START_DATE: "2025-12-14" # Inclusive
+ END_DATE: "2025-12-21" # Inclusive
+ SOURCE_TYPE_1: "rmyoung"
+ SOURCE_TYPE_2: "hmr3300"
+ cmd:
+ - sh
+ - "-c"
+ - |-
+ /bin/bash <<'EOF'
+      # Use bash-strict mode. See http://redsymbol.net/articles/unofficial-bash-strict-mode/
+ set -euo pipefail
+ IFS=$'\n\t'
+
+ # rmyoung
+ export SOURCE_TYPE=$SOURCE_TYPE_1
+ python3 -m cron_daily_and_date_control.cron_daily_and_date_control_main
+
+ # hmr3300
+ export SOURCE_TYPE=$SOURCE_TYPE_2
+ python3 -m cron_daily_and_date_control.cron_daily_and_date_control_main
+
+ EOF
+input:
+ cross:
+ # This cron is the central driver for daily scheduled updates, such as data ingest and metadata loaders.
+ - cron:
+ name: tick
+ spec: "@never"
+ #spec: "0 0 * * *" # Run at 00:00 GMT
+ overwrite: true
+ - pfs:
+ name: SITE_FILE
+ repo: rmyoung_hmr3300_site_list
+ glob: /site-list.json
+resource_requests:
+ memory: 500M
+ cpu: 1
+resource_limits:
+ memory: 800M
+ cpu: 1.5
+sidecar_resource_requests:
+ memory: 500M
+ cpu: 0.5
+autoscaling: true
+scheduling_spec:
+ node_selector:
+ cloud.google.com/compute-class: pach-pipeline-class
diff --git a/pipe/rmyoung_hmr3300/rmyoung_hmr3300_data_source_gcs.yaml b/pipe/rmyoung_hmr3300/rmyoung_hmr3300_data_source_gcs.yaml
new file mode 100644
index 000000000..2e69fb66f
--- /dev/null
+++ b/pipe/rmyoung_hmr3300/rmyoung_hmr3300_data_source_gcs.yaml
@@ -0,0 +1,76 @@
+---
+pipeline:
+ name: rmyoung_data_source_gcs
+transform:
+ image_pull_secrets:
+ - battelleecology-quay-read-all-pull-secret
+ image: us-central1-docker.pkg.dev/neon-shared-service/neonscience/neon-is-l0-gcs-loader:v2.1.0
+ cmd:
+ - sh
+ - "-c"
+ - |-
+ /bin/bash <<'EOF'
+ # Use bash-strict mode. See http://redsymbol.net/articles/unofficial-bash-strict-mode/
+ set -euo pipefail
+ IFS=$'\n\t'
+
+ # Get today's date for evaluating kafka data retention period
+ date_today=$(date -u +%Y-%m-%d)
+ kafka_min_date=$(date -u -d "$KAFKA_RETENTION_DAYS days ago" +%Y-%m-%d)
+
+ # Get data from GCS bucket
+ echo "Processing $import_trigger"
+ p=${import_trigger#/pfs}
+ IFS="/"; arr=($p); unset IFS;
+ source_type=${arr[2]}
+ year=${arr[3]}
+ month=${arr[4]}
+ day=${arr[5]}
+ if [ $(date -u +%s -d $year-$month-$day) -lt $(date -u +%s -d $kafka_min_date) ]
+ then
+ echo "Extracting $year-$month-$day for $source_type from GCS"
+ python3 -m l0_gcs_loader.l0_gcs_loader
+ else
+ echo "$year/$month/$day is within the Kafka retention period and should be loaded from Kafka. Skipping..."
+ fi
+
+ EOF
+ env:
+ LOG_LEVEL: INFO
+ OUT_PATH: /pfs/out
+ KAFKA_RETENTION_DAYS: "15"
+ BUCKET_VERSION_PATH: "v2" # The root path of the bucket, indicative of the version (e.g. v2)
+ SOURCE_TYPE_INDEX: "3"
+ YEAR_INDEX: "4"
+ MONTH_INDEX: "5"
+ DAY_INDEX: "6"
+ # BUCKET_NAME: neon-l0-ingest # Always pull from prod bucket
+ secrets:
+ - name: l0-bucket # Using this secret will use the dev/cert/prod bucket linked to the Pachyderm environment
+ env_var: BUCKET_NAME
+ key: LO_BUCKET
+
+input:
+ pfs:
+ name: import_trigger
+ repo: rmyoung_cron_daily_and_date_control
+ # Glob must be daily
+ glob: "/rmyoung/*/*/*"
+output_branch: master
+parallelism_spec:
+ constant: 5
+autoscaling: true
+resource_requests:
+ memory: 400M
+ cpu: 0.5
+resource_limits:
+ memory: 800M
+ cpu: 1.5
+sidecar_resource_requests:
+ memory: 2.4G
+ cpu: 0.5
+datum_set_spec:
+ number: 1
+scheduling_spec:
+ node_selector:
+ cloud.google.com/compute-class: pach-pipeline-class
diff --git a/pipe/rmyoung_hmr3300/rmyoung_hmr3300_data_source_kafka.yaml b/pipe/rmyoung_hmr3300/rmyoung_hmr3300_data_source_kafka.yaml
new file mode 100644
index 000000000..229f2bc9a
--- /dev/null
+++ b/pipe/rmyoung_hmr3300/rmyoung_hmr3300_data_source_kafka.yaml
@@ -0,0 +1,166 @@
+---
+pipeline:
+ name: rmyoung_hmr3300_data_source_kafka
+transform:
+ image: us-central1-docker.pkg.dev/neon-shared-service/bei/neon-avro-kafka-loader:v6.0.5
+ image_pull_secrets:
+ - battelleecology-quay-read-all-pull-secret
+ env:
+ OUT_PATH: /pfs/out
+ LOG_LEVEL: INFO
+ SOURCE_TYPE_INDEX: "4"
+ YEAR_INDEX: "5"
+ MONTH_INDEX: "6"
+ DAY_INDEX: "7"
+ KAFKA_RETENTION_DAYS: "15"
+ secrets:
+ - name: pachyderm-kafka-auth
+ env_var: KAFKA_USER
+ key: KAFKA_USER
+ - name: pachyderm-kafka-auth
+ env_var: KAFKA_PASSWORD
+ key: KAFKA_PASSWORD
+ - name: pachyderm-kafka-auth
+ env_var: KAFKA_BROKER
+ key: KAFKA_BROKER
+ - name: pachyderm-kafka-auth
+ env_var: KAFKA_LOG_TOPIC
+ key: KAFKA_LOG_TOPIC
+ - name: pachyderm-kafka-auth
+ env_var: KAFKA_COMPACT_TOPIC
+ key: KAFKA_COMPACT_TOPIC
+ - name: l0-bucket
+ env_var: BUCKET_NAME
+ key: LO_BUCKET
+ - name: pdr-secret
+ env_var: PDR_HOST
+ key: hostname
+ - name: pdr-secret
+ env_var: PDR_DBNAME
+ key: database
+ - name: pdr-secret
+ env_var: PDR_USER
+ key: username
+ - name: pdr-secret
+ env_var: PDR_PASSWORD
+ key: password
+ cmd:
+ - sh
+ - "-c"
+ - |-
+ /bin/bash <<'EOF'
+
+      # Use bash-strict mode. See http://redsymbol.net/articles/unofficial-bash-strict-mode/
+ set -euo pipefail
+ IFS=$'\n\t'
+
+ # Get GCP zone
+ meta=$(curl -sH "Metadata-Flavor: Google" "http://metadata/computeMetadata/v1/instance/zone")
+ zone=$(echo $meta | cut -d "/" -f 4)
+ echo $zone
+
+ # Get today's date for evaluating kafka data retention period
+ date_today=$(date -u +%Y-%m-%d)
+ kafka_min_date=$(date -u -d "$KAFKA_RETENTION_DAYS days ago" +%Y-%m-%d)
+
+ # Get date from input path. Terminal path structure must be /SOURCE_TYPE/YYYY/MM/DD/SITE_FILE
+ # Datum must be set at /SOURCE_TYPE/YYYY/MM/DD or /SOURCE_TYPE/YYYY/MM/DD/SITE_FILE
+ source_type=$(echo $import_trigger | cut -f $SOURCE_TYPE_INDEX -d "/")
+ export SOURCE_TYPE=$source_type
+ date_path=$(echo $import_trigger | cut -f $YEAR_INDEX,$MONTH_INDEX,$DAY_INDEX -d "/")
+ echo $date_path
+ date_str=$(date -u +%Y-%m-%d -d $date_path)
+
+ # Get each site to run
+ if [[ -f ${import_trigger} ]]; then
+ import_trigger_glob="${import_trigger}"
+ else
+ import_trigger_glob="${import_trigger}/*"
+ fi
+
+      for site_kafka in $import_trigger_glob; do
+        site_file=$(basename $site_kafka) # Strip off any path prefix
+        site=$(echo $site_file | cut -f 1 -d "." --only-delimited) # Extract the site from site.kafka. Ignore site-only files (e.g. CPER vs. CPER.kafka)
+        type=$(echo $site_file | cut -f 2 -d "." --only-delimited) # Extract the 'kafka' from site.kafka
+        if [ "$type" != "kafka" ]
+        then
+          echo "$site_file is not indicated to be streaming from Kafka. Skipping..."
+          continue
+        elif [ "$(date -u +%s -d "$date_str")" -lt "$(date -u +%s -d "$kafka_min_date")" ]
+        then
+          echo -n "Cannot extract $date_str Kafka data for $site. "
+          echo "Today's date ($date_today) is beyond the Kafka retention period ($KAFKA_RETENTION_DAYS days). Skipping..."
+          continue
+        fi
+
+        # We are ok to run
+        echo "Extracting $date_str kafka data for $SOURCE_TYPE at $site"
+
+        # Get "current data" - data that came in on the specified day, which is the same day it was measured
+        # Note: We cannot use the --removeoffset flag on the kafka loader (which removes the offsets from the filenames). Doing so will often violate the Pachyderm requirement that different datums cannot write the same file.
+        ./extract-kafka-sensor.py -s $site -S $SOURCE_TYPE -D "$OUT_PATH/$SOURCE_TYPE" -d $date_str --only current --consumer client.rack=$zone
+
+        # Get "non-current data" - data that came in on the specified day, which is NOT the same day it was measured (computed in UTC like the other dates in this script)
+        date_str_1=$(date -u +%Y-%m-%d -d "$date_str + 1 day")
+        ./extract-kafka-sensor.py -s $site -S $SOURCE_TYPE -D "$OUT_PATH/$SOURCE_TYPE" -d $date_str_1 --only noncurrent --consumer client.rack=$zone
+
+      done
+
+ # Upload L0 files to bucket, compacting with any existing file with the same name
+ if [[ -d "$OUT_PATH/$SOURCE_TYPE" ]]; then
+ linkdir=$(mktemp -d)
+ shopt -s globstar
+ out_parquet_glob="${OUT_PATH}/**/*.parquet"
+        # Example: /pfs/out/rmyoung/2023/01/01/12345/data/file.parquet
+ echo "Linking output files to ${linkdir}"
+ # set -x # Uncomment for debugging
+ for f in $out_parquet_glob; do
+ # Parse the path
+ [[ "$f" =~ ^$OUT_PATH/(.*)/([0-9]+)/([0-9]+)/([0-9]+)/(.*)/data/(.*)$ ]]
+ fsourcetype="${BASH_REMATCH[1]}"
+ fyear="${BASH_REMATCH[2]}"
+ fmonth="${BASH_REMATCH[3]}"
+ fday="${BASH_REMATCH[4]}"
+ fsourceid="${BASH_REMATCH[5]}"
+ fname="${BASH_REMATCH[6]}"
+ # fname_out="${fsourcetype}_${fsourceid}_${fyear}-${fmonth}-${fday}.parquet" # Remove offsets from the filename
+ outdir="${linkdir}/v2/${fsourcetype}/ms=${fyear}-${fmonth}/source_id=${fsourceid}"
+ mkdir -p "${outdir}"
+ ln -s "${f}" "${outdir}/${fname}"
+
+ done
+
+ # Upload to bucket, compacting with any existing file
+ linkdir2=$(mktemp -d)
+ ./compact-bucket-copy.py --sourcepath "${linkdir}" -l "${linkdir2}" --stripoffset
+ ./compact-bucket-copy.py --sourcepath "${linkdir2}" --destbucket "${BUCKET_NAME}"
+
+ # set +x # Uncomment for debugging
+ rm -rf $linkdir
+ rm -rf "${linkdir2}"
+
+ fi
+ EOF
+input:
+ pfs:
+ name: import_trigger
+ repo: rmyoung_hmr3300_cron_daily_and_date_control
+ # Must be datum by day (e.g. /SOURCE_TYPE/*/*/*) or by day/site (e.g. /SOURCE_TYPE/*/*/*/*)
+ glob: "/*/*/*/*"
+parallelism_spec:
+ constant: 1
+autoscaling: true
+resource_requests:
+ memory: 300M
+ cpu: 1.6
+resource_limits:
+ memory: 1.5G
+ cpu: 2
+sidecar_resource_requests:
+ memory: 2G
+ cpu: 0.5
+datum_set_spec:
+ number: 1
+scheduling_spec:
+ node_selector:
+ cloud.google.com/compute-class: pach-pipeline-class
diff --git a/pipe/rmyoung/rmyoung_data_source_trino.yaml b/pipe/rmyoung_hmr3300/rmyoung_hmr3300_data_source_trino.yaml
similarity index 86%
rename from pipe/rmyoung/rmyoung_data_source_trino.yaml
rename to pipe/rmyoung_hmr3300/rmyoung_hmr3300_data_source_trino.yaml
index 438128cea..3fb14fff7 100644
--- a/pipe/rmyoung/rmyoung_data_source_trino.yaml
+++ b/pipe/rmyoung_hmr3300/rmyoung_hmr3300_data_source_trino.yaml
@@ -1,6 +1,6 @@
---
pipeline:
- name: rmyoung_data_source_trino
+ name: rmyoung_hmr3300_data_source_trino
transform:
image: us-central1-docker.pkg.dev/neon-shared-service/neonscience/neon-is-data-src-trino:v2.3.0
cmd:
@@ -26,6 +26,8 @@ transform:
echo "Processing $path"
p=${path#/pfs}
IFS="/"; arr=($p); unset IFS;
+ source_type=${arr[2]}
+ export SOURCE_TYPE=$source_type
year=${arr[3]}
month=${arr[4]}
day=${arr[5]}
@@ -40,11 +42,15 @@ transform:
echo "$year/$month/$day/$site is indicated to be streaming from Kafka. Skipping..."
continue
fi
- echo "Extracting data from Trino for $year/$month/$day/$site"
+
+ # Set env vars for trino loader
export GEN_DATE=$year-$month-$day
export GEN_SITE_NAME=$site
- export GEN_OUTPUT_DIR=$interimDir/$SOURCE_TYPE/$year/$month/$day
export REQUESTS_CA_BUNDLE=/etc/pki/tls/cert.pem
+        export GEN_YAML_CONF="/usr/src/app/genscript/configs/${SOURCE_TYPE}_streams.yaml"
+        export GEN_SCHEMA_FILE="/usr/src/app/schemas/buoy/${SOURCE_TYPE}.avsc"
+ echo "Extracting $SOURCE_TYPE from Trino for $year/$month/$day/$site"
+ export GEN_OUTPUT_DIR=$interimDir/$SOURCE_TYPE/$year/$month/$day
mkdir -p $GEN_OUTPUT_DIR
/usr/src/app/genscript/genparquet.py --storesitename --codec gzip
done
@@ -59,7 +65,7 @@ transform:
out_parquet_glob="${OUT_PATH}/**/*.parquet"
# Example: /pfs/out/li191r/2023/01/01/12345/data/file.parquet
echo "Linking output files to ${linkdir}"
- # set -x # Uncomment for troubleshooting
+ # set -x # Uncomment for debugging
for f in $out_parquet_glob; do
# Parse the path
[[ "$f" =~ ^$OUT_PATH/(.*)/([0-9]+)/([0-9]+)/([0-9]+)/(.*)/data/(.*)$ ]]
@@ -88,13 +94,11 @@ transform:
echo "Removing temporary files"
rm -rf $linkdir
- # set +x # Uncomment for troubleshooting
+ # set +x # Uncomment for debugging
fi
EOF
env:
- # Environment variables for data conversion step
- GEN_YAML_CONF: "/usr/src/app/genscript/configs/rmyoung_streams.yaml"
- GEN_SCHEMA_FILE: "/usr/src/app/schemas/buoy/rmyoung.avsc"
+ # Static environment variables for data conversion step
LOG_LEVEL: INFO
REQUESTS_CA_BUNDLE: "/etc/pki/tls/cert.pem"
# Environment variables for linkmerge step
@@ -106,8 +110,6 @@ transform:
DAY_INDEX: '6'
SOURCE_ID_INDEX: '7'
KAFKA_RETENTION_DAYS: "15"
- # Environment variables for bash code
- SOURCE_TYPE: 'rmyoung'
secrets:
- name: pachd-trino-secret
key: TRINO_HOST
@@ -124,24 +126,21 @@ transform:
input:
pfs:
name: import_trigger
- repo: rmyoung_cron_daily_and_date_control
- glob: "/rmyoung/*/*/*"
+ repo: rmyoung_hmr3300_cron_daily_and_date_control
+ glob: "/*/*/*/*"
output_branch: master
parallelism_spec:
- constant: 2
+ constant: 5
autoscaling: true
resource_requests:
- memory: 300M
- cpu: 0.8
+ memory: 400M
+ cpu: 1.2
resource_limits:
- memory: 600M
- cpu: 1.5
+ memory: 800M
+ cpu: 2
sidecar_resource_requests:
- memory: 1G
- cpu: 0.5
-sidecar_resource_limits:
memory: 3G
- cpu: 1.2
+ cpu: 0.5
datum_set_spec:
number: 1
scheduling_spec:
diff --git a/pipe/rmyoung_hmr3300/rmyoung_hmr3300_fill_date_gaps.yaml b/pipe/rmyoung_hmr3300/rmyoung_hmr3300_fill_date_gaps.yaml
new file mode 100644
index 000000000..b097445a2
--- /dev/null
+++ b/pipe/rmyoung_hmr3300/rmyoung_hmr3300_fill_date_gaps.yaml
@@ -0,0 +1,139 @@
+---
+pipeline:
+  name: rmyoung_hmr3300_fill_date_gaps
+transform:
+  image: us-central1-docker.pkg.dev/neon-shared-service/neonscience/neon-is-gap-fill-nonrglr:v1.0.3
+  cmd:
+    - sh
+    - "-c"
+    - |-
+      /bin/bash <<'EOF'
+      # Use bash-strict mode. See http://redsymbol.net/articles/unofficial-bash-strict-mode/
+      set -euo pipefail
+      IFS=$'\n\t'
+
+      # Refresh interim directories with each datum (otherwise they persist and cause problems)
+      rm -rf $OUT_PATH
+      mkdir -p $OUT_PATH
+
+      # Get source type from the 4th "/"-delimited field of LOCATION_PATH (terminal path structure /SOURCE_TYPE/YYYY/MM/DD)
+      if [ ${LOCATION_PATH+x} ]; then
+        source_type=$(echo $LOCATION_PATH | cut -f 4 -d "/")
+        echo "Source type: $source_type"
+      else
+        echo "LOCATION_PATH does not exist (no locations active for datum $DATA_PATH). Skipping..."
+        exit 0
+      fi
+
+      # Get empty file path based on source type. Unknown source types abort the datum here.
+      if [ $source_type = "rmyoung" ]; then
+        export EMPTY_FILE_PATH="/pfs/EMPTY_FILES/rmyoung"
+      elif [ $source_type = "hmr3300" ]; then
+        export EMPTY_FILE_PATH="/pfs/EMPTY_FILES/hmr3300"
+      else
+        echo "No source type determined. Check inputs."
+        exit 1
+      fi
+
+      # Run first module - date-gap-filler (using environment variables below as input parameters)
+      python3 -m date_gap_filler.date_gap_filler_main
+
+      # Run second module - gap filler for non-regularized data (directories and schemas differ by source type)
+      if [ $source_type = "rmyoung" ]; then
+        Rscript ./flow.gap.fill.nonrglr.R \
+          DirIn=/tmp/pfs/date_filled \
+          DirOut=/pfs/out \
+          DirErr=/pfs/out/errored_datums \
+          "DirFill=data|uncertainty_data|flags" \
+          WndwFill="01" \
+          "DirSubCopy=location|uncertainty_coef" \
+          "FileSchm=data:$SCHEMA_DATA_RMYOUNG|flags:$SCHEMA_FLAGS_RMYOUNG"
+      elif [ $source_type = "hmr3300" ]; then
+        Rscript ./flow.gap.fill.nonrglr.R \
+          DirIn=/tmp/pfs/date_filled \
+          DirOut=/pfs/out \
+          DirErr=/pfs/out/errored_datums \
+          "DirFill=data" \
+          WndwFill="01" \
+          "DirSubCopy=location" \
+          "FileSchm=data:$SCHEMA_DATA_HMR3300"
+      fi
+      EOF
+ env:
+ # Environment variables for date gap filler
+ LOG_LEVEL: INFO
+ OUT_PATH: /tmp/pfs/date_filled
+ OUTPUT_DIRECTORIES: data,location,uncertainty_data,uncertainty_coef,flags
+ DATA_SOURCE_TYPE_INDEX: '3'
+ DATA_YEAR_INDEX: '4'
+ DATA_MONTH_INDEX: '5'
+ DATA_DAY_INDEX: '6'
+ DATA_LOCATION_INDEX: '7'
+ DATA_TYPE_INDEX: '8'
+ LOCATION_SOURCE_TYPE_INDEX: '3'
+ LOCATION_YEAR_INDEX: '4'
+ LOCATION_MONTH_INDEX: '5'
+ LOCATION_DAY_INDEX: '6'
+ LOCATION_INDEX: '7'
+ EMPTY_FILE_TYPE_INDEX: '4'
+ LINK_TYPE: COPY # options are COPY or SYMLINK. Use COPY for combined modules.
+ # Environment variables for regularizer
+ PARALLELIZATION_INTERNAL: '3' # Parallelization within R. If increased, adjust resource requests appropriately.
+input:
+ cross:
+ - pfs:
+ name: EMPTY_FILES
+ repo: rmyoung_hmr3300_empty_files
+ glob: /
+ empty_files: false # Make sure to use false if LINK_TYPE=COPY. Can also be set to false for LINK_TYPE=SYMLINK.
+ - pfs:
+ name: SCHEMA_DATA_RMYOUNG
+ repo: rmyoung_hmr3300_avro_schemas
+ glob: /rmyoung/rmyoung_calibrated.avsc
+ - pfs:
+ name: SCHEMA_FLAGS_RMYOUNG
+ repo: rmyoung_hmr3300_avro_schemas
+ glob: /rmyoung/rmyoung_calibration_flags.avsc
+ - pfs:
+ name: SCHEMA_DATA_HMR3300
+ repo: rmyoung_hmr3300_avro_schemas
+ glob: /hmr3300/hmr3300_l0.avsc
+ - group:
+ - pfs:
+ name: DATA_PATH
+ repo: rmyoung_hmr3300_location_group_and_restructure
+ glob: /(*/*/*/*)
+ group_by: $1
+ empty_files: false # Make sure to use false if LINK_TYPE=COPY. Can also be set to false for LINK_TYPE=SYMLINK.
+ - join:
+ - pfs:
+ name: LOCATION_PATH
+ repo: rmyoung_hmr3300_location_active_dates_assignment
+ glob: /(*/*/*/*)
+ joinOn: $1
+ group_by: $1
+ empty_files: false # Make sure to use false if LINK_TYPE=COPY. Can also be set to false for LINK_TYPE=SYMLINK.
+ - pfs:
+ name: DATE_LIMITER_PATH
+ repo: rmyoung_hmr3300_cron_daily_and_date_control
+ glob: /(*/*/*/*)
+ joinOn: $1
+ group_by: $1
+ empty_files: true # This can remain true even if LINK_TYPE=COPY
+parallelism_spec:
+ constant: 5
+autoscaling: true
+resource_requests:
+ memory: 2G
+ cpu: 3.3
+resource_limits:
+ memory: 3G
+ cpu: 4.5
+sidecar_resource_requests:
+ memory: 3G
+ cpu: 0.5
+datum_set_spec:
+ number: 1
+scheduling_spec:
+ node_selector:
+ cloud.google.com/compute-class: pach-pipeline-class
diff --git a/pipe/rmyoung_hmr3300/rmyoung_hmr3300_location_active_dates_assignment.yaml b/pipe/rmyoung_hmr3300/rmyoung_hmr3300_location_active_dates_assignment.yaml
new file mode 100644
index 000000000..12cc52792
--- /dev/null
+++ b/pipe/rmyoung_hmr3300/rmyoung_hmr3300_location_active_dates_assignment.yaml
@@ -0,0 +1,48 @@
+---
+pipeline:
+ name: rmyoung_hmr3300_location_active_dates_assignment
+transform:
+ cmd: ["/bin/bash"]
+ stdin:
+ - "#!/bin/bash"
+ - export ERR_PATH="/pfs/out/errored_datums$FILE_YEAR"
+ - Rscript
+ ./flow.loc.grp.asgn.R
+ DirIn=$DIR_IN
+ DirOut=/pfs/out
+ DirErr=$ERR_PATH
+ FileYear=$FILE_YEAR
+ TypeFile=namedLocation
+ "Prop=HOR|VER|name|description|site|Data Rate|active_periods"
+ image: us-central1-docker.pkg.dev/neon-shared-service/neonscience/neon-is-loc-grp-asgn:v1.3.2
+ # image_pull_secrets:
+ # - battelleecology-quay-read-all-pull-secret
+ env:
+ LOG_LEVEL: INFO
+input:
+ cross:
+ - pfs:
+ name: DIR_IN
+ repo: rmyoung_hmr3300_location_loader
+ glob: /*/*
+ - pfs:
+ name: FILE_YEAR
+ repo: rmyoung_hmr3300_cron_daily_and_date_control
+ glob: /data_year*.txt
+parallelism_spec:
+ constant: 2
+autoscaling: true
+resource_requests:
+ memory: 210M
+ cpu: 1.2
+resource_limits:
+ memory: 500M
+ cpu: 1.6
+sidecar_resource_requests:
+ memory: 2G
+ cpu: 0.3
+datum_set_spec:
+ number: 5
+scheduling_spec:
+ node_selector:
+ cloud.google.com/compute-class: pach-pipeline-class
diff --git a/pipe/rmyoung_hmr3300/rmyoung_hmr3300_location_asset.yaml b/pipe/rmyoung_hmr3300/rmyoung_hmr3300_location_asset.yaml
new file mode 100644
index 000000000..43c3125e8
--- /dev/null
+++ b/pipe/rmyoung_hmr3300/rmyoung_hmr3300_location_asset.yaml
@@ -0,0 +1,41 @@
+---
+pipeline:
+ name: rmyoung_hmr3300_location_asset
+transform:
+ image: us-central1-docker.pkg.dev/neon-shared-service/neonscience/neon-is-loc-asset-loader:v1.1.0
+ cmd:
+ - /bin/bash
+ stdin:
+ - '#!/bin/bash'
+ - export SOURCE_TYPE=$SOURCE_TYPE_1
+ - python3 -m location_asset_loader.location_asset_loader_main
+ - export SOURCE_TYPE=$SOURCE_TYPE_2
+ - python3 -m location_asset_loader.location_asset_loader_main
+ env:
+ OUT_PATH: /pfs/out
+ # ERR_PATH can be changed, it is user specified
+ ERR_PATH: /pfs/out/errored_datums
+ LOG_LEVEL: INFO
+ SOURCE_TYPE_1: rmyoung
+ SOURCE_TYPE_2: hmr3300
+ secrets:
+ - name: pdr-secret
+ mount_path: /var/db_secret
+input:
+ pfs:
+ repo: rmyoung_hmr3300_cron_daily_and_date_control_tick
+ glob: /*
+ empty_files: true
+autoscaling: true
+resource_requests:
+ memory: 100M
+ cpu: 0.15
+resource_limits:
+ memory: 300M
+ cpu: 0.5
+sidecar_resource_requests:
+ memory: 500M
+ cpu: 0.2
+scheduling_spec:
+ node_selector:
+ cloud.google.com/compute-class: pach-pipeline-class
\ No newline at end of file
diff --git a/pipe/rmyoung_hmr3300/rmyoung_hmr3300_location_asset_assignment.yaml b/pipe/rmyoung_hmr3300/rmyoung_hmr3300_location_asset_assignment.yaml
new file mode 100644
index 000000000..949744519
--- /dev/null
+++ b/pipe/rmyoung_hmr3300/rmyoung_hmr3300_location_asset_assignment.yaml
@@ -0,0 +1,48 @@
+---
+pipeline:
+ name: rmyoung_hmr3300_location_asset_assignment
+transform:
+ cmd: ["/bin/bash"]
+ stdin:
+ - "#!/bin/bash"
+ - export ERR_PATH="/pfs/out/errored_datums$FILE_YEAR"
+ - Rscript
+ ./flow.loc.grp.asgn.R
+ DirIn=$DIR_IN
+ DirOut=/pfs/out
+ DirErr=$ERR_PATH
+ FileYear=$FILE_YEAR
+ TypeFile=asset
+ "Prop=HOR|VER|install_date|remove_date|name|site|Data Rate"
+ image: us-central1-docker.pkg.dev/neon-shared-service/neonscience/neon-is-loc-grp-asgn:v1.3.2
+ # image_pull_secrets:
+ # - battelleecology-quay-read-all-pull-secret
+ env:
+ LOG_LEVEL: INFO
+input:
+ cross:
+ - pfs:
+ name: DIR_IN
+ repo: rmyoung_hmr3300_location_asset
+ glob: /*/*
+ - pfs:
+ name: FILE_YEAR
+ repo: rmyoung_hmr3300_cron_daily_and_date_control
+ glob: /data_year*.txt
+parallelism_spec:
+ constant: 2
+autoscaling: true
+resource_requests:
+ memory: 400M
+ cpu: 1.5
+resource_limits:
+ memory: 800M
+ cpu: 2
+sidecar_resource_requests:
+ memory: 2G
+ cpu: 0.3
+datum_set_spec:
+ number: 5
+scheduling_spec:
+ node_selector:
+ cloud.google.com/compute-class: pach-pipeline-class
diff --git a/pipe/rmyoung_hmr3300/rmyoung_hmr3300_location_group_and_restructure.yaml b/pipe/rmyoung_hmr3300/rmyoung_hmr3300_location_group_and_restructure.yaml
new file mode 100644
index 000000000..a2f2ab65a
--- /dev/null
+++ b/pipe/rmyoung_hmr3300/rmyoung_hmr3300_location_group_and_restructure.yaml
@@ -0,0 +1,102 @@
+---
+pipeline:
+ name: rmyoung_hmr3300_location_group_and_restructure
+transform:
+ image: us-central1-docker.pkg.dev/neon-shared-service/neonscience/neon-is-loc-grp-strc-comb:v1.2.3
+ cmd:
+ - sh
+ - "-c"
+ - |-
+ /bin/bash <<'EOF'
+ # Use bash-strict mode. See http://redsymbol.net/articles/unofficial-bash-strict-mode/
+ set -euo pipefail
+ IFS=$'\n\t'
+
+ # Refresh interim directories with each datum (otherwise they persist and cause problems)
+ rm -rf /tmp/pfs/filter_joined
+ rm -rf /tmp/pfs/structured
+ rm -rf /tmp/pfs/structuredCopy
+ mkdir -p /tmp/pfs/filter_joined
+
+ # Run first module - filter-joiner (using environment variables below as input parameters)
+ python3 -m filter_joiner.filter_joiner_main
+
+ # Run second module - structure repo by location
+ Rscript ./flow.loc.repo.strc.R \
+ DirIn=/tmp/pfs/filter_joined \
+ DirOut=/tmp/pfs/structured \
+ DirErr=/pfs/out/errored_datums \
+ Comb=TRUE
+
+ # Copy output to another interim folder to destroy links (cannot daisy chain links from pfs input to output)
+ cp -rL /tmp/pfs/structured /tmp/pfs/structuredCopy || : # Allow to fail without exit code (happens if step above produced no output)
+ rm -rf /tmp/pfs/filter_joined
+ rm -rf /tmp/pfs/structured
+
+ # Run third module - merge data by location
+ Rscript ./flow.loc.data.trnc.comb.R \
+ DirIn=/tmp/pfs/structuredCopy \
+ DirOut=/pfs/out \
+ DirErr=/pfs/out/errored_datums \
+ "DirSubCombData=data|flags|uncertainty_data" \
+ DirSubCombUcrt=uncertainty_coef \
+ DirSubCopy=location
+ EOF
+ env:
+ # Environment variables for filter-joiner
+ CONFIG: |
+ ---
+ # In Pachyderm root will be index 0, 'pfs' index 1, and the repo name index 2.
+ # Metadata indices will typically begin at index 3.
+ input_paths:
+ - path:
+ name: DATA_PATH
+ # Filter for data directory
+ glob_pattern: /pfs/DATA_PATH/*/*/*/*/*/**
+ # Join on named location (already joined below by day)
+ join_indices: [7]
+ outer_join: true
+ - path:
+ name: LOCATION_PATH
+ # Filter for data directory
+ glob_pattern: /pfs/LOCATION_PATH/*/*/*/*/*/**
+ # Join on named location (already joined below by day)
+ join_indices: [7]
+ OUT_PATH: /tmp/pfs/filter_joined
+ LOG_LEVEL: INFO
+ RELATIVE_PATH_INDEX: "3"
+ LINK_TYPE: COPY # options are COPY or SYMLINK. Use COPY for combined module.
+ # Environment variables for R modules
+ PARALLELIZATION_INTERNAL: '3'
+input:
+ join:
+ - pfs:
+ name: DATA_PATH
+ repo: rmyoung_hmr3300_calibration_group_and_convert
+ glob: /(*)/(*)/(*)/(*)
+ joinOn: $1/$2/$3
+ outer_join: true
+ empty_files: false # Make sure to use false if LINK_TYPE=COPY. Can also be set to false for LINK_TYPE=SYMLINK.
+ - pfs:
+ name: LOCATION_PATH
+ repo: rmyoung_hmr3300_location_asset_assignment
+ glob: /(*)/(*)/(*)/(*)
+ joinOn: $1/$2/$3
+ empty_files: false # Make sure to use false if LINK_TYPE=COPY. Can also be set to false for LINK_TYPE=SYMLINK.
+parallelism_spec:
+ constant: 5
+autoscaling: true
+resource_requests:
+ memory: 2.2G
+ cpu: 3.3
+resource_limits:
+ memory: 4G
+ cpu: 4.5
+sidecar_resource_requests:
+ memory: 3G
+ cpu: 0.5
+datum_set_spec:
+ number: 1
+scheduling_spec:
+ node_selector:
+ cloud.google.com/compute-class: pach-pipeline-class
diff --git a/pipe/rmyoung_hmr3300/rmyoung_hmr3300_location_loader.yaml b/pipe/rmyoung_hmr3300/rmyoung_hmr3300_location_loader.yaml
new file mode 100644
index 000000000..0b00fc0b1
--- /dev/null
+++ b/pipe/rmyoung_hmr3300/rmyoung_hmr3300_location_loader.yaml
@@ -0,0 +1,42 @@
+---
+pipeline:
+ name: rmyoung_hmr3300_location_loader
+transform:
+ image: us-central1-docker.pkg.dev/neon-shared-service/neonscience/neon-is-loc-loader:v1.0.0
+ cmd:
+ - /bin/bash
+ stdin:
+ - '#!/bin/bash'
+ - export SOURCE_TYPE=$SOURCE_TYPE_1
+ - python3 -m location_loader.location_loader_main
+ - export SOURCE_TYPE=$SOURCE_TYPE_2
+ - python3 -m location_loader.location_loader_main
+ env:
+ LOCATION_TYPE: CONFIG
+ SOURCE_TYPE_1: rmyoung
+ SOURCE_TYPE_2: hmr3300
+ OUT_PATH: /pfs/out
+ # ERR_PATH can be changed, it is user specified
+ ERR_PATH: /pfs/out/errored_datums
+ LOG_LEVEL: INFO
+ secrets:
+ - name: pdr-secret
+ mount_path: /var/db_secret
+input:
+ pfs:
+ repo: rmyoung_hmr3300_cron_daily_and_date_control_tick
+ glob: /*
+ empty_files: true
+autoscaling: true
+resource_requests:
+ memory: 100M
+ cpu: 0.1
+resource_limits:
+ memory: 300M
+ cpu: 0.5
+sidecar_resource_requests:
+ memory: 300M
+ cpu: 0.3
+scheduling_spec:
+ node_selector:
+ cloud.google.com/compute-class: pach-pipeline-class
diff --git a/pipe/rmyoung_hmr3300/site-list.json b/pipe/rmyoung_hmr3300/site-list.json
new file mode 100644
index 000000000..76b9296da
--- /dev/null
+++ b/pipe/rmyoung_hmr3300/site-list.json
@@ -0,0 +1,10 @@
+[
+ {
+ "site" : "BARC",
+ "kafka_start_date" : "2024-08-11"
+ },
+ {
+ "site" : "CRAM",
+ "kafka_start_date" : "2022-12-30"
+ }
+]
\ No newline at end of file
diff --git a/utilities/R_coding/flow.pack.is.proc.R b/utilities/R_coding/flow.pack.is.proc.R
index 7275eb846..7e2d83786 100644
--- a/utilities/R_coding/flow.pack.is.proc.R
+++ b/utilities/R_coding/flow.pack.is.proc.R
@@ -17,7 +17,7 @@ DirWrk00 <-
# Ed
#"/home/NEON/ayres/R/NEON-IS-data-processing"
# Nora
- #"/home/NEON/ncatolico/R/NEON-IS-data-processing"
+ "/home/NEON/ncatolico/R/NEON-IS-data-processing"
#Dave
#"~/code/NEON-IS-data-processing-ddurden"