Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion .github/workflows/html5-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,14 @@ jobs:
- name: Install tidy and pandoc
run: sudo apt install tidy pandoc

- name: Remove cached R libraries
run: rm -rf /home/runner/work/_temp/Library/data.table

- name: Install dependencies
run: R -e 'install.packages(c("knitr", "rmarkdown", "XML", "httr", "data.table", "maps", "dplyr", "tidyr", "xml2", "testthat", "archive"))'
run: R -e 'install.packages(c("knitr", "rmarkdown", "XML", "httr", "maps", "dplyr", "tidyr", "xml2", "testthat", "archive"))'

- name: Install data.table from source
run: Rscript -e 'install.packages("data.table", type = "source")'

- uses: r-lib/actions/check-r-package@v2
with:
Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: climate
Title: Interface to Download Meteorological (and Hydrological) Datasets
Version: 1.2.9
Version: 1.3.0
Authors@R: c(person(given = "Bartosz",
family = "Czernecki",
role = c("aut", "cre"),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand

export(.onAttach)
export(find_all_station_names)
export(hydro_imgw)
export(hydro_imgw_daily)
export(hydro_imgw_datastore)
Expand Down
13 changes: 13 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
# climate 1.3.0

* adapting code to most recent changes in the IMGW-PIB repository:
* fixes for `meteo_imgw_monthly()`
* changes in metadata and downloading functions for hydrological datasets
* updated Polish vignette for hydro dataset
* added `find_all_station_names()` function to find all station names in the IMGW-PIB
repository that match the provided name(s) and return all available datasets for
those stations (e.g.entry "WARSZAWA" will also search for: "WARSZAWA-BIELANY",
"WARSZAWA-OKECIE", "WARSZAWA-OBSERWATORIUM", etc.)



# climate 1.2.9

* fixes for corrupted header files in `meteo_imgw_` family of functions due to changes in the IMGW-PIB repository
Expand Down
24 changes: 9 additions & 15 deletions R/clean_metadata_hydro.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,15 @@
#' @noRd

clean_metadata_hydro = function(address, interval) {
temp = tempfile()

temp = tempfile()
test_url(link = address, output = temp)
a = read.csv(temp, header = FALSE, stringsAsFactors = FALSE,
fileEncoding = "CP1250", skip = 1, sep = "\t")$V1
a = gsub(a, pattern = "\\?", replacement = "")
a = gsub(x = a, pattern = "'", replacement = "")
a = trimws(gsub(x = a, pattern = "\\^", replacement = ""))
a = gsub(a, pattern = "\\s+", replacement = " ")

if (interval == "monthly") {
b = list(data.frame(parameters = a[1:10]))
}
if (interval == "daily") {
b = data.frame(parameters = a[1:10])
}
return(b)
a = read.csv(temp, header = FALSE, stringsAsFactors = FALSE)$V1

inds = grepl("^[A-Z]{2}.{5}", a)

code = trimws(substr(a, 1, 7))[inds]
name = trimws(substr(a, 10, nchar(a)))[inds]
a = data.frame(parameters = code, label = name)
return(a)
}
40 changes: 11 additions & 29 deletions R/hydro_imgw_daily.R
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ hydro_imgw_daily_bp = function(year,
unzip(zipfile = temp, exdir = temp2)
file2 = paste(temp2, dir(temp2), sep = "/")[1]
data2 = imgw_read(translit, file2)
colnames(data2) = meta[[2]][, 1]
colnames(data2) = gsub(x = meta[[2]][, 1], "^ZJ", "CO") # rename colnames starting with ^ZJ to be changed to ^CO:
zjaw_data = rbind(zjaw_data, data2)
}

Expand All @@ -140,10 +140,9 @@ hydro_imgw_daily_bp = function(year,
by = intersect(colnames(codz_data), colnames(zjaw_data)),
all.x = TRUE)

# station selection and names cleaning:
if (!is.null(station)) {
if (is.character(station)) {
inds = unique(as.numeric(unlist(sapply(station, function(x) grep(pattern = x, x = trimws(all_data[[length(all_data)]]$`Nazwa stacji`))))))
inds = unique(as.numeric(unlist(sapply(station, function(x) grep(pattern = x, x = trimws(all_data[[length(all_data)]]$PSNZWP))))))
if (any(is.na(inds)) || length(inds) == 0) {
env$logs = c(
env$logs,
Expand All @@ -166,39 +165,22 @@ hydro_imgw_daily_bp = function(year,
if (coords) {
all_data = merge(climate::imgw_hydro_stations, all_data,
by.x = "id",
by.y = "Kod stacji",
by.y = "PSKDSZS",
all.y = TRUE)
}

#station selection
if (!is.null(station)) {
if (is.character(station)) {
all_data = all_data[substr(all_data$`Nazwa stacji`, 1, nchar(station)) == station, ]
if (nrow(all_data) == 0) {
stop("Selected station(s) is not available in the database.", call. = FALSE)
}
} else if (is.numeric(station)) {
all_data = all_data[all_data$`Kod stacji` %in% station, ]
if (nrow(all_data) == 0) {
stop("Selected station(s) is not available in the database.", call. = FALSE)
}
} else {
stop("Selected station(s) are not in the proper format.", call. = FALSE)
}
}

all_data = as.data.frame(all_data)
all_data = all_data[do.call(order, all_data[grep(x = colnames(all_data), "Nazwa stacji|Rok hydro|w roku hydro|Dzie")]), ]
all_data = all_data[do.call(order, all_data[grep(x = colnames(all_data), "PSNZWP|COROKH|COMSCH|CODZIEN")]), ]
# fix dates and add as seperate column:
yy_ind = grep(x = colnames(all_data), "Rok hydrologiczny")
mm_ind = grep(x = colnames(all_data), "kalendarzowy")
dd_ind = grep(x = colnames(all_data), "Dzie")
yy_ind = grep(x = colnames(all_data), "COROKH")
mm_ind = grep(x = colnames(all_data), "COMSCK")
dd_ind = grep(x = colnames(all_data), "CODZIEN")
data_df = all_data[, c(yy_ind, mm_ind, dd_ind)]
data_df$yy = ifelse(data_df[, 2] >= 11, data_df[, 1] - 1, data_df[, 1])
all_data$Data = as.Date(ISOdate(year = data_df$yy, month = data_df[, 2], day = data_df[, 3]))
all_data = all_data[, c(1:3, ncol(all_data), 4:(ncol(all_data) - 1)), ]

all_data = hydro_shortening_imgw(all_data, col_names = col_names, ...)

#all_data = all_data[, c(1:3, ncol(all_data), 4:(ncol(all_data) - 1)), ]
#all_data = hydro_shortening_imgw(all_data, col_names = col_names, ...)
all_data = unique(all_data)
rownames(all_data) = 1:nrow(all_data)
return(all_data)
}
20 changes: 10 additions & 10 deletions R/hydro_imgw_monthly.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
hydro_imgw_monthly = function(year,
coords = FALSE,
station = NULL,
col_names= "short",
col_names = "short",
allow_failure = TRUE,
...) {

Expand Down Expand Up @@ -86,46 +86,46 @@ hydro_imgw_monthly_bp = function(year,
unzip(zipfile = temp, exdir = temp2)
file1 = paste(temp2, dir(temp2), sep = "/")[1]
data1 = imgw_read(translit, file1)
colnames(data1) = meta[[1]][, 1]
colnames(data1) = meta[, 1]
all_data[[i]] = data1
}
all_data = do.call(rbind, all_data)

all_data[all_data == 9999] = NA
all_data[all_data == 99999.999] = NA
all_data[all_data == 99.9] = NA
colnames(all_data) = meta[[1]][, 1]
colnames(all_data) = meta[, 1]
# coords
if (coords) {
all_data = merge(climate::imgw_hydro_stations, all_data, by.x = "id", by.y = "Kod stacji", all.y = TRUE)
all_data = merge(climate::imgw_hydro_stations, all_data, by.x = "id", by.y = "PSKDSZS", all.y = TRUE)
}
#station selection
if (!is.null(station)) {
if (is.character(station)) {
all_data = all_data[substr(all_data$`Nazwa stacji`, 1, nchar(station)) == station, ]
all_data = all_data[substr(all_data$PSNZWP, 1, nchar(station)) == station, ]
if (nrow(all_data) == 0) {
stop("Selected station(s) is not available in the database.", call. = FALSE)
}
} else if (is.numeric(station)) {
all_data = all_data[all_data$`Kod stacji` %in% station, ]
all_data = all_data[all_data$PSKDSZS %in% station, ]
if (nrow(all_data) == 0) {
stop("Selected station(s) is not available in the database.", call. = FALSE)
}
} else {
stop("Selected station(s) are not in the proper format.", call. = FALSE)
}
}
all_data = all_data[do.call(order, all_data[grep(x = colnames(all_data), "Nazwa stacji|Rok hydrologiczny|w roku hydro")]), ]
all_data = all_data[do.call(order, all_data[grep(x = colnames(all_data), "PSNZWP|MCROKH")]), ]
# fix dates and add as seperate column:
yy_ind = grep(x = colnames(all_data), "Rok hydrologiczny")
mm_ind = grep(x = colnames(all_data), "kalendarzowy")
yy_ind = grep(x = colnames(all_data), "MCROKH")
mm_ind = grep(x = colnames(all_data), "MCMSCK")
data_df = all_data[, c(yy_ind, mm_ind)]
data_df$day = 1
data_df$yy = ifelse(data_df[, 2] >= 11, data_df[, 1] - 1, data_df[, 1])
all_data$Data = as.Date(ISOdate(year = data_df$yy, month = data_df[, 2], day = data_df$day))
all_data = all_data[, c(1:3, ncol(all_data), 4:(ncol(all_data) - 1)), ]

all_data = hydro_shortening_imgw(all_data, col_names = col_names, ...)
#all_data = hydro_shortening_imgw(all_data, col_names = col_names, ...)

return(all_data)
}
6 changes: 3 additions & 3 deletions R/hydro_metadata_imgw.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,13 @@ hydro_metadata_imgw_bp = function(interval) {

if (interval == "daily") {
# dobowe
address_meta1 = paste0(base_url, "dobowe/codz_info.txt")
address_meta2 = paste0(base_url, "dobowe/zjaw_info.txt")
address_meta1 = paste0(base_url, "dobowe/CODZ_publiczne_format.txt")
address_meta2 = paste0(base_url, "dobowe/ZJAW_publiczne_format.txt")
meta = list(clean_metadata_hydro(address_meta1, interval),
clean_metadata_hydro(address_meta2, interval))
} else if (interval == "monthly") {
#miesieczne
address_meta = paste0(base_url, "miesieczne/mies_info.txt")
address_meta = paste0(base_url, "miesieczne/MIES_publiczne_format.txt")
meta = clean_metadata_hydro(address_meta, interval)
} else {
stop("Wrong `interval` value. It should be either 'daily' or 'monthly'.")
Expand Down
2 changes: 1 addition & 1 deletion R/match_imgw_wmoid_inds.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#' Match WMO station IDs for IMGW SYNOP
#' @param station vector or station names provided to imgw_meteo_ family of functions
#' @param station vector or station names provided to meteo_imgw_ family of functions
#' @keywords internal
#' @noRd
match_imgw_wmoid_inds = function(station) {
Expand Down
7 changes: 4 additions & 3 deletions R/meteo_imgw.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@
#' @param col_names three types of column names possible: "short" - default, values with shorten names,
#' "full" - full English description, "polish" - original names in the dataset
#' @param ... other parameters that may be passed to the 'shortening' function that shortens column names
#' @param station vector of hydrological stations danepubliczne.imgw.pl
#' can be name of station CAPITAL LETTERS(character).
#' It accepts names (characters in CAPITAL LETTERS) or stations' IDs (numeric)
#' @param station name of meteorological station(s).
#' It accepts names (characters in CAPITAL LETTERS). Stations' IDs (numeric) are no longer supported.
#' Please note that station names may change over time and thus sometimes 2 names
#' are required in some cases, e.g. `c("POZNAŃ", "POZNAŃ-ŁAWICA")`.
#' @export
#' @return A data.frame with columns describing the meteorological parameters
#' (e.g. temperature, wind speed, precipitation) where each row represent a measurement,
Expand Down
1 change: 1 addition & 0 deletions R/meteo_imgw_daily.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ meteo_imgw_daily_bp = function(rank,

# match WMO ID of a given station(s) to download selectively for SYNOP stations
if (!is.null(station) && rank == "synop") {
station = find_all_station_names(station)
ids_to_download = match_imgw_wmoid_inds(station)
} else {
ids_to_download = NULL
Expand Down
4 changes: 2 additions & 2 deletions R/meteo_imgw_hourly.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
#' @param status leave the columns with measurement and observation statuses
#' (default status = FALSE - i.e. the status columns are deleted)
#' @param coords add coordinates of the station (logical value TRUE or FALSE)
#' @param station name or ID of meteorological station(s).
#' It accepts names (characters in CAPITAL LETTERS) or stations' IDs (numeric)
#' @param station name of meteorological station(s) (character vector)
#' @param col_names three types of column names possible: "short" - default,
#' values with shorten names, "full" - full English description,
#' "polish" - original names in the dataset
Expand Down Expand Up @@ -250,6 +249,7 @@ meteo_imgw_hourly_bp = function(rank,
# station selection and names cleaning:
if (!is.null(station)) {
if (is.character(station)) {
find_all_station_names(station)
inds = unique(as.numeric(unlist(sapply(station, function(x) grep(pattern = x, x = trimws(all_data$POST))))))
if (any(is.na(inds)) || length(inds) == 0) {
env$logs = c(
Expand Down
27 changes: 13 additions & 14 deletions R/meteo_imgw_monthly.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
#' @param status leave the columns with measurement and observation statuses
#' (default status = FALSE - i.e. the status columns are deleted)
#' @param coords add coordinates of the station (logical value TRUE or FALSE)
#' @param station name or ID of meteorological station(s).
#' It accepts names (characters in CAPITAL LETTERS) or stations' IDs (numeric).
#' @param station name of meteorological station(s).
#' It accepts names (characters in CAPITAL LETTERS). Stations' IDs (numeric) are no longer supported.
#' Please note that station names may change over time and thus sometimes 2 names
#' are required in some cases, e.g. `c("POZNAŃ", "POZNAŃ-ŁAWICA")`.
#' @param col_names three types of column names possible: "short" - default,
Expand Down Expand Up @@ -39,13 +39,13 @@
#' }
#'
meteo_imgw_monthly = function(rank = "synop",
year,
status = FALSE,
coords = FALSE,
station = NULL,
col_names = "short",
allow_failure = TRUE,
...) {
year,
status = FALSE,
coords = FALSE,
station = NULL,
col_names = "short",
allow_failure = TRUE,
...) {
if (allow_failure) {
tryCatch(
meteo_imgw_monthly_bp(
Expand Down Expand Up @@ -113,7 +113,7 @@ meteo_imgw_monthly_bp = function(rank,
ind = lapply(years_in_catalogs, function(x) sum(x %in% year) > 0)
catalogs = catalogs[unlist(ind)]

all_data = vector("list", length = length(catalogs))
all_data = NULL

for (i in seq_along(catalogs)) {
catalog = gsub(catalogs[i], pattern = "/", replacement = "")
Expand Down Expand Up @@ -155,8 +155,8 @@ meteo_imgw_monthly_bp = function(rank,
if (file.exists(file2)) {
data2 = imgw_read(translit, file2)
colnames(data2) = meta[[2]]$parameters
for (labs in seq_along(meta[[1]]$parameters)) {
attr(data2[[labs]], "label") = meta[[1]]$label[[labs]]
for (labs in seq_along(meta[[2]]$parameters)) {
attr(data2[[labs]], "label") = meta[[2]]$label[[labs]]
}
data2$POST = trimws(data2$POST)
data.table::setDT(data2)
Expand Down Expand Up @@ -248,7 +248,6 @@ meteo_imgw_monthly_bp = function(rank,
# adding option to shorten columns and removing duplicates:
# TODO: turned off temporarily, consistent with daily implementation
# all_data = meteo_shortening_imgw(all_data, col_names = col_names, ...)
rownames(all_data) = NULL

rownames(all_data) = 1:nrow(all_data)
return(all_data) # clipping to selected years only
}
33 changes: 33 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,36 @@ remove_status = function(df) {

return(df)
}



#' Find all variants of station' names
#'
#' For IMGW-PIB stations different naming were used historically. For example,
#' `POZNAŃ` and ``POZNAŃ-ŁAWICA, thus both names should be used when searching
#'for the station. This function finds all variants of station' names
#' status information and expand the created object
#'
#' @param station_name character vector of station names
#' @export
#' @returns character vector of station names with all variants of station's names
#' @examples {
#' find_all_station_names(c("WARSZAWA", "POZNAŃ"))
#' }

find_all_station_names = function(station_name) {

pattern = paste0("(?=.*", toupper(station_name), ")(?=.*-)")
matches = unlist(
sapply(pattern, function(x) {
grep(x, climate::imgw_meteo_stations$station,
perl = TRUE,
ignore.case = TRUE,
value = TRUE)
}
)
)
names(matches) = NULL

return(sort(unique(c(station_name, matches))))
}
Loading