From 63de55a962c7b4df56c841a101a4842a93552f96 Mon Sep 17 00:00:00 2001 From: hillarymarler <152432687+hillarymarler@users.noreply.github.com> Date: Thu, 30 Apr 2026 07:37:10 -0400 Subject: [PATCH 1/4] Update pkgdown.yaml --- .github/workflows/pkgdown.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index c59d5a7..ba2181f 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -22,7 +22,7 @@ jobs: permissions: contents: write steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - uses: r-lib/actions/setup-pandoc@v2 @@ -41,7 +41,7 @@ jobs: - name: Deploy to GitHub pages 🚀 if: github.event_name != 'pull_request' - uses: JamesIves/github-pages-deploy-action@v4.5.0 + uses: JamesIves/github-pages-deploy-action@v4.8.0 with: clean: false branch: gh-pages From a7b5c4f464b884f3ec05d0d630c37f3c405a0158 Mon Sep 17 00:00:00 2001 From: hillarymarler <152432687+hillarymarler@users.noreply.github.com> Date: Thu, 30 Apr 2026 08:53:45 -0400 Subject: [PATCH 2/4] precompute getting started vignette --- vignettes/GettingStartedWithrExpertQuery.Rmd | 351 +- .../GettingStartedWithrExpertQuery.Rmd.orig | 509 + vignettes/rExpertQueryCybertownTraining.html | 8251 ----------------- .../MathJax.js.download | 19 - 4 files changed, 783 insertions(+), 8347 deletions(-) create mode 100644 vignettes/GettingStartedWithrExpertQuery.Rmd.orig delete mode 100644 vignettes/rExpertQueryCybertownTraining.html delete mode 100644 vignettes/rExpertQueryCybertownTraining_files/MathJax.js.download diff --git a/vignettes/GettingStartedWithrExpertQuery.Rmd b/vignettes/GettingStartedWithrExpertQuery.Rmd index 40a93fe..429d77c 100644 --- a/vignettes/GettingStartedWithrExpertQuery.Rmd +++ b/vignettes/GettingStartedWithrExpertQuery.Rmd @@ -3,7 +3,7 @@ title: "Getting Started With rExpertQuery" format: html editor: visual author: "ATTAINS Team" -date: "`r Sys.Date()`" +date: "2026-04-30" 
output: rmarkdown::html_vignette: toc: true @@ -21,22 +21,9 @@ editor_options: wrap: 72 --- -```{r setup, include = FALSE} -library(knitr) -library(dplyr) -library(DT) -``` -```{css, echo = F, eval = FALSE} -pre { - max-height: auto; - overflow-y: auto; -} -pre[class] { - max-height: auto; -} -``` + ## About Expert Query and rExpertQuery @@ -81,7 +68,8 @@ You can install and/or update the [rExpertQuery Package](https://github.com/USEPA/rExpertQuery) and all dependencies by running: -```{r install, eval = TRUE, echo = TRUE, results = 'hide', message = FALSE, warning = FALSE} + +``` r if (!"remotes" %in% installed.packages()) { install.packages("remotes") } @@ -92,9 +80,7 @@ library(dplyr) library(data.table) ``` -```{r testkey, include=FALSE} -testkey <- "53BVce47MQ3KXKibjx35g4ojaDQGh8qWfbdO8cE0" -``` + ## EQ_Actions @@ -103,22 +89,21 @@ ATTAINS Actions data by a variety of user supplied parameters. For example, you might like to run a relatively simple query return data for all Region 4 Actions that should not be included in Measures. 
-```{r eq.actions.r4, echo = TRUE, results = FALSE, message = FALSE, warning = FALSE} + +``` r # query Actions from Region 4 that are not included in Measures R4_actions_not_meas <- rExpertQuery::EQ_Actions(api_key = testkey, region = 4, in_meas = "No") ``` This query returns -`r formatC(nrow(R4_actions_not_meas), big.mark = ",")` results, so we'll +202 results, so we'll review only a small random subset of them in a data table just to get an idea of what those results look like: -```{r r4.url.prep, include = FALSE} -# Actions Documents containing "nutrient" -R4_actions_not_meas$planSummaryLink <- paste0('', R4_actions_not_meas$planSummaryLink, "") -``` -```{r r4.datatable} + + +``` r # create random subset of R4 query results R4_subset <- R4_actions_not_meas |> dplyr::slice_sample(n = 20) @@ -127,13 +112,19 @@ R4_subset <- R4_actions_not_meas |> DT::datatable(R4_subset, options = list(pageLength = 2, scrollX = TRUE)) ``` +``` +## Error in `loadNamespace()`: +## ! there is no package called 'webshot' +``` + You might also want to create a more specific query. For example, you might want to find all Actions from Missouri that were issued by EPA with completion dates between 2000-01-01 and 2020-12-31. As this more detailed query returns a smaller data set, the data table shows all results of the query -```{r eq.actions.ex2, results = TRUE, message = FALSE, warning = FALSE} + +``` r # query Actions for Missouri Actions with action agency "EPA" and parameter group "PATHOGENS" MO_epa <- rExpertQuery::EQ_Actions( api_key = testkey, statecode = "MO", @@ -144,16 +135,23 @@ MO_epa <- rExpertQuery::EQ_Actions( ) ``` -```{r mo.url.prep, include = FALSE} -# Actions Documents containing "nutrient" -MO_epa$planSummaryLink <- paste0('', MO_epa$planSummaryLink, "") +``` +## [1] "EQ_Actions: The current query will return 4 rows." 
``` -```{r mo.data.table} + + + +``` r # view MO results DT::datatable(MO_epa, options = list(pageLength = 10, scrollX = TRUE)) ``` +``` +## Error in `loadNamespace()`: +## ! there is no package called 'webshot' +``` + ## EQ_ActionsDocuments [**EQ_ActionsDocuments()**](https://usepa.github.io/rExpertQuery/reference/EQ_ActionsDocuments.html)queries @@ -162,7 +160,8 @@ unique features is that you can search the documents themselves by keyword or phrase. For example, you might want to find all Actions Documents which contained the keyword "nutrients". -```{r nutrients, results = TRUE, message = FALSE, warning = FALSE} + +``` r # Actions Documents containing "nutrient" Nutrient_docs <- rExpertQuery::EQ_ActionsDocuments( doc_query = "nutrient", @@ -170,50 +169,65 @@ Nutrient_docs <- rExpertQuery::EQ_ActionsDocuments( ) ``` -This query yields `r formatC(nrow(Nutrient_docs), big.mark = ",")` +``` +## [1] "EQ_ActionDocuments: The current query will return 16,962 rows." +``` + +This query yields 16,962 results. As well as providing the Action and Document Name for all results, *EQ_Actions()* also returns the column "actionDocumentUrl" containg the URL to link to the document. Becaue the query yields so many results, we'll take a look at a smaller subset of them in a data table to understand the structure of the results. -```{r nutrients.url.prep, include = FALSE} -# Actions Documents containing "nutrient" -Nutrient_docs$actionDocumentUrl <- paste0('', Nutrient_docs$actionDocumentUrl, "") -``` -```{r nutrients.data.table} + + +``` r Nutrient_docs_subset <- Nutrient_docs |> dplyr::slice_sample(n = 20) DT::datatable(Nutrient_docs_subset, options = list(pageLength = 5, scrollX = TRUE), escape = FALSE) ``` +``` +## Error in `loadNamespace()`: +## ! there is no package called 'webshot' +``` + You can also query by more typical parameters like organization name (org_name), EPA region (region) , or action type (act_type). 
The example below demonstrates using the function to search for 4B Restoration Approach documents from Pennsylvania. -```{r pa.4b.ex, results = TRUE, message = FALSE, warning = FALSE} + +``` r PA_4B <- rExpertQuery::EQ_ActionsDocuments( statecode = "PA", act_type = "4B Restoration Approach", api = testkey ) ``` -```{r pa.url.prep, include = FALSE} -# Actions Documents containing "nutrient" -PA_4B$actionDocumentUrl <- paste0('', PA_4B$actionDocumentUrl, "") +``` +## [1] "EQ_ActionDocuments: The current query will return 14 rows." ``` -```{r pa.4b.table} + + + +``` r PA_4B_subset <- PA_4B |> dplyr::slice_sample(n = 20) DT::datatable(PA_4B_subset, options = list(pageLength = 5, scrollX = TRUE), escape = FALSE) ``` -As there are only `r formatC(nrow(PA_4B), big.mark = ",")` 4B +``` +## Error in `loadNamespace()`: +## ! there is no package called 'webshot' +``` + +As there are only 17 4B Restoration Approach documents from Pennsylvania, we can review them all in the table below. As in the other Actions Document Example, the actionDocumentUrl column contains a link to the document. @@ -225,23 +239,34 @@ ATTAINS Assessments data. For many use cases, querying Assessments for a single organization or state is desirable. The next example shows how to query by a single state and use class for the latest cycle. -```{r ky.assess, results = TRUE, message = FALSE, warning = FALSE} + +``` r KY_assessments <- rExpertQuery::EQ_Assessments( statecode = "KY", api_key = testkey ) ``` -There are `r formatC(nrow(KY_assessments), big.mark = ",")` results for +``` +## [1] "EQ_Assessments: The current query will return 20,254 rows." +``` + +There are 20,254 results for the basic latest cycle Kentucky query. -```{r ky.table} + +``` r KY_subset <- KY_assessments |> dplyr::slice_sample(n = 20) DT::datatable(KY_subset, options = list(pageLength = 5, scrollX = TRUE), escape = FALSE) ``` +``` +## Error in `loadNamespace()`: +## ! 
there is no package called 'webshot' +``` + It is also possible to design more specific assessment queries. For example, you might want to narrow down the results to show causes for a use class in one region. The code below demonstrates querying for EPA @@ -249,7 +274,8 @@ Region 3 Assessments in the ECOLOGICAL_USE group with at least one parameter with the status "Cause". The data table below shows a random sample of the results from this Region 3 Query. -```{r R3.ecological, results = TRUE, message = FALSE, warning = FALSE} + +``` r R3_ecological <- rExpertQuery::EQ_Assessments( region = 3, use_group = "ECOLOGICAL_USE", @@ -258,13 +284,23 @@ R3_ecological <- rExpertQuery::EQ_Assessments( ) ``` -```{r r3.ecological.table} +``` +## [1] "EQ_Assessments: The current query will return 150,188 rows." +``` + + +``` r R3_ecological_subset <- R3_ecological |> dplyr::slice_sample(n = 20) DT::datatable(R3_ecological_subset, options = list(pageLength = 5, scrollX = TRUE), escape = FALSE) ``` +``` +## Error in `loadNamespace()`: +## ! there is no package called 'webshot' +``` + ## EQ_AssessmentUnits [**EQ_AssessmentUnits()**](https://usepa.github.io/rExpertQuery/reference/EQ_AssessmentUnits.html)queries @@ -274,27 +310,39 @@ assessment units, but input params can be adjusted to query for "Active". The example below shows a simple example for querying for Active assessment units for a single state. -```{r ri.aus, results = TRUE, message = FALSE, warning = FALSE} + +``` r RI_aus <- rExpertQuery::EQ_AssessmentUnits( statecode = "RI", api_key = testkey ) ``` +``` +## [1] "EQ_AssessmentUnits: The current query will return 1,772 rows." +``` + We can take a look at a random selection of 20 of the active assessment units from Rhode Island. -```{r ri.table} + +``` r RI_aus_subset <- RI_aus |> dplyr::slice_sample(n = 20) DT::datatable(RI_aus_subset, options = list(pageLength = 5, scrollX = TRUE), escape = FALSE) ``` +``` +## Error in `loadNamespace()`: +## ! 
there is no package called 'webshot' +``` + It is is also possible to search fo retired assessment units, as show in the example from Florida below. -```{r fl.retired, results = TRUE, message = FALSE, warning = FALSE} + +``` r FL_retired <- rExpertQuery::EQ_AssessmentUnits( statecode = "FL", au_status = "Retired", @@ -302,13 +350,23 @@ FL_retired <- rExpertQuery::EQ_AssessmentUnits( ) ``` -```{r fl.table} +``` +## [1] "EQ_AssessmentUnits: The current query will return 0 rows." +``` + + +``` r FL_ret_subset <- FL_retired |> dplyr::slice_sample(n = 20) DT::datatable(FL_ret_subset, options = list(pageLength = 5, scrollX = TRUE), escape = FALSE) ``` +``` +## Error in `loadNamespace()`: +## ! there is no package called 'webshot' +``` + ## EQ_AUsMLs [**EQ_AUsMLs()**](https://usepa.github.io/rExpertQuery/reference/EQ_AUsMLs.html)returns @@ -317,20 +375,27 @@ determinations at specific assessment units. Not all organizations provide monitoring location data in ATTAINS. The first example uses a state that does supply some monitoring location data. -```{r akausmls, results = TRUE, message = FALSE, warning = FALSE} + +``` r AK_ausmls <- rExpertQuery::EQ_AUsMLs( statecode = "AK", api_key = testkey ) +``` +``` +## [1] "EQ_AUsMLs: The current query will return 844 rows." +``` + +``` r AK_filt <- AK_ausmls |> dplyr::filter(!is.na(monitoringLocationId)) ``` While the resulting data set from the query has -`r formatC(nrow(AK_ausmls), big.mark = ",")` results, filtering the data +844 results, filtering the data set to retain only records where a monitoring location id has been recorded leads to a data set with -`r formatC(nrow(AK_filt), big.mark = ",")` results. +208 results. ## EQ_CatchCorr @@ -341,28 +406,40 @@ practice is to make your query as specific as possible. Many machines may not have enough memory to load an entire state's worth of catchment correspondence data in an R session. 
-```{r dc.catchcorr, results = TRUE, message = TRUE, warning = FALSE} + +``` r DC_catch <- rExpertQuery::EQ_CatchCorr( statecode = "DC", api_key = testkey ) ``` +``` +## [1] "EQ_CatchCorr: The current query will return 2,438 rows." +``` + The table below shows 20 random results from the DC catchment correspondence query. -```{r dc.table} + +``` r DC_subset <- DC_catch |> dplyr::slice_sample(n = 20) DT::datatable(DC_subset, options = list(pageLength = 5, scrollX = TRUE), escape = FALSE) ``` +``` +## Error in `loadNamespace()`: +## ! there is no package called 'webshot' +``` + It is also possible to focus on a specific assessment unit in catchment correspondence queries. The next example queries for a single Illinois assessment unit by its id. -```{r il.au.catch, results = TRUE, message = FALSE, warning = FALSE} + +``` r IL_N99_catch <- rExpertQuery::EQ_CatchCorr( statecode = "IL", auid = "IL_N-99", @@ -370,15 +447,25 @@ IL_N99_catch <- rExpertQuery::EQ_CatchCorr( ) ``` +``` +## [1] "EQ_CatchCorr: The current query will return 249 rows." +``` + The resulting table shows all results from this much smaller query. -```{r il.table} + +``` r IL_N99_subset <- IL_N99_catch |> dplyr::slice_sample(n = 20) DT::datatable(IL_N99_catch, options = list(pageLength = 5, scrollX = TRUE), escape = FALSE) ``` +``` +## Error in `loadNamespace()`: +## ! there is no package called 'webshot' +``` + ## EQ_NationalExtract [**EQ_NationalExtract()**](https://usepa.github.io/rExpertQuery/reference/EQ_NationalExtract.html)provides @@ -389,14 +476,24 @@ National Extracts to download. The following examples show how to download the TMDL and Actions extracts. The extracts are all large files and some may not open in R if there is not enough memory available. 
-```{r nat.tmdls, results = TRUE, message = FALSE, warning = FALSE} + +``` r Nat_tmdls <- rExpertQuery::EQ_NationalExtract("tmdl") ``` -```{r nat.actions, results = TRUE, message = FALSE, warning = FALSE} +``` +## [1] "EQ_NationalExtract: downloading Total Maximum Daily Load Profile (Expert Query National Extract). It was last updated on April 24, 2026 at 10:28 PM EDT." +``` + + +``` r Nat_actions <- rExpertQuery::EQ_NationalExtract("actions") ``` +``` +## [1] "EQ_NationalExtract: downloading Actions Profile (Expert Query National Extract). It was last updated on April 24, 2026 at 10:29 PM EDT." +``` + ## EQ_Sources [**EQ_Sources()**](https://usepa.github.io/rExpertQuery/reference/EQ_Sources.html)queries @@ -409,7 +506,8 @@ sources for one state by a parameter group, for example searching for sources related to the parameter group "HABITAT ALTERATIONS" in Wisconsin. -```{r wi.source.hab, results = TRUE, message = FALSE, warning = FALSE} + +``` r WI_habalt_sources <- rExpertQuery::EQ_Sources( statecode = "WI", param_group = "HABITAT ALTERATIONS", @@ -417,33 +515,54 @@ WI_habalt_sources <- rExpertQuery::EQ_Sources( ) ``` -There were `r formatC(nrow(WI_habalt_sources), big.mark = ",")` for this +``` +## [1] "EQ_Sources: The current query will return 587 rows." +``` + +There were 587 for this query, which can be reviewed in the data table below. -```{r wi.table} + +``` r DT::datatable(WI_habalt_sources, options = list(pageLength = 5, scrollX = TRUE), escape = FALSE) ``` +``` +## Error in `loadNamespace()`: +## ! there is no package called 'webshot' +``` + It is also possible to query directly be the name of the source. The example below queries all organizations for the source "LEGACY/HISTORICAL POLLUTANTS". 
-```{r source.legacy, results = TRUE, message = FALSE, warning = FALSE} + +``` r legacy_sources <- rExpertQuery::EQ_Sources( source = "LEGACY/HISTORICAL POLLUTANTS", api_key = testkey ) ``` -There were `r formatC(nrow(WI_habalt_sources), big.mark = ",")` records +``` +## [1] "EQ_Sources: The current query will return 188 rows." +``` + +There were 587 records for the source "LEGACY/HISTORICAL POLLUTANTS". These results can be reviewed in the table below to see which states, regions, etc. are associated with this source. -```{r legacy.table} + +``` r DT::datatable(legacy_sources, options = list(pageLength = 5, scrollX = TRUE), escape = FALSE) ``` +``` +## Error in `loadNamespace()`: +## ! there is no package called 'webshot' +``` + ## EQ_TMDLs [**EQ_TMDLs()**](https://usepa.github.io/rExpertQuery/reference/EQ_TMDLs.html) @@ -452,7 +571,8 @@ example below shows querying Hawaii for TMDLs with a source_type of "Both", which means that both point and non point sources were part of the TMDL. -```{r hi.tmdl.both, results = TRUE, message = FALSE, warning = FALSE} + +``` r HI_both_tmdls <- rExpertQuery::EQ_TMDLs( statecode = "HI", source_type = "Both", @@ -460,18 +580,29 @@ HI_both_tmdls <- rExpertQuery::EQ_TMDLs( ) ``` -There `r formatC(nrow(HI_both_tmdls), big.mark = ",")` can be reviewed +``` +## [1] "EQ_TMDL: The current query will return 474 rows." +``` + +There 474 can be reviewed in the data table below. -```{r hi.table} + +``` r DT::datatable(HI_both_tmdls, options = list(pageLength = 5, scrollX = TRUE), escape = FALSE) ``` +``` +## Error in `loadNamespace()`: +## ! there is no package called 'webshot' +``` + This example demonstrates using another combination of arguments in the query, searching for records from EPA Region 10 that pertain to the addressed parameter group "CAUSE UNKNOWN". 
-```{r unknown.r10, results = TRUE, message = FALSE, warning = FALSE} + +``` r R10_unknown <- rExpertQuery::EQ_TMDLs( region = 10, ad_param_group = "CAUSE UNKNOWN", @@ -479,13 +610,23 @@ R10_unknown <- rExpertQuery::EQ_TMDLs( ) ``` -There `r formatC(nrow(R10_unknown), big.mark = ",")` are displayed in +``` +## [1] "EQ_TMDL: The current query will return 588 rows." +``` + +There 588 are displayed in the table below. -```{r r10.uk.table} + +``` r DT::datatable(R10_unknown, options = list(pageLength = 5, scrollX = TRUE), escape = FALSE) ``` +``` +## Error in `loadNamespace()`: +## ! there is no package called 'webshot' +``` + ## EQ_DomainValues [**EQ_DomainValues()**](https://usepa.github.io/rExpertQuery/reference/EQ_DomainValues.html) @@ -494,16 +635,72 @@ be used in the other rExpertQuery functions. If run with no argument supplied, it will return a list of all available domains that can be used as an argument in **EQ_DomainValues()**. -```{r see.dom.vals, results = TRUE, message = TRUE, warning = FALSE} + +``` r rExpertQuery::EQ_DomainValues() ``` +``` +## [1] "EQ_DomainValues: getting list of available domain names. Values in the eq_param column can be used as inputs in EQ_DomainValues." +``` + +``` +## EQ_DomainValues: domain list retrieved from ATTAINS web services. 
+``` + +``` +## eq_param attains_ws_name attains_ws_field +## 1 act_agency AgencyCode name +## 2 act_agency AgencyCode name +## 3 act_status ActionStatusType name +## 4 act_type ActionType code +## 5 ad_param ParameterName name +## 6 ad_param_group ParameterGroupCodeType name +## 7 assess_basis AssessmentBasisCode name +## 8 assess_methods MethodTypeCode name +## 9 assess_types AssessmentTypeCode name +## 10 au_status StatusIndicator name +## 11 cause ParameterName name +## 12 delist_reason DelistingReasonCode name +## 13 doc_type ActionDocumentType name +## 14 file_type DocumentFileType name +## 15 loc_type LocationTypeCode name +## 16 org_id OrgName code +## 17 org_name OrgName name +## 18 param_attain ParameterAttainmentCode name +## 19 param_group ParameterGroupCodeType name +## 20 param_name ParameterName name +## 21 param_state_ir_cat StateIRCategoryCode name +## 22 param_status ParameterStatus name +## 23 source_scale SizeSourceScaleText name +## 24 source_type SourceName name +## 25 statecode OrgStateCode name +## 26 use_name UseName name +## 27 use_support UseAttainmentCode name +## 28 water_type WaterTypeCode name +``` + The example below shows how to return allowable values for "org_id". -```{r dom.vals.org, results = TRUE, message = TRUE, warning = FALSE} + +``` r Org_vals <- rExpertQuery::EQ_DomainValues(domain = "org_id") ``` -```{r org.id.table} +``` +## [1] "EQ_DomainValues: For org_id the values in the 'code' column of the function output are the allowable values for rExpert Query functions." +``` + +``` +## EQ_DomainValues: domain list retrieved from ATTAINS web services. +``` + + +``` r DT::datatable(Org_vals, options = list(pageLength = 5, scrollX = TRUE), escape = FALSE) ``` + +``` +## Error in `loadNamespace()`: +## ! 
there is no package called 'webshot' +``` diff --git a/vignettes/GettingStartedWithrExpertQuery.Rmd.orig b/vignettes/GettingStartedWithrExpertQuery.Rmd.orig new file mode 100644 index 0000000..40a93fe --- /dev/null +++ b/vignettes/GettingStartedWithrExpertQuery.Rmd.orig @@ -0,0 +1,509 @@ +--- +title: "Getting Started With rExpertQuery" +format: html +editor: visual +author: "ATTAINS Team" +date: "`r Sys.Date()`" +output: + rmarkdown::html_vignette: + toc: true + fig_caption: yes + fig_height: 8 + fig_width: 8 +vignette: > + %\VignetteEncoding{UTF-8} + %\VignetteIndexEntry{Getting Started With rExpertQuery} + %\VignetteEngine{knitr::rmarkdown} +description: An overview of rExpertQuery functions for querying and downloading ATTAINS data via Expert Query web services. +editor_options: + chunk_output_type: console + markdown: + wrap: 72 +--- + +```{r setup, include = FALSE} +library(knitr) +library(dplyr) +library(DT) +``` + +```{css, echo = F, eval = FALSE} +pre { + max-height: auto; + overflow-y: auto; +} + +pre[class] { + max-height: auto; +} +``` + +## About Expert Query and rExpertQuery + +[Expert Query](https://owapps.epa.gov/expertquery/attains/tmdl) empowers +users to access surface water quality data from the [Assessment and TMDL +Tracking and Implementation System +(ATTAINS)](https://www.epa.gov/waterdata/upload-data-resources-registered-attains-users) +encompassing Assessment decisions (under Clean Water Act Sections +303(d), 305(b), and 106) and Action data like Total Maximum Daily Loads +(TMDLs), Advance Restoration Plans (ARPs), and Protection Approaches. + +The tool supports querying and downloading data within and across +organizations (states or tribes), such as querying all the data within +an EPA region or all nutrient TMDLs nationally. It also allows users to +download national ATTAINS data extracts.
+ +rExpertQuery takes this a step further as its functions facilitate +querying the Expert Query web services from R functions which allow the +user to import the resulting data sets directly into R for more +detailed review and analysis. + +It is crucial to be aware of certain complexities inherent in the data +and querying process. Users are cautioned against direct comparisons +between states due to differences in water quality standards and +assessment methodologies. + +While Expert Query facilitates data querying, it lacks built-in summary +capabilities. Users are encouraged to download and summarize data, but +use caution when relying on row counts, as duplicate entries may occur. +This complexity arises from multiple tables that have many-to-many +relationships. Future vignettes and functions may provide examples of +how to summarize Expert Query data. + +## Installation + +You must first have R and RStudio installed to use rExpertQuery +(see instructions below if needed). rExpertQuery is in active +development, therefore we highly recommend that you update it and all of +its dependency libraries each time you use the package. + +You can install and/or update the [rExpertQuery +Package](https://github.com/USEPA/rExpertQuery) and all dependencies by +running: + +```{r install, eval = TRUE, echo = TRUE, results = 'hide', message = FALSE, warning = FALSE} +if (!"remotes" %in% installed.packages()) { + install.packages("remotes") +} + +remotes::install_github("USEPA/rExpertQuery", ref = "develop", dependencies = TRUE, force = TRUE) + +library(dplyr) +library(data.table) +``` + +```{r testkey, include=FALSE} +testkey <- "53BVce47MQ3KXKibjx35g4ojaDQGh8qWfbdO8cE0" +``` + +## EQ_Actions + +[**EQ_Actions()**](https://usepa.github.io/rExpertQuery/reference/EQ_Actions.html) queries +ATTAINS Actions data by a variety of user-supplied parameters.
For +example, you might like to run a relatively simple query to return data for +all Region 4 Actions that are not included in Measures. + +```{r eq.actions.r4, echo = TRUE, results = FALSE, message = FALSE, warning = FALSE} +# query Actions from Region 4 that are not included in Measures +R4_actions_not_meas <- rExpertQuery::EQ_Actions(api_key = testkey, region = 4, in_meas = "No") +``` + +This query returns +`r formatC(nrow(R4_actions_not_meas), big.mark = ",")` results, so we'll +review only a small random subset of them in a data table just to get an +idea of what those results look like: + +```{r r4.url.prep, include = FALSE} +# Actions Documents containing "nutrient" +R4_actions_not_meas$planSummaryLink <- paste0('', R4_actions_not_meas$planSummaryLink, "") +``` + +```{r r4.datatable} +# create random subset of R4 query results +R4_subset <- R4_actions_not_meas |> + dplyr::slice_sample(n = 20) + +# create data table +DT::datatable(R4_subset, options = list(pageLength = 2, scrollX = TRUE)) +``` + +You might also want to create a more specific query. For example, you +might want to find all Actions from Missouri that were issued by EPA +with completion dates between 2000-01-01 and 2020-12-31.
As this more +detailed query returns a smaller data set, the data table shows all +results of the query. + +```{r eq.actions.ex2, results = TRUE, message = FALSE, warning = FALSE} +# query Actions for Missouri Actions with action agency "EPA" and parameter group "PATHOGENS" +MO_epa <- rExpertQuery::EQ_Actions( + api_key = testkey, statecode = "MO", + act_agency = "EPA", + param_group = "PATHOGENS", + comp_date_start = "2000-01-01", + comp_date_end = "2020-12-31" +) +``` + +```{r mo.url.prep, include = FALSE} +# Actions Documents containing "nutrient" +MO_epa$planSummaryLink <- paste0('', MO_epa$planSummaryLink, "") +``` + +```{r mo.data.table} +# view MO results +DT::datatable(MO_epa, options = list(pageLength = 10, scrollX = TRUE)) +``` + +## EQ_ActionsDocuments + +[**EQ_ActionsDocuments()**](https://usepa.github.io/rExpertQuery/reference/EQ_ActionsDocuments.html) queries +ATTAINS Actions Documents data with user-supplied parameters. One of its +unique features is that you can search the documents themselves by +keyword or phrase. For example, you might want to find all Actions +Documents which contain the keyword "nutrient". + +```{r nutrients, results = TRUE, message = FALSE, warning = FALSE} +# Actions Documents containing "nutrient" +Nutrient_docs <- rExpertQuery::EQ_ActionsDocuments( + doc_query = "nutrient", + api_key = testkey +) +``` + +This query yields `r formatC(nrow(Nutrient_docs), big.mark = ",")` +results. As well as providing the Action and Document Name for all +results, *EQ_ActionsDocuments()* also returns the column "actionDocumentUrl" +containing the URL to link to the document. Because the query yields so +many results, we'll take a look at a smaller subset of them in a data +table to understand the structure of the results.
+ +```{r nutrients.url.prep, include = FALSE} +# Actions Documents containing "nutrient" +Nutrient_docs$actionDocumentUrl <- paste0('', Nutrient_docs$actionDocumentUrl, "") +``` + +```{r nutrients.data.table} +Nutrient_docs_subset <- Nutrient_docs |> + dplyr::slice_sample(n = 20) + +DT::datatable(Nutrient_docs_subset, options = list(pageLength = 5, scrollX = TRUE), escape = FALSE) +``` + +You can also query by more typical parameters like organization name +(org_name), EPA region (region) , or action type (act_type). The example +below demonstrates using the function to search for 4B Restoration +Approach documents from Pennsylvania. + +```{r pa.4b.ex, results = TRUE, message = FALSE, warning = FALSE} +PA_4B <- rExpertQuery::EQ_ActionsDocuments( + statecode = "PA", act_type = "4B Restoration Approach", + api = testkey +) +``` + +```{r pa.url.prep, include = FALSE} +# Actions Documents containing "nutrient" +PA_4B$actionDocumentUrl <- paste0('', PA_4B$actionDocumentUrl, "") +``` + +```{r pa.4b.table} +PA_4B_subset <- PA_4B |> + dplyr::slice_sample(n = 20) + +DT::datatable(PA_4B_subset, options = list(pageLength = 5, scrollX = TRUE), escape = FALSE) +``` + +As there are only `r formatC(nrow(PA_4B), big.mark = ",")` 4B +Restoration Approach documents from Pennsylvania, we can review them all +in the table below. As in the other Actions Document Example, the +actionDocumentUrl column contains a link to the document. + +## EQ_Assessments + +[**EQ_Assessments()**](https://usepa.github.io/rExpertQuery/reference/EQ_Assessments.html)queries +ATTAINS Assessments data. For many use cases, querying Assessments for a +single organization or state is desirable. The next example shows how to +query by a single state and use class for the latest cycle. 
+ +```{r ky.assess, results = TRUE, message = FALSE, warning = FALSE} +KY_assessments <- rExpertQuery::EQ_Assessments( + statecode = "KY", + api_key = testkey +) +``` + +There are `r formatC(nrow(KY_assessments), big.mark = ",")` results for +the basic latest cycle Kentucky query. + +```{r ky.table} +KY_subset <- KY_assessments |> + dplyr::slice_sample(n = 20) + +DT::datatable(KY_subset, options = list(pageLength = 5, scrollX = TRUE), escape = FALSE) +``` + +It is also possible to design more specific assessment queries. For +example, you might want to narrow down the results to show causes for a +use class in one region. The code below demonstrates querying for EPA +Region 3 Assessments in the ECOLOGICAL_USE group with at least one +parameter with the status "Cause". The data table below shows a random +sample of the results from this Region 3 Query. + +```{r R3.ecological, results = TRUE, message = FALSE, warning = FALSE} +R3_ecological <- rExpertQuery::EQ_Assessments( + region = 3, + use_group = "ECOLOGICAL_USE", + param_status = "Cause", + api_key = testkey +) +``` + +```{r r3.ecological.table} +R3_ecological_subset <- R3_ecological |> + dplyr::slice_sample(n = 20) + +DT::datatable(R3_ecological_subset, options = list(pageLength = 5, scrollX = TRUE), escape = FALSE) +``` + +## EQ_AssessmentUnits + +[**EQ_AssessmentUnits()**](https://usepa.github.io/rExpertQuery/reference/EQ_AssessmentUnits.html)queries +ATTAINS Assessment Units data. The default is to only return "Active" +assessment units, but input params can be adjusted to query for +"Retired" or "Historical" assessment units in addition to or instead of +"Active". The example below shows a simple example for querying for +Active assessment units for a single state. 
+ +```{r ri.aus, results = TRUE, message = FALSE, warning = FALSE} +RI_aus <- rExpertQuery::EQ_AssessmentUnits( + statecode = "RI", + api_key = testkey +) +``` + +We can take a look at a random selection of 20 of the active assessment +units from Rhode Island. + +```{r ri.table} +RI_aus_subset <- RI_aus |> + dplyr::slice_sample(n = 20) + +DT::datatable(RI_aus_subset, options = list(pageLength = 5, scrollX = TRUE), escape = FALSE) +``` + +It is also possible to search for retired assessment units, as shown in +the example from Florida below. + +```{r fl.retired, results = TRUE, message = FALSE, warning = FALSE} +FL_retired <- rExpertQuery::EQ_AssessmentUnits( + statecode = "FL", + au_status = "Retired", + api_key = testkey +) +``` + +```{r fl.table} +FL_ret_subset <- FL_retired |> + dplyr::slice_sample(n = 20) + +DT::datatable(FL_ret_subset, options = list(pageLength = 5, scrollX = TRUE), escape = FALSE) +``` + +## EQ_AUsMLs + +[**EQ_AUsMLs()**](https://usepa.github.io/rExpertQuery/reference/EQ_AUsMLs.html) returns +information on the monitoring locations used to make assessment +determinations at specific assessment units. Not all organizations +provide monitoring location data in ATTAINS. The first example uses a +state that does supply some monitoring location data. + +```{r akausmls, results = TRUE, message = FALSE, warning = FALSE} +AK_ausmls <- rExpertQuery::EQ_AUsMLs( + statecode = "AK", + api_key = testkey +) + +AK_filt <- AK_ausmls |> dplyr::filter(!is.na(monitoringLocationId)) +``` + +While the resulting data set from the query has +`r formatC(nrow(AK_ausmls), big.mark = ",")` results, filtering the data +set to retain only records where a monitoring location id has been +recorded leads to a data set with +`r formatC(nrow(AK_filt), big.mark = ",")` results. + +## EQ_CatchCorr + +[**EQ_CatchCorr()**](https://usepa.github.io/rExpertQuery/reference/EQ_CatchCorr.html) +queries ATTAINS Catchment Correspondence data.
These queries return +large files, particularly if a large geospatial area is defined, so best +practice is to make your query as specific as possible. Many machines +may not have enough memory to load an entire state's worth of catchment +correspondence data in an R session. + +```{r dc.catchcorr, results = TRUE, message = TRUE, warning = FALSE} +DC_catch <- rExpertQuery::EQ_CatchCorr( + statecode = "DC", + api_key = testkey +) +``` + +The table below shows 20 random results from the DC catchment +correspondence query. + +```{r dc.table} +DC_subset <- DC_catch |> + dplyr::slice_sample(n = 20) + +DT::datatable(DC_subset, options = list(pageLength = 5, scrollX = TRUE), escape = FALSE) +``` + +It is also possible to focus on a specific assessment unit in catchment +correspondence queries. The next example queries for a single Illinois +assessment unit by its id. + +```{r il.au.catch, results = TRUE, message = FALSE, warning = FALSE} +IL_N99_catch <- rExpertQuery::EQ_CatchCorr( + statecode = "IL", + auid = "IL_N-99", + api_key = testkey +) +``` + +The resulting table shows all results from this much smaller query. + +```{r il.table} +IL_N99_subset <- IL_N99_catch |> + dplyr::slice_sample(n = 20) + +DT::datatable(IL_N99_catch, options = list(pageLength = 5, scrollX = TRUE), escape = FALSE) +``` + +## EQ_NationalExtract + +[**EQ_NationalExtract()**](https://usepa.github.io/rExpertQuery/reference/EQ_NationalExtract.html) provides +an efficient method for importing the [Expert Query National +Extracts](https://owapps.epa.gov/expertquery/national-downloads). This +function requires one argument, "extract", to specify which of the +National Extracts to download. The following examples show how to +download the TMDL and Actions extracts. The extracts are all large files +and some may not open in R if there is not enough memory available. 
+ +```{r nat.tmdls, results = TRUE, message = FALSE, warning = FALSE} +Nat_tmdls <- rExpertQuery::EQ_NationalExtract("tmdl") +``` + +```{r nat.actions, results = TRUE, message = FALSE, warning = FALSE} +Nat_actions <- rExpertQuery::EQ_NationalExtract("actions") +``` + +## EQ_Sources + +[**EQ_Sources()**](https://usepa.github.io/rExpertQuery/reference/EQ_Sources.html) queries +ATTAINS Sources data. Not all organizations report sources in their +assessments. + +For those that do, querying by source can be another useful option +for querying ATTAINS data. The next code chunk shows how to query +sources for one state by a parameter group, for example searching for +sources related to the parameter group "HABITAT ALTERATIONS" in +Wisconsin. + +```{r wi.source.hab, results = TRUE, message = FALSE, warning = FALSE} +WI_habalt_sources <- rExpertQuery::EQ_Sources( + statecode = "WI", + param_group = "HABITAT ALTERATIONS", + api_key = testkey +) +``` + +There were `r formatC(nrow(WI_habalt_sources), big.mark = ",")` results for this +query, which can be reviewed in the data table below. + +```{r wi.table} +DT::datatable(WI_habalt_sources, options = list(pageLength = 5, scrollX = TRUE), escape = FALSE) +``` + +It is also possible to query directly by the name of the source. The +example below queries all organizations for the source +"LEGACY/HISTORICAL POLLUTANTS". + +```{r source.legacy, results = TRUE, message = FALSE, warning = FALSE} +legacy_sources <- rExpertQuery::EQ_Sources( + source = "LEGACY/HISTORICAL POLLUTANTS", + api_key = testkey +) +``` + +There were `r formatC(nrow(legacy_sources), big.mark = ",")` records +for the source "LEGACY/HISTORICAL POLLUTANTS". These results can be +reviewed in the table below to see which states, regions, etc. are +associated with this source. 
+ +```{r legacy.table} +DT::datatable(legacy_sources, options = list(pageLength = 5, scrollX = TRUE), escape = FALSE) +``` + +## EQ_TMDLs + +[**EQ_TMDLs()**](https://usepa.github.io/rExpertQuery/reference/EQ_TMDLs.html) +queries ATTAINS TMDL data and can utilize many search parameters. The +example below shows querying Hawaii for TMDLs with a source_type of +"Both", which means that both point and non point sources were part of +the TMDL. + +```{r hi.tmdl.both, results = TRUE, message = FALSE, warning = FALSE} +HI_both_tmdls <- rExpertQuery::EQ_TMDLs( + statecode = "HI", + source_type = "Both", + api_key = testkey +) +``` + +The `r formatC(nrow(HI_both_tmdls), big.mark = ",")` results can be reviewed +in the data table below. + +```{r hi.table} +DT::datatable(HI_both_tmdls, options = list(pageLength = 5, scrollX = TRUE), escape = FALSE) +``` + +This example demonstrates using another combination of arguments in the +query, searching for records from EPA Region 10 that pertain to the +addressed parameter group "CAUSE UNKNOWN". + +```{r unknown.r10, results = TRUE, message = FALSE, warning = FALSE} +R10_unknown <- rExpertQuery::EQ_TMDLs( + region = 10, + ad_param_group = "CAUSE UNKNOWN", + api_key = testkey +) +``` + +The `r formatC(nrow(R10_unknown), big.mark = ",")` results are displayed in +the table below. + +```{r r10.uk.table} +DT::datatable(R10_unknown, options = list(pageLength = 5, scrollX = TRUE), escape = FALSE) +``` + +## EQ_DomainValues + +[**EQ_DomainValues()**](https://usepa.github.io/rExpertQuery/reference/EQ_DomainValues.html) +is a function to help users understand the available domain values that can +be used in the other rExpertQuery functions. If run with no argument +supplied, it will return a list of all available domains that can be +used as an argument in **EQ_DomainValues()**. + +```{r see.dom.vals, results = TRUE, message = TRUE, warning = FALSE} +rExpertQuery::EQ_DomainValues() +``` + +The example below shows how to return allowable values for "org_id". 
+ +```{r dom.vals.org, results = TRUE, message = TRUE, warning = FALSE} +Org_vals <- rExpertQuery::EQ_DomainValues(domain = "org_id") +``` + +```{r org.id.table} +DT::datatable(Org_vals, options = list(pageLength = 5, scrollX = TRUE), escape = FALSE) +``` diff --git a/vignettes/rExpertQueryCybertownTraining.html b/vignettes/rExpertQueryCybertownTraining.html deleted file mode 100644 index 1e4320d..0000000 --- a/vignettes/rExpertQueryCybertownTraining.html +++ /dev/null @@ -1,8251 +0,0 @@ - - -
- - - - - - - - - - - -Expert -Query empowers users to access surface water quality data from the -Assessment -and TMDL Tracking and Implementation System (ATTAINS) encompassing -Assessment decisions (under Clean Water Act Sections 303(d), 305(b), and -106) and Action data like Total Maximum Daily Loads (TMDLs), Advance -Restoration Plans (ARPs), and Protection Approaches.
-The tool supports querying and downloading data within and across -organizations (states or tribes), such as querying all the data within -an EPA region or all nutrient TMDLs nationally. It also allows users to -download national ATTAINS data extracts.
-rExpertQuery takes this a step further as its functions facilitate -querying the Expert Query web services and importing the resulting data -sets directly in to R for more detailed review and analysis.
-It is crucial to be aware of certain complexities inherent in the -data and querying process. Users are cautioned against direct -comparisons between states due to differences in water quality standards -and assessment methodologies.
-While Expert Query facilitates data querying, it lacks built-in -summary capabilities. Users are encouraged to download and summarize -data, but use caution when relying on row counts, as duplicate entries -may occur. This complexity arises from multiple tables that have -many-to-many relationships. Several examples of summarizing Expert Query -data are included in this vignette. These examples use additional -packages for data wrangling, mapping, visualizing and other tasks. -Future rExpertQuery vignettes and functions may provide additional -examples of how to summarize Expert Query data.
-You must first have R and R Studio installed to use the rExpert Query -(see instructions below if needed). rExpert Query is in active -development, therefore we highly recommend that you update it and all of -its dependency libraries each time you use the package.
-You can install and/or update the rExpertQuery Package -and all dependencies by running:
-# install and load rExpertQuery
-if (!"remotes" %in% installed.packages()) {
- install.packages("remotes")
-}
-
-remotes::install_github("USEPA/rExpertQuery", ref = "training", dependencies = TRUE, force = TRUE)## Using GitHub PAT from the git credential store.
-## Downloading GitHub repo USEPA/rExpertQuery@training
-## Error in utils::download.file(url, path, method = method, quiet = quiet, :
-## download from 'https://api.github.com/repos/USEPA/rExpertQuery/tarball/training' failed
-
-This vignette also requires additional packages which can be -installed using the code chunk below:
-# # list of additional required packages
-# demo.pkgs <- c("datasets", "data.table", "dplyr", "DT", "ggplot2", "httr", "leaflet", "maps", "plotly", "sf", "usdata", "usmap", "stringi")
-#
-# # install additional packages not yet installed
-# installed_packages <- demo.pkgs %in% rownames(installed.packages())
-#
-# if (any(installed_packages == FALSE)) {
-# install.packages(demo.pkgs[!installed_packages])
-# }
-
-# load additional packages for this vignette
-library(datasets)
-library(data.table)
-library(dplyr)
-library(DT)
-library(ggplot2)## Warning: package 'ggplot2' was built under R version 4.4.3
-
-## Warning: package 'maps' was built under R version 4.4.3
-
-## Warning: package 'sf' was built under R version 4.4.3
-
-## Warning: package 'usdata' was built under R version 4.4.3
-
-## Warning: package 'usmap' was built under R version 4.4.3
-
-## Warning: package 'stringi' was built under R version 4.4.3
-As a final setup step, we will turn off scientific notation so that -any catchment id numbers from our queries are displayed correctly.
- -Most of the rExpertQuery functions require an API key. Users can -obtain their own by using Expert Query’s API -Key Signup Form.
-To run this vignette, we can use the following test key. Users should -obtain their own API key if they will be using rExpertQuery to develop -their own workflows.
- -There are ten exported functions in rExpertQuery. The first eight -allow users to query ATTAINS data via Expert Query -web services. These are:
-These first eight functions are analogous to the data profiles that -can be queries through the Expert Query User -Interface.
-An additional function allows users to download the Expert Query National Extracts of ATTAINS data which are -updated weekly. This function should be used when national data are -desired or when a query designed in one of the previous eight functions -would exceed the Expert Query web services limit of one millions rows. -This function is:
-The tenth function relies on ATTAINS -web services to provide allowable domain values for some -rExpertQuery parameters:
-In this session, we will use rExpertQuery functions, along with some -common R packages for data wrangling and visualization to answer some -commonly asked questions with Expert Query data. Members of the ATTAINS -team will be monitoring the chat for questions. If there are many -questions regarding a particular function, we may pause at the end of -that function’s material to review. Otherwise, questions are welcome at -the end of the demo as time permits or via e-mailing the ATTAINS team at -attains@epa.gov.
-EQ_NationalExtract()provides -an efficient method for importing the Expert -Query National Extracts. This function requires one argument, -“extract” to specify which of the National Extracts to download. The -following examples show how to download the TMDL and Actions extracts. -The extracts are all large files and some may not open in R if there is -not enough memory available.
-What is the most commonly reported impairment -nationally?
-After downloading the national assessments data, we’ll need to filter -to include only the latest cycle. Next, we can filter for rows -containing parameter causes, and summarize how many times each parameter -was listed as a cause of impairment. The additional packages used to -answer this question are dplyr (data manipulation) and DT (interface to DataTables -library).
-# import natioanl assessments profile
-assessments.nat <- rExpertQuery::EQ_NationalExtract("assessments")## [1] "EQ_NationalExtract: downloading Assessments Profile (Expert Query National Extract). It was last updated on May 23, 2025 at 10:00 PM EDT."
-## [1] "EQ_NationalExtract: unzipping Assessments Profile (Expert Query National Extract)."
-## [1] "EQ_NationalExtract: opening Assessments Profile (Expert Query National Extract)."
-# filter for latest assessments
-assessments.nat <- assessments.nat %>%
- dplyr::group_by(organizationId) %>%
- dplyr::slice_max(reportingCycle) %>%
- dplyr::select(-objectId) %>%
- dplyr::distinct() %>%
- dplyr::ungroup()
-
-# count impairments
-count.impair <- assessments.nat %>%
- # filter to retain only parameter causes
- dplyr::filter(parameterStatus == "Cause") %>%
- # subset data to find unique combinations
- dplyr::select(organizationId, assessmentUnitId, parameterName) %>%
- # remove duplicates
- dplyr::distinct() %>%
- # group by parameterName
- dplyr::group_by(parameterName) %>%
- # count listings for each parameterName
- dplyr::summarise(count = length(assessmentUnitId)) %>%
- # arrane in descending order
- dplyr::arrange(desc(count))
-
-# create data table of impairment count results
-DT::datatable(count.impair, options = list(pageLength = 10, scrollX = TRUE))Which waters are impaired for SELENIUM nationally? Which -states contain waters impaired for SELENIUM?
-Again, we’ll use the national assessments data filtered to the latest -cycle. Then we will filter for rows where the parameter is both selenium -and listed as a cause. The additional packages used in this step are dplyr and DT.
-# filter assessments data set for
-param.nat <- assessments.nat %>%
- # filter for selenium and cause
- dplyr::filter(parameterName == "SELENIUM" &
- parameterStatus == "Cause")
-
-# data table of selenium impairments
-DT::datatable(param.nat, options = list(pageLength = 5, scrollX = TRUE))Then we can create a list of states that have at least one selenium -impairment listed. We can also visualize this information on a map, with -states containing selenium impairments filled in blue. To build this -map, we first create a data frame with all 50 states and a -“SeleniumImpairment” column populated with “yes” or “no” to indicate -whether it has any listed selenium impairments.
-In addition to dplyr, this -code chunk also uses usdata (US -demographic data), datasets -(collection of data sets for use in R), usmap (US choropleth plotting) and ggplot2 (creating -graphics).
-# filter to create df of states that have at least one selenium impairment}
-state.names.sel <- param.nat %>%
- # select state column from filtered df
- dplyr::select(state) %>%
- # retain only distinct values for state
- dplyr::distinct() %>%
- # change state abbreviation to full state name
- dplyr::mutate(state = usdata::abbr2state(state)) %>%
- # create a list from the df
- dplyr::pull()
-
-# read in a list of all 50 state names
-state.names <- datasets::state.name
-
-# create df with of all 50 state names
-sel.map.data <- as.data.frame(state.names) %>%
- # rename state.name column to state
- dplyr::rename(state = state.names) %>%
- # create additional column to indicate if state has recorded selenium impairments ("yes") or not ("no")
- dplyr::mutate(SeleniumImpairment = factor(ifelse(state %in% state.names.sel,
- "yes", "no")))
-
-# create map, specifying data and values
-usmap::plot_usmap(
- regions = "state", data = sel.map.data,
- values = "SeleniumImpairment", color = "black"
-) +
-# fill states with selenium impairments with blue
- ggplot2::scale_fill_manual(
- values = c(`no` = "white", `yes` = "blue")
- ) +
-# remove legend
- ggplot2::theme(legend.position = "none")In the map above, the states with selenium impairments are -blue.
Which waters have a TMDL and are fully supporting -nationally?
-Starting with the filtered data frame of latest cycle assessments, we -can filter for Fully Supporting waters with an asssociatedActionType of -“TMDL”. Again, we can use a combination of dplyr and DT, for data manipulation and -creating a data table, respectively.
-# filter national assessments to includ only Fully Supporting w/ TMDL
-tmdl.fs <- assessments.nat %>%
- dplyr::filter(overallStatus == "Fully Supporting",
- associatedActionType == "TMDL"
- ) %>%
- # select columns to review
- dplyr::select(organizationId, state, assessmentUnitId, assessmentUnitName,
- associatedActionId) %>%
- # group by state and assessmentUnit
- dplyr::group_by(state, assessmentUnitId, assessmentUnitName) %>%
- # create column continaing all actionIds associated with the assessmentUnit
- dplyr::mutate(associatedActionIds = paste(unique(associatedActionId),
- collapse = ", ")) %>%
- # remove original associatedActionId column
- dplyr::select(-associatedActionId) %>%
- # retain only distinct rows
- dplyr::distinct()
-
-# fully supporting waters with tmdls data table
-DT::datatable(tmdl.fs, options = list(pageLength = 5, scrollX = TRUE))What are the impaired waters (cat 4 and 5 -nationally)?
-The workflow here is similar to the previous question, relying on dplyr and DT. This time we are retaining -a few more columns for inclusion in the data table and are filtering by -epaIRCategory for category 4 and 5 waters.
-# select subset of columns to include
-imp.waters <- assessments.nat %>%
- dplyr::select(organizationId, organizationName, organizationType,
- assessmentUnitId, assessmentUnitName, assessmentDate,
- epaIrCategory, waterType, waterSize, waterSizeUnits) %>%
- # retain only distinct rows
- dplyr::distinct() %>%
- # filter for category 4 and 5 waters
- dplyr::filter(epaIrCategory %in% c("4", "5"))The resulting data frame is large, with 128,870 unique impaired -waters nationally. Due to its size, we’ll include a random subset of 250 -results in a data table to review during the demo. You can view the full -results by viewing the imp.waters df.
- - -Which organizations have included any information about -monitoringLocations in ATTAINS? Which of these organizations have -included any monitoringLocationDataLinks?
-To answer this question, we’ll need to import a new national extract. -As we are curious about monitoringLocations and -monitoringLocationDataLinks, we will want to start with the Assessment -Units with Monitoring Locations data set. The param value for this in -EQ_NationalExtract is “au_mls”. Additional packages needed are dplyr and DT. We’ll use some additional -options in DT to highlight -useful rows in the final data table.
-The next step is to filter the data frame and retain only rows that -contain a value for monitoringLocationId. The resulting data frame, -called “filtered.mls” will allow us to determine (1) which organizations -have included monitoringLocationIdentifiers and (2) which organizations -have included monitoringLocationDataLinks.
-To find the organizations with monitoringLocationIdentifiers, we will -select only the rows organizationId, organizationName, and -organizationType and retain the unique rows.
-To find the organizations with monitoringLocationIdentifiers, we will -apply an additional filter and keep only rows that have a value in the -monitoringLocationDataLink column. Then select only the rows -organizationId, organizationName, and organizationType and retain the -unique rows.
-# filter for rows that contain monitoringLocationIds
-filtered.mls <- nat.ausmls %>%
- dplyr::filter(monitoringLocationId != "",
- !is.na(monitoringLocationId))
-
-# start with data set filtered to include only rows with MonitoringLocationIds
-orgs.mls <- filtered.mls %>%
- # select columns to describe orgs
- dplyr::select(organizationId, organizationName, organizationType) %>%
- dplyr::distinct()
-
-
-# start with data set filtered to include only rows with MonitoringLocationIds
-links.mls <- filtered.mls %>%
- # filter for orgs with monitoringLocationDataLinks
- dplyr::filter(monitoringLocationDataLink != "",
- !is.na(monitoringLocationDataLink)) %>%
- # select columns to describe orgs
- dplyr::select(organizationId, organizationName, organizationType) %>%
- # retain distinct rows
- dplyr::distinct()There are 45 with monitoringLocationId data in ATTAINS. Of these -organizations, 5 also contain monitoringLocationDataLinks. We can create -a data table to display this information and apply some conditional -formatting to highlight the organizations with -monitoringLocationIdentifiers in blue and those with -monitoringLocationIdentifiers and monitoringLocationDataLinks in -orange.
-# create data frame summarizing which organizations have monitoringLocationIds and monitoringLocationDataLinks in ATTAINS
-ml.table <- nat.ausmls %>%
- # create df of all orgs
- dplyr::select(organizationId, organizationName, organizationType) %>%
- # retain only distinct rows
- dplyr::distinct() %>%
- # add columns indicating whether the org has any data in ATTAINS for monitoringLocationId and monitoringLocationDataLink
- dplyr::mutate(monitoringLocationId = ifelse(organizationId %in%
- orgs.mls$organizationId,
- "yes", "no"),
- monitoringLocationDataLink = ifelse(organizationId %in%
- links.mls$organizationId,
- "yes", "no"))
-# create data table
-DT::datatable(ml.table, options = list(pageLength = 10, scrollX = TRUE)) %>%
- # highlight rows for orgs that have monitoringLocationId in blue
- DT::formatStyle(
- "monitoringLocationId", target = "row",
- backgroundColor = styleEqual(c("yes"), c("#66c3c5"))) %>%
- # highlight rows for orgs that also have data for data links in organge
- DT::formatStyle(
- "monitoringLocationDataLink", target = "row",
- backgroundColor = styleEqual(c("yes"), c("#f9b97d")))EQ_Assessments()queries -ATTAINS Assessments data. For many use cases, querying Assessments for a -single organization or state is desirable. The next example shows how to -query by a single state for the latest cycle. The default for the -function is to query for the latest cycle. If you would like to search -for an older cycle, you can set the “report_cycle” parameter equal to -the desired cycle year in YYYY format.
-How many stream miles in Kansas are impaired for primary -contact recreation in the latest cycle?
-First, we can construct our query to return only Primary Contact -Recreation Assessments from the latest cycle from Kansas for streams. -The remaining steps can be accomplished with dplyr functions to retain unique -combinations of assessmentUnitId, assessmentUnitName, waterSize and -waterSizeUnits, then summing the waterSize column to find the number of -stream miles.
-# set up query to search assessments for KS, primary contact recreation use, and stream, default behavior is to query latest report cycle
-ks.imp.streams <- rExpertQuery::EQ_Assessments(statecode = "KS",
- use_name = "Primary Contact Recreation",
- api_key = testkey,
- water_type = "STREAM")
-
-# calculate total water size
-ks.miles <- ks.imp.streams %>%
- # select required columns
- dplyr::select(assessmentUnitId, assessmentUnitName,
- waterSize, waterSizeUnits) %>%
- # retain distinct rows
- dplyr::distinct() %>%
- # sum water size
- dplyr::summarize(totalWaterSize = sum(waterSize)) %>%
- dplyr::pull()There are 11,129.78 stream miles in Kansas impaired for primary -contract recreation use.
What waters in Montana are impaired due to Zinc? How many -different uses are zinc impairments associated with in -Montana?
-We start by querying for Montana assessments with zinc as a cause and -can use dplyr and plotly (graphing library) to summarize -the results by useName and create a bar plot of the results.
-# query MT's latest assessments for zinc as a cause
-MT.impair <- rExpertQuery::EQ_Assessments(statecode = "MT",
- api_key = testkey,
- param_name = "ZINC",
- param_status = "Cause"
- )## [1] "EQ_Assessments: The current query will return 123 rows."
-Next we summarize the uses associated with zinc impairments and count -the number of unique assessmentUnitIdentifiers for each use to -facilitate creating a bar plot.
-# summarize by useName
-MT.impair.use <- MT.impair %>%
- dplyr::group_by(useName) %>%
- # count AUIDs by use
- dplyr::summarize(count = length(unique(assessmentUnitId)))There are 3 uses with zinc as a cause in Montana: Agricultural, -Aquatic Life, and Drinking Water . As our last step, we’ll create the -bar plot.
- - -Which waters are new to category 5 in the 2024 cycle for -Illinois? Which categories did they move from?
-This is another question that is well suited to using dplyr and plotly in addition to EQ_Assessments. -The query is for category 5 waters from Illinois from the latest -reporting cycle. In order to compare reporting cycles, we’ll also need -to provide a value for the previous cycle and run the query again, this -time for the previous reporting cycle and without specifying the -category.
-# get cat 5 waters from 2024 assessment cycle
-IL.latest <- rExpertQuery::EQ_Assessments(statecode = "IL",
- epa_ir_cat = "5",
- api_key = testkey)
-
-# select required columns from latest cycle data
-IL.latest <- IL.latest %>%
- dplyr::select(assessmentUnitId, epaIrCategory) %>%
- # retain unique rows
- dplyr::distinct()
-
-
-# get all assessments from previous cycle
-IL.previous <- rExpertQuery::EQ_Assessments(statecode = "IL",
- report_cycle = 2022,
- api_key = testkey)Next we’ll need to filter the previous cycle results to retain only -the assessment units that are category 5 waters in the latest cycle. -This allows us to combine the latest and previous cycle data frames and -filter them to create a data frame which contains only the assessment -units that changed from some other category to category 5 from the -previous cycle to the latest cycle.
-# filter previous cycle to include only cat 5 waters from 2024
-IL.previous <- IL.previous %>%
- # retain only cat 5 waters from latest cycle
- dplyr::filter(assessmentUnitId %in% IL.latest$assessmentUnitId) %>%
- # select auid and category
- dplyr::select(assessmentUnitId, epaIrCategory) %>%
- # rename to indicate previous cycle
- dplyr::rename(epaIrCategory.previous = epaIrCategory)
-
-# combine latest and previous cycle dfs
-IL.combine <- IL.latest %>%
- dplyr::left_join(IL.previous, dplyr::join_by(assessmentUnitId))
-
-# create df of the auids that changed to cat 5 in 2024
-IL.changes <- IL.combine %>%
- dplyr::filter(epaIrCategory != epaIrCategory.previous) %>%
- dplyr::distinct()Finally, we can summarize by counting the categories the new category -5s were previously in and create a pie chart to visualize. This example -sets up a color palette that mimics the one used in the ATTAINS report -module.
-# summarize what cats the new cat 5s were in previously
-IL.change.sum <- IL.changes %>%
- dplyr::group_by(epaIrCategory.previous) %>%
- dplyr::summarise(count = dplyr::n_distinct(assessmentUnitId))
-
-# set color palette for categories
-cat.pal <- c("#9dd49d", "#bee3be", "#c5c5c5", "#f6ce95", "#f9deb8", "#f0bab9", "#e89895","#4be367","#17b644", "#c5a577")
-# set category names for palette
-cat.pal <- setNames(cat.pal, c("1", "2", "3", "4A", "4B", "4C", "5", "5A", "5R", "4"))
-
-
-# create plotly plot
-plotly::plot_ly() %>%
- # add pipe based IL.change.sum
- plotly::add_pie(data = IL.change.sum, ~count, labels = ~epaIrCategory.previous,
- marker = list(
- colors = cat.pal,
- line = list(color = "white", width = 2)
- ), type = "pie",
- hoverinfo = "value", outsidetextfont = list(color = "black"),
- sort = FALSE
- ) %>%
- # add legend
- plotly::layout(legend = list(orientation = "h",
- xanchor = "center",
- x = 0.5))EQ_AssessmentUnits() -queries ATTAINS Assessment Units data. The default is to only return -“Active” assessment units, but input params can be adjusted to query for -“Retired” or “Historical” assessment units in addition to or instead of -“Active”.
-What are the retired assessment units for -Florida?
-This can be answered with a simple EQ_AssessmentUnits query -specifying Florida with an assesment unit status of “Retired” without -using functions from any additional packages. For easier review, we’ll -create a datatable with DT.
-EQ_TMDLs() -queries ATTAINS TMDL data and can utilize many search parameters.
-What are all the TMDLs in HI? How many are there (counted -as unique combinations of actionID/pollutant/assessmentUnitId)? How many -are associated with each pollutantGroup?
-To answer this question we’ll use dplyr and plotly. The EQ_TMDLs query is simple, -specifying only the state of Hawaii since we want to return all of -Hawaii’s TMDLs.
- -The536 rows can be reviewed in the data frame. Keep in mind that if -we want to count one TMDL as one unique combination of -assessmentUnitIdentifier, pollutant and actionId, we can’t simply count -the number of rows as this would lead to double counting as each -assessmentUnitIdentifier/pollutant/actionId may be associated with -multiple parameters, which means it will be listed more than once. The -code chunk below demonstrates one way to determine the number of TMDLs -by unique assessmentUnitIdentifier/pollutant/actionId. We can than -visualize TMDLs related to each pollutantGroup with a simple bar -graph.
-# determine the number of tmdls
-HI.count <- HI.tmdls %>%
- # select required columns
- dplyr::select(assessmentUnitId, assessmentUnitName, actionId, actionName, pollutant, pollutantGroup, fiscalYearEstablished, actionAgency, planSummaryLink) %>%
- # retain unique rows
- dplyr::distinct()
-
-# determine number per group
-HI.sum <- HI.count %>%
- # groupy by pollutantGroup
- dplyr::group_by(pollutantGroup,) %>%
- # count unique auids per pollutantGroup
- dplyr::summarize(count = dplyr::n_distinct(assessmentUnitId)) %>%
- # if pollutantGroup is missing, assing to group named "NULL"
- dplyr::mutate(pollutantGroup = ifelse(is.na(pollutantGroup), "NULL", pollutantGroup))
-
-# create bar plot
-plotly::plot_ly( data = HI.sum,
- x = ~pollutantGroup,
- y = ~count,
- type = "bar"
-)There are 168 TMDLs in Hawaii.
How many TMDL projects (by actionId) were approved so far -in FY 2025 in R10?
-This question can be answered simply with a well crafted query and a -single dplyr function. The -2025 fiscal year began on October 1, 2024, so that will be set as the -tmdl_start_date parameter along with specifying 10 for the region.
-# r10 query for FY 2025 TMDLs
-tmdl.proj <- rExpertQuery::EQ_TMDLs(region = 10,
- tmdl_date_start = "2024-10-01",
- api_key = testkey)
-
-# count distinct actionIds
-tmdl.proj.count <- dplyr::n_distinct(tmdl.proj$actionId)There are TMDL projects (by actionID) approved so far in FY 2025. In -order to create a user-friendly DT datatable, we will have to -take a few more steps. The columns, assessmentUnitId, pollutant, and -addressedParameter are likely to have more than one value per actionId. -In order to create a readable table with only one row per actionId, we -will use dplyr functions to -manipulate the data frame so that for example, multiple pollutants will -be listed together in the same row.
-tmdl.sum <- tmdl.proj %>%
- # select columns
- dplyr::select(region, state, organizationId, organizationName, organizationId,
- actionId, actionName, assessmentUnitId, pollutant,
- addressedParameter, tmdlDate, planSummaryLink) %>%
- # retain unique rows
- dplyr::distinct() %>%
- # group by actionId
- dplyr::group_by(actionId) %>%
- # create new columns to address issues with mulitple rows
- dplyr::mutate(assessmentUnitIds = paste(sort(unique(assessmentUnitId)), collapse = ", "
- ),
- pollutants = paste(sort(unique(pollutant)), collapse = ", "),
- addressedParameters = paste(sort(unique(addressedParameter)), collapse = ", ")) %>% dplyr::select(-assessmentUnitId, -pollutant, -addressedParameter) %>%
- # retain unique rows
- dplyr::distinct()
-
-# create data table
-DT::datatable(tmdl.sum %>%
- rExpertQuery::EQ_FormatPlanLinks(), escape = FALSE,
- options = list(pageLength = 2, scrollX = TRUE))EQ_Actions()queries -ATTAINS Actions data by a variety of user supplied parameters.
-Which actions in Delaware should be included in measures? -How many actions (by actionIds) are in this group?
-We can use dplyr and DT with the query results to -find the answer and display the relevant actionIds. For the query, we -use EQ_Actions to search for Delaware actions with the “in_meas” -parameter set to “Yes”. Then we count the distinct actionIds.
-# query Actions from Delaware that are included in Measures
-DE.meas <- rExpertQuery::EQ_Actions(statecode = "DE",
- in_meas = "Yes",
- api_key = testkey)## [1] "EQ_Actions: The current query will return 649 rows."
-
-There are 480 actionIds that should be included when calculating -measures for Delaware. In a similar process to the final TMDL example, -we can create a data table with one row per actionId.
-# determine unique actionIds
-DE.actids <- DE.meas %>%
- # select columns
- dplyr::select(actionType, actionId, actionName, actionAgency, parameter,
- completionDate, assessmentUnitId, fiscalYearEstablished,
- planSummaryLink) %>%
- # retain unique rows
- dplyr::distinct() %>%
- # group by actionId
- dplyr::group_by(actionId) %>%
- # create new columns to address issues with mulitple rows
- dplyr::mutate(parameters = paste(unique(parameter), collapse = ", "),
- assessmentUnitIds = paste(unique(assessmentUnitId), collapse = ", ")) %>%
- dplyr::select(-parameter, -assessmentUnitId) %>%
- # retain unique rows
- dplyr::distinct()
-
-DT::datatable(DE.actids %>%
- rExpertQuery::EQ_FormatPlanLinks(),
- escape = FALSE,
- options = list(pageLength = 5, scrollX = TRUE))How many actions (by actionId) were established in R3 in -FY 2024? What was the most common type of action established in R3 in FY -2024?
-For this query, we will set region equal to 3 and both -fisc_year_start and fisc_year_end to 2024. Then use dplyr and DT to summarize and display -results as in the first EQ_Actions example.
-# query for R3 FY2024 actions
-act.fy.24 <- rExpertQuery::EQ_Actions(region = 3,
- fisc_year_start = 2024,
- fisc_year_end = 2024,
- api_key = testkey)
-
-# count unique actionIds
-n_act.fy.24 <- dplyr::n_distinct(act.fy.24$actionId)There were 7 unique actions by actionId established in R3 in FY 2024. -We can create data table with counts for each actionType.
-act.fy.24.counts <- act.fy.24 %>%
- # select columns
- dplyr::select(state, organizationId, organizationName,
- actionId, actionName, actionType) %>%
- # retain unique rows
- dplyr::distinct() %>%
- # group by action type
- dplyr::group_by(actionType) %>%
- # count actionId per each action type
- dplyr::summarize(count = length(unique(actionId)))
-
-DT::datatable(act.fy.24.counts)EQ_ActionsDocuments() -queries ATTAINS Actions Documents data with user supplied parameters. -One of its unique features is the ability to search the documents -themselves by keyword or phrase.
-What are all the actions documents that contain the -keyword “nutrient”?
-The only query parameter we will provide is the keyword -“nutrient”.
-# Actions Documents containing "nutrient"
-nutrient.doc <- rExpertQuery::EQ_ActionsDocuments(doc_query = "nutrient",
- api_key = testkey )This query yields 17,657 results. As well as providing the actionId -and documentName for all results, EQ_ActionsDocuments() also returns the -column “actionDocumentUrl” containing the URL to link to the document. -Because the query yields so many results, we’ll take a look at a random -subset of 50 in a DT data -table to understand the structure of the results.
- - -How many actions documents that contain the keyword -“nutrient” are from each region?
-To answer this question, we count the number of unique actionIds per -region from the initial “nutrient” keyword query. We can display the -results in a plotly bar plot.
- - -Which monitoringLocations correspond to assessmentUnits -in Alaska? How many monitoringLocations are associated with -assessmentUnits? Which organizations are the monitoringLocations -from?
-We start by querying for all monitoringLocations in Alaska and then -use dplyr functions to retain -only rows that have a value for monitoringLocationId. We can create a DT data table to review the -results. We can also use the base R functions sort and unique to make a -list of the organizations represented in the monitoringLocationOrgId -column and organize them alphabetically.
-# query for assessment units and monitoring locations from Alaska
-ak.mls <- rExpertQuery::EQ_AUsMLs(statecode = "AK",
- api_key = testkey)
-
-# filter for assessment units with monitoring locations from Alaska
-filt.ak.mls <- ak.mls %>%
- # filter for rows with a valule for monitoring location id
- dplyr::filter(!is.na(monitoringLocationId),
- monitoringLocationId != "") %>%
- # select rows for review
- dplyr::select(assessmentUnitId, assessmentUnitName, monitoringLocationId,
- monitoringLocationOrgId) %>%
- # retain unique rows
- dplyr::distinct()
-
-DT::datatable(filt.ak.mls, options = list(pageLength = 10, scrollX = TRUE))
-
-# list of orgs inlcuded in monitoringLocationOrgId
-ak.orgs <- sort(unique(filt.ak.mls$monitoringLocationOrgId))There are 208 monitoringLocationIds associated with assessmentUnits -in Alaska. The monitoringLocationIds come from 5 different -organizations: `ADOT&PF, AKDECWQ, Kenai Watershed Forum, National -Park Service, and USFWS_ALASKA .
Are there active assessmentUnits without any listed -monitoringLocations in Alaska?
-This question can be answered by filtering the initial Alaska query -results for assessmentUnitIds without any corresponding -monitoringLocationIds.
-# filter Alaska data set for assessmentUnitIds without listed monitoringLocationIds
-no.ml.ak <- ak.mls %>%
- dplyr::filter(!assessmentUnitId %in% filt.ak.mls$assessmentUnitId)
-
-# count number of unique asessment units without listed monitoringLocationIds
-n_no.ml.ak <- dplyr::n_distinct(no.ml.ak$assessmentUnitId)There are active assessmentUnits in Alaska without -monitoringLocations listed in ATTAINS.
Which states in Region 6 have listed Sources in their -latest report cycle?
-Start by querying EQ_Sources for region 6. Then use dplyr and usdata to -manipulate the data and create a list of the region 6 states that list -sources for any assessments in their latest report cycle.
-# query for sources in the latest assessment cycle in R6
-r6.sources <- rExpertQuery::EQ_Sources(region = 6,
- api_key = testkey)## Rows: 36176 Columns: 21
-## ── Column specification ────────────────────────────────────────────────────────
-## Delimiter: ","
-## chr (17): region, state, organizationType, organizationId, organizationName,...
-## dbl (4): objectId, reportingCycle, cycleId, waterSize
-##
-## ℹ Use `spec()` to retrieve the full column specification for this data.
-## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
-# filter for states with listed sources
-r6.sources.filter <- r6.sources %>%
- # remove all rows without a listed source
- dplyr::filter(!is.na(sourceName)) %>%
- # select state
- dplyr::select(state) %>%
- # retain distinct state
- dplyr::distinct() %>%
- # create list
- dplyr::pull() %>%
- # convert abbreviation to state name
- usdata::abbr2state() %>%
- # sort alphabetically
- sort()Arkansas, Louisiana, New Mexico, Oklahoma and Texas all listed -sources in their latest report cycles.
What are the most commonly listed sources in the latest -report cycle in each Region 6 state?
-Using the same initial query results from the previous question, we -can use dplyr functions to -count the number of times a source is listed by each state, select the -most common source from each state and make a DT data table of the -results.
-# count the number of times a source is listed for each state
-r6.sources.summarize <- r6.sources %>%
- dplyr::select(state, organizationId, assessmentUnitId, sourceName) %>%
- dplyr::distinct() %>%
- dplyr::group_by(state, organizationId, sourceName) %>%
- dplyr::summarize(count = length(assessmentUnitId))%>%
- dplyr::ungroup() %>%
- dplyr::group_by(state) %>%
- dplyr::slice_max(count)
-
-# data table for max count
-DT::datatable(r6.sources.summarize)EQ_CatchCorr() -queries ATTAINS Catchment Correspondence data. These queries return -large files, particularly if a large geospatial area is defined, so best -practice is to make your query as specific as possible. Many machines -may not have enough memory to load an entire state’s worth of catchment -correspondence data in an R session. It is also possible to focus on a -specific assessment unit in catchment correspondence queries.
-Which catchments correspond to assessmentUnitId “IL_N-99” -?
-We can start with a very focused query to retrieve information for -the Illinois assessment unit “IL_N-99”. Then we’ll use dplyr functions to retain the -unique rows and make a DT data -table.
-# query for IL_N-99 catchment data
-il.n99.catch <- rExpertQuery::EQ_CatchCorr(statecode = "IL",
- auid = "IL_N-99",
- api_key = testkey)
-
-il.n99.catch <- il.n99.catch %>%
- # remove objectId
- dplyr::select(-objectId) %>%
- # retain unique rows
- dplyr::distinct()
-
-# create data table
-DT::datatable(il.n99.catch, options = list(pageLength = 10, scrollX = TRUE))How large of a catchment area corresponds to -“IL_N-99”?
-We will need some additional information in order to answer this -question. We could get catchment areas from the nhdplusTools package or -from ATTAINS geospatial web services. For this example, we will use -ATTAINS geospatial web services as this will also allow us to create a -map of the catchments and assessment unit. This example utilizes a few -new R packages, httr (wrapper for -curl package), leaflet -(interactive maps), and sf -(spatial vector data).
-The first step is to set up query parameters for ATTAINS geospatial -webservices. Then query the assessment lines and catchment associations -layers to retrieve the geospatial data for the assessment unit and its -associated catchments. The data from the catchment association layer -allows us to sum the total area for all the catchments associated with -IL_N-99.
-# set up query params for ATTAINS geospatial web services
-query.params <- list(
- where = "assessmentunitidentifier IN ('IL_N-99')",
- outFields = "*",
- f = "geojson"
- )
-
-# query ATTAINS geospatial assessments lines layer
-lines.response <- httr::GET("https://gispub.epa.gov/arcgis/rest/services/OW/ATTAINS_Assessment/MapServer/1/query?",
- query = query.params)
-
-# read content from ATTAINS geospatial assessments lines layer
-lines.geojson <- httr::content(lines.response, as = "text", encoding = "UTF-8")
-
-# get sf from ATTAINS geospatial assessments lines layer
-lines.sf <- sf::st_read(lines.geojson, quiet = TRUE)
-
-# query ATTAINS geospatial catchment association layer
-catch.response <- httr::GET("https://gispub.epa.gov/arcgis/rest/services/OW/ATTAINS_Assessment/MapServer/3/query?",
- query = query.params)
-
-# read content from ATTAINS geospatial catchment association layer
-catch.geojson <- httr::content(catch.response, as = "text", encoding = "UTF-8")
-
-# # get sf from ATTAINS geospatial assessments lines layer
-catch.sf <- sf::st_read(catch.geojson, quiet = TRUE)
-
-# sum the catchment areas that correspond to IL_N-99
-sum.catch <- catch.sf %>%
- dplyr::summarise(total = sum(areasqkm)) %>%
- sf::st_drop_geometry() %>%
- dplyr::pull()The total catchment area associated with IL_N-99 is 35.6019.
-To create a map of IL_N-99 and its associated catchments, we can -create a leaflet map with the default Open Street Map tile, then add the -catchment polygons with a popup containing the nhdplusid and area in sq km. -The final step is to add the assessment unit line.
-# create leaflet map
-leaflet::leaflet() %>%
- # add open street map layer
- leaflet::addTiles() %>%
- # add catchments, include popups with nhdplus id and area in sq km
- leaflet::addPolygons(data = catch.sf, color = "darkorange",
- popup = paste0(catch.sf$nhdplusid, "<br>",
- catch.sf$areasqkm, " sq km")) %>%
- # added assessment id
- leaflet::addPolylines(data = lines.sf, color = "blue")EQ_DomainValues is unique in the rExpertQuery packages as it relies -on ATTAINS web services, rather than Expert Query web services in order -to provide allowable domain values. It was created to provide guidance -for users on allowable values for queries. The function returns all -information available from ATTAINS web services about the selected -domain and prints a message informing the user which column contains the -value that should be used in queries.
-Which parameters can EQ_DomainValues provide values -for?
-Running the EQ_DomainValues function with no value provided for the -“domain” argument will return a list of the parameter names that it can -return domain values for.
-# run EQ_DomainValues with no value provided for "domain" to return list of params
-domain.vals <- EQ_DomainValues()## [1] "EQ_DomainValues: getting list of available domain names."
-EQ_DomainValues can provide values for the following query -parameters: act_agency, act_status, act_type, ad_param, assess_basis, -assess_methods, assess_types, au_status, cause, delist_reason, doc_type, -file_type, loc_type, org_id, org_name, param_attain, param_group, -param_name, param_state_ir_cat, param_status, source_scale, source_type, -statecode, use_name, use_support and water_type .
How many different values for “water_type” can be used in -rExpertQuery functions?
-To find the allowable values for “water_type” in rExpertQuery -functions, we can run EQ_DomainValues again, this time specifying -“water_type”.
- -## [1] "EQ_DomainValues: For water_type the values in the name column of the function output are the allowable values for rExpert Query functions."
-The printed message informs us that the “name” column contains the -values for use in water_type queries for rExpertQuery functions. For -water_type there are 66 unique values. They are: BAY; BAYOU; BEACH; -BLACKWATER SYSTEM; CHANNEL; CIRQUE LAKE; COASTAL; COASTAL & BAY -SHORELINE; CONNECTING CHANNEL; CREEK; CREEK, INTERMITTENT; DITCH OR -CANAL; DRAIN; ESTUARY; ESTUARY, FRESHWATER; FLOWAGE; GREAT LAKES BAYS -AND HARBORS; GREAT LAKES BEACH; GREAT LAKES CONNECTING CHANNEL; GREAT -LAKES OPEN WATER; GREAT LAKES SHORELINE; GULCH; GULF; HARBOR; -IMPOUNDMENT; INLAND LAKE SHORELINE; INLET; ISLAND COASTAL WATERS; -LAGOON; LAKE; LAKE, FRESHWATER; LAKE, NATURAL; LAKE, PLAYA; LAKE, -SALINE; LAKE, SPRINGS; LAKE, WILD RICE; LAKE/RESERVOIR/POND; MARSH; -OCEAN; OCEAN/NEAR COASTAL; POND; RESERVOIR; RESERVOIR EMBAYMENT; RIVER; -RIVER, TIDAL; RIVER, WILD RICE; RIVERINE BACKWATER; SINK HOLE; SOUND; -SPRING; SPRINGSHED; STREAM; STREAM, COASTAL; STREAM, EPHEMERAL; STREAM, -INTERMITTENT; STREAM, PERENNIAL; STREAM, TIDAL; STREAM/CREEK/RIVER; -WASH; WATERSHED; WETLAND; WETLANDS, DEPRESSIONAL; WETLANDS, FRESHWATER; -WETLANDS, RIVERINE; WETLANDS, SLOPE; and WETLANDS, TIDAL.
rExpertQuery is in development, so additional changes to existing -functions to improve usability and documentation may occur. If you have -any additional questions, contact the ATTAINS team at -attains@epa.gov.
-