Title: | General Purpose Client for 'ERDDAP' Servers |
---|---|
Description: | General purpose R client for 'ERDDAP' servers. Includes functions to search for 'datasets', get summary information on 'datasets', and fetch 'datasets', in either 'csv' or 'netCDF' format. 'ERDDAP' information: <https://upwell.pfeg.noaa.gov/erddap/information.html>. |
Authors: | Scott Chamberlain [aut], Ben Tupper [ctb], Salvador Jesús Fernández Bejarano [ctb], Roy Mendelssohn [cre, ctb] |
Maintainer: | Roy Mendelssohn <[email protected]> |
License: | MIT + file LICENSE |
Version: | 1.1.0 |
Built: | 2024-11-20 16:27:08 UTC |
Source: | https://github.com/ropensci/rerddap |
Note that it is an error to call this when base::interactive()
returns FALSE
browse(x, url = eurl(), ...)
browse(x, url = eurl(), ...)
x |
datasetid or an object associated with a datasetid such
|
url |
A URL for an ERDDAP server. Default:
https://upwell.pfeg.noaa.gov/erddap/ - See |
... |
Further args passed on to |
if in interactive mode, opens a URL in your default browser; if not, then prints the URL in the console
Ben Tupper [email protected]
## Not run: if (interactive()) { # browse by dataset_id browse('erdATastnhday') # browse info class my_info <- info('erdATastnhday') browse(my_info) # browse tabledap class my_tabledap <- tabledap('erdCalCOFIlrvsiz', fields=c('latitude','longitude','larvae_size', 'itis_tsn'), 'time>=2011-10-25', 'time<=2011-10-31') browse(my_tabledap) } ## End(Not run)
## Not run: if (interactive()) { # browse by dataset_id browse('erdATastnhday') # browse info class my_info <- info('erdATastnhday') browse(my_info) # browse tabledap class my_tabledap <- tabledap('erdCalCOFIlrvsiz', fields=c('latitude','longitude','larvae_size', 'itis_tsn'), 'time>=2011-10-25', 'time<=2011-10-31') browse(my_tabledap) } ## End(Not run)
Delete cached files
cache_delete(x, force = FALSE) cache_delete_all(force = FALSE)
cache_delete(x, force = FALSE) cache_delete_all(force = FALSE)
x |
File names |
force |
(logical) Should files be force deleted? Default: |
Other cache:
cache_details()
,
cache_list()
,
cache_setup()
## Not run: # delete files by name in cache # cache_delete('9911750294a039b8b517c8bf288978ea.csv') # cache_delete(c('9911750294a039b8b517c8bf288978ea.csv', # 'b26825b6737da13d6a52c28c8dfe690f.csv')) # You can delete from the output of griddap or tabledap fxns ## tabledap (table_res <- tabledap('erdCinpKfmBT')) cache_delete(table_res) ## griddap (out <- info('erdQMekm14day')) (grid_res <- griddap(out, time = c('2015-12-28','2016-01-01'), latitude = c(24, 23), longitude = c(88, 90) )) cache_delete(grid_res) ## End(Not run)
## Not run: # delete files by name in cache # cache_delete('9911750294a039b8b517c8bf288978ea.csv') # cache_delete(c('9911750294a039b8b517c8bf288978ea.csv', # 'b26825b6737da13d6a52c28c8dfe690f.csv')) # You can delete from the output of griddap or tabledap fxns ## tabledap (table_res <- tabledap('erdCinpKfmBT')) cache_delete(table_res) ## griddap (out <- info('erdQMekm14day')) (grid_res <- griddap(out, time = c('2015-12-28','2016-01-01'), latitude = c(24, 23), longitude = c(88, 90) )) cache_delete(grid_res) ## End(Not run)
Get details of cached files
cache_details(x)
cache_details(x)
x |
File names |
Can be used to list details for all files, both .nc and .csv
types, or details for just individual files of class tabledap
,
griddap_nc
, and griddap_csv
Other cache:
cache_delete()
,
cache_list()
,
cache_setup()
## Not run: # List details for all cached files cache_details() ## End(Not run)
## Not run: # List details for all cached files cache_details() ## End(Not run)
List cached files
cache_list()
cache_list()
Other cache:
cache_delete()
,
cache_details()
,
cache_setup()
## Not run: # list files in cache cache_list() # List info for files ## download some data first tabledap('erdCinpKfmBT') griddap('erdVHNchlamday', time = c('2015-04-01','2015-04-10'), latitude = c(18, 21), longitude = c(-120, -119) ) (x <- cache_list()) cache_details(x$nc[1]) cache_details(x$csv[1]) cache_details() # delete files by name in cache # cache_delete(x$nc[1]) # cache_delete(x$nc[2:3]) ## End(Not run)
## Not run: # list files in cache cache_list() # List info for files ## download some data first tabledap('erdCinpKfmBT') griddap('erdVHNchlamday', time = c('2015-04-01','2015-04-10'), latitude = c(18, 21), longitude = c(-120, -119) ) (x <- cache_list()) cache_details(x$nc[1]) cache_details(x$csv[1]) cache_details() # delete files by name in cache # cache_delete(x$nc[1]) # cache_delete(x$nc[2:3]) ## End(Not run)
Setup cache path
cache_setup(full_path = NULL, temp_dir = FALSE) cache_info()
cache_setup(full_path = NULL, temp_dir = FALSE) cache_info()
full_path |
(character) the full path to use for storing cached files. |
temp_dir |
(logical) if |
On opening, by default a temporary directory is created for caching
files. To have files cached elsewhere, give the full path of where to
cache files. Adding temp_dir = TRUE
will again use a temporary
directory for caching.
the full cache path, a directory (character)
Other cache:
cache_delete()
,
cache_details()
,
cache_list()
## Not run: # default path cache_setup() # you can define your own path cache_setup(path = "foobar") # set a tempdir - better for programming with to avoid prompt cache_setup(temp_dir = TRUE) # cache info cache_info() ## End(Not run)
## Not run: # default path cache_setup() # you can define your own path cache_setup(path = "foobar") # set a tempdir - better for programming with to avoid prompt cache_setup(temp_dir = TRUE) # cache info cache_info() ## End(Not run)
str(colors) List of 13 $ viridis $ cdom $ chlorophyll $ density $ freesurface $ oxygen $ par $ phase $ salinity $ temperature $ turbidity $ velocity $ vorticity
colors
colors
An object of class list
of length 13.
Convert a UDUNITS compatible time to ISO time
convert_time( n = NULL, isoTime = NULL, units = "seconds since 1970-01-01T00:00:00Z", url = eurl(), method = "local", ... )
convert_time( n = NULL, isoTime = NULL, units = "seconds since 1970-01-01T00:00:00Z", url = eurl(), method = "local", ... )
n |
numeric; A unix time number. |
isoTime |
character; A string time representation. |
units |
character; Units to return. Default: "seconds since 1970-01-01T00:00:00Z" |
url |
Base URL of the ERDDAP server. See |
method |
(character) One of local or web. Local simply uses
|
... |
Curl options passed on to crul::verb-GET |
When method = "web"
time zone is GMT/UTC
## Not run: # local conversions convert_time(n = 473472000) convert_time(isoTime = "1985-01-02T00:00:00Z") # using an erddap web service convert_time(n = 473472000, method = "web") convert_time(isoTime = "1985-01-02T00:00:00Z", method = "web") ## End(Not run)
## Not run: # local conversions convert_time(n = 473472000) convert_time(isoTime = "1985-01-02T00:00:00Z") # using an erddap web service convert_time(n = 473472000, method = "web") convert_time(isoTime = "1985-01-02T00:00:00Z", method = "web") ## End(Not run)
Convert a UDUNITS units string to/from a UCUM units string
convert_units(udunits = NULL, ucum = NULL, url = eurl(), ...)
convert_units(udunits = NULL, ucum = NULL, url = eurl(), ...)
udunits |
character; A UDUNITS character string https://www.unidata.ucar.edu/software/udunits/ |
ucum |
character; A UCUM character string https://ucum.org/ucum.html |
url |
Base URL of the ERDDAP server. See |
... |
Curl options passed on to crul::verb-GET |
## Not run: convert_units(udunits = "degree_C meter-1") convert_units(ucum = "Cel.m-1") ## End(Not run)
## Not run: convert_units(udunits = "degree_C meter-1") convert_units(ucum = "Cel.m-1") ## End(Not run)
Options for saving ERDDAP datasets.
disk(path = NULL, overwrite = TRUE) memory()
disk(path = NULL, overwrite = TRUE) memory()
path |
Path to store files in. A directory, not a file.
Default: the root cache path, see |
overwrite |
(logical) Overwrite an existing file of the same name?
Default: |
Search for ERDDAP tabledap or griddap datasets
ed_search( query, page = NULL, page_size = NULL, which = "griddap", url = eurl(), ... ) ed_datasets(which = "tabledap", url = eurl())
ed_search( query, page = NULL, page_size = NULL, which = "griddap", url = eurl(), ... ) ed_datasets(which = "tabledap", url = eurl())
query |
(character) Search terms |
page |
(integer) Page number |
page_size |
(integer) Results per page |
which |
(character) One of tabledap or griddap. |
url |
A URL for an ERDDAP server. Default:
https://upwell.pfeg.noaa.gov/erddap/ - See |
... |
Curl options passed on to crul::verb-GET (must be named parameters) |
https://upwell.pfeg.noaa.gov/erddap/index.html
## Not run: (out <- ed_search(query='temperature')) out$alldata[[1]] (out <- ed_search(query='size')) out$info # List datasets ed_datasets('table') ed_datasets('grid') # use a different ERDDAP server ## Marine Institute (Ireland) ed_search("temperature", url = "http://erddap.marine.ie/erddap/") ## End(Not run)
## Not run: (out <- ed_search(query='temperature')) out$alldata[[1]] (out <- ed_search(query='size')) out$info # List datasets ed_datasets('table') ed_datasets('grid') # use a different ERDDAP server ## Marine Institute (Ireland) ed_search("temperature", url = "http://erddap.marine.ie/erddap/") ## End(Not run)
Advanced search for ERDDAP tabledap or griddap datasets
ed_search_adv( query = NULL, page = 1, page_size = 1000, protocol = NULL, cdm_data_type = NULL, institution = NULL, ioos_category = NULL, keywords = NULL, long_name = NULL, standard_name = NULL, variableName = NULL, maxLat = NULL, minLon = NULL, maxLon = NULL, minLat = NULL, minTime = NULL, maxTime = NULL, url = eurl(), ... )
ed_search_adv( query = NULL, page = 1, page_size = 1000, protocol = NULL, cdm_data_type = NULL, institution = NULL, ioos_category = NULL, keywords = NULL, long_name = NULL, standard_name = NULL, variableName = NULL, maxLat = NULL, minLon = NULL, maxLon = NULL, minLat = NULL, minTime = NULL, maxTime = NULL, url = eurl(), ... )
query |
(character) Search terms |
page |
(integer) Page number. Default: 1 |
page_size |
(integer) Results per page. Default: 1000 |
protocol |
(character) One of any (default), tabledap or griddap |
cdm_data_type |
(character) One of grid, other, point, profile, timeseries, timeseriesprofile, trajectory, trajectoryprofile |
institution |
(character) An institution. See the dataset
|
ioos_category |
(character) An ioos category See the dataset
|
keywords |
(character) A keyword. See the dataset |
long_name |
(character) A long name. See the dataset |
standard_name |
(character) A standard name. See the dataset
|
variableName |
(character) A variable name. See the dataset
|
minLon , maxLon
|
(numeric) Minimum and maximum longitude. Some datasets have longitude values within -180 to 180, others use 0 to 360. If you specify min and max Longitude within -180 to 180 (or 0 to 360), ERDDAP will only find datasets that match the values you specify. Consider doing one search: longitude -180 to 360, or two searches: longitude -180 to 180, and 0 to 360. |
minLat , maxLat
|
(numeric) Minimum and maximum latitude, between -90 and 90 |
minTime , maxTime
|
(numeric/character) Minimum and maximum time. Time string with the format "yyyy-MM-ddTHH:mm:ssZ" (e.g., 2009-01-21T23:00:00Z). If you specify something, you must include at least yyyy-MM-dd; you can omit Z, :ss, :mm, :HH, and T. Always use UTC (GMT/Zulu) time. Or specify the number of seconds since 1970-01-01T00:00:00Z. |
url |
A URL for an ERDDAP server. Default:
https://upwell.pfeg.noaa.gov/erddap/ - See |
... |
Curl options passed on to crul::verb-GET (must be named parameters) |
https://upwell.pfeg.noaa.gov/erddap/index.html
## Not run: ed_search_adv(query = 'temperature') ed_search_adv(query = 'temperature', protocol = "griddap") ed_search_adv(query = 'temperature', protocol = "tabledap") ed_search_adv(maxLat = 63, minLon = -107, maxLon = -87, minLat = 50, protocol = "griddap") ed_search_adv(maxLat = 63, minLon = -107, maxLon = -87, minLat = 50, protocol = "tabledap") ed_search_adv(minTime = "2010-01-01T00:00:00Z", maxTime="2010-02-01T00:00:00Z") (out <- ed_search_adv(maxLat = 63, minLon = -107, maxLon = -87, minLat = 50, minTime = "2010-01-01T00:00:00Z", maxTime="2010-02-01T00:00:00Z")) out$alldata[[1]] ed_search_adv(variableName = 'upwelling') ed_search_adv(query = 'upwelling', protocol = "tabledap") # use a different URL ed_search_adv(query = 'temperature', url = servers()$url[6]) ## End(Not run)
## Not run: ed_search_adv(query = 'temperature') ed_search_adv(query = 'temperature', protocol = "griddap") ed_search_adv(query = 'temperature', protocol = "tabledap") ed_search_adv(maxLat = 63, minLon = -107, maxLon = -87, minLat = 50, protocol = "griddap") ed_search_adv(maxLat = 63, minLon = -107, maxLon = -87, minLat = 50, protocol = "tabledap") ed_search_adv(minTime = "2010-01-01T00:00:00Z", maxTime="2010-02-01T00:00:00Z") (out <- ed_search_adv(maxLat = 63, minLon = -107, maxLon = -87, minLat = 50, minTime = "2010-01-01T00:00:00Z", maxTime="2010-02-01T00:00:00Z")) out$alldata[[1]] ed_search_adv(variableName = 'upwelling') ed_search_adv(query = 'upwelling', protocol = "tabledap") # use a different URL ed_search_adv(query = 'temperature', url = servers()$url[6]) ## End(Not run)
Default ERDDAP server URL
eurl()
eurl()
default url is https://upwell.pfeg.noaa.gov/erddap/
You can set a default using an environment variable so you don't have to pass anything to the URL parameter in your function calls.
In your .Renviron file or similar set a URL for the environment
variable RERDDAP_DEFAULT_URL
, like
RERDDAP_DEFAULT_URL=https://upwell.pfeg.noaa.gov/erddap/
It's important that you include a trailing slash in your URL
eurl() Sys.setenv(RERDDAP_DEFAULT_URL = "https://google.com") Sys.getenv("RERDDAP_DEFAULT_URL") eurl() Sys.unsetenv("RERDDAP_DEFAULT_URL") eurl()
eurl() Sys.setenv(RERDDAP_DEFAULT_URL = "https://google.com") Sys.getenv("RERDDAP_DEFAULT_URL") eurl() Sys.unsetenv("RERDDAP_DEFAULT_URL") eurl()
Convert a FIPS County Code to/from a County Name
fipscounty(county = NULL, code = NULL, url = eurl(), ...)
fipscounty(county = NULL, code = NULL, url = eurl(), ...)
county |
character; A county name. |
code |
numeric; A FIPS code. |
url |
A URL for an ERDDAP server. Default:
https://upwell.pfeg.noaa.gov/erddap/ - See |
... |
Curl options passed on to crul::verb-GET |
## Not run: fipscounty(code = "06053") fipscounty(county = "CA, Monterey") fipscounty(county = "OR, Multnomah") ## End(Not run)
## Not run: fipscounty(code = "06053") fipscounty(county = "CA, Monterey") fipscounty(county = "OR, Multnomah") ## End(Not run)
Search for ERDDAP tabledap or griddap datasets from a list of ERDDAP servers based on search terms.
global_search(query, server_list, which_service)
global_search(query, server_list, which_service)
query |
(character) Search terms |
server_list |
(list of character) List of ERDDAP servers to search |
which_service |
(character) One of tabledap or griddap. |
Uses the 'rerddap' function ed_search() to search over the list of servers
If successful, a data.frame with columns:
title - the dataset title
dataset_id - the datasetid on that ERDDAP server
url - base url of dataset ERDDAP server
If the URLs are valid but no match is found, returns "no match found"; otherwise returns an error message.
# get list of servers know by # https://irishmarineinstitute.github.io/awesome-erddap # e_servers <- servers()$url # select a couple to search # e_servers <- e_servers[c(1, 40)] # to meet CRAN time limits will only search 1 place e_servers <- "https://coastwatch.pfeg.noaa.gov/erddap/" test_query <- 'NOAA/NCDC Blended Monthly' query_results <- global_search(test_query, e_servers, "griddap")
# get list of servers know by # https://irishmarineinstitute.github.io/awesome-erddap # e_servers <- servers()$url # select a couple to search # e_servers <- e_servers[c(1, 40)] # to meet CRAN time limits will only search 1 place e_servers <- "https://coastwatch.pfeg.noaa.gov/erddap/" test_query <- 'NOAA/NCDC Blended Monthly' query_results <- global_search(test_query, e_servers, "griddap")
Get ERDDAP gridded data
griddap( datasetx, ..., fields = "all", stride = 1, fmt = "nc", url = eurl(), store = disk(), read = TRUE, callopts = list() )
griddap( datasetx, ..., fields = "all", stride = 1, fmt = "nc", url = eurl(), store = disk(), read = TRUE, callopts = list() )
datasetx |
Anything coercible to an object of class info. So the output of a
call to |
... |
Dimension arguments. See examples. Can be any 1 or more of the dimensions for the particular dataset - and the dimensions vary by dataset. For each dimension, pass in a vector of length two, with min and max value desired. At least 1 required. |
fields |
(character) Fields to return, in a character vector. |
stride |
(integer) How many values to get. 1 = get every value, 2 = get every other value, etc. Default: 1 (i.e., get every value) |
fmt |
(character) One of csv or nc (for netcdf). Default: nc |
url |
A URL for an ERDDAP server. Default:
https://upwell.pfeg.noaa.gov/erddap/ - See |
store |
One of |
read |
(logical) Read data into memory or not. Does not apply when
|
callopts |
Curl options passed on to |
Details:
If you run into an error like "HTTP Status 500 - There was a (temporary?)
problem. Wait a minute, then try again.", it's likely you are hitting
up against a size limit, and you should reduce the amount of data you
are requesting either via space, time, or variables. Pass in
config = verbose()
to the request, and paste the URL into your
browser to see if the output is garbled to examine if there's a problem
with servers or this package
An object of class griddap_csv
if csv chosen or
griddap_nc
if nc file format chosen.
griddap_csv
: a data.frame created from the downloaded csv
data
griddap_nc
: a list, with slots "summary" and "data". "summary"
is the unclassed output from ncdf4::nc_open
, from which you can
do any netcdf operations you like. "data" is a data.frame created
from the netcdf data. the data.frame may be empty if there were problems
parsing the netcdf data
Both have the attributes: datasetid (the dataset id), path (the path on file for the csv or nc file), url (the url requested to the ERDDAP server)
If read=FALSE
, the data.frame for griddap_csv
and the data.frame in the "data" slot is empty for griddap_nc
ERDDAP grid dap data has this concept of dimensions vs. variables. Dimensions are things like time, latitude, longitude, altitude, and depth. Whereas variables are the measured variables, e.g., temperature, salinity, air.
You can't separately adjust values for dimensions for different variables. So, here's how it's gonna work:
Pass in lower and upper limits you want for each dimension as a vector
(e.g., c(1,2)
), or leave to defaults (i.e., don't pass anything to
a dimension). Then pick which variables you want returned via the
fields
parameter. If you don't pass in options to the fields
parameter, you get all variables back.
To get the dimensions and variables, along with other metadata for a
dataset, run info
, and each will be shown, with their min
and max values, and some other metadata.
You can choose where data is stored. Be careful though. You can easily get a
single file of hundreds of MB's (upper limit: 2 GB) in size with a single
request. To the store
parameter, pass memory
if you
want to store the data in memory (saved as a data.frame), or pass
disk
if you want to store on disk in a file. Note that
memory
and disk
are not character strings, but
function calls. memory
does not accept any inputs, while
disk
does. Possibly will add other options, like
“sql” for storing in a SQL database.
Some gridded datasets have latitude/longitude components, but some do not. When nc format gridded datasets have latitude and longitude we "melt" them into a data.frame for easy downstream consumption. When nc format gridded datasets do not have latitude and longitude components, we do not read in the data, and throw a warning saying so. You can read in the nc file yourself with the file path. CSV format is not affected by this issue as CSV data is easily turned into a data.frame regardless of whether latitude/longitude data are present.
https://upwell.pfeg.noaa.gov/erddap/rest.html
## Not run: # single variable dataset ## You can pass in the outpu of a call to info (out <- info('erdVHNchlamday')) ## Or, pass in a dataset id (res <- griddap('erdVHNchlamday', time = c('2015-04-01','2015-04-10'), latitude = c(18, 21), longitude = c(-120, -119) )) # multi-variable dataset (out <- info('erdQMekm14day')) (res <- griddap(out, time = c('2015-12-28','2016-01-01'), latitude = c(24, 23), longitude = c(88, 90) )) (res <- griddap(out, time = c('2015-12-28','2016-01-01'), latitude = c(24, 23), longitude = c(88, 90), fields = 'mod_current')) (res <- griddap(out, time = c('2015-12-28','2016-01-01'), latitude = c(24, 23), longitude = c(88, 90), fields = 'mod_current', stride = c(1,2,1,2))) (res <- griddap(out, time = c('2015-12-28','2016-01-01'), latitude = c(24, 23), longitude = c(88, 90), fields = c('mod_current','u_current'))) # Write to memory (within R), or to disk (out <- info('erdQSwindmday')) ## disk, by default (to prevent bogging down system w/ large datasets) ## you can also pass in path and overwrite options to disk() (res <- griddap(out, time = c('2006-07-11','2006-07-20'), longitude = c(166, 170), store = disk() )) ## the 2nd call is much faster as it's mostly just the time of reading in ## the table from disk system.time( griddap(out, time = c('2006-07-11','2006-07-15'), longitude = c(10, 15), store = disk() ) ) system.time( griddap(out, time = c('2006-07-11','2006-07-15'), longitude = c(10, 15), store = disk() ) ) ## memory - you have to choose fmt="csv" if you use memory (res <- griddap("erdMBchla1day", time = c('2015-01-01','2015-01-03'), latitude = c(14, 15), longitude = c(125, 126), fmt = "csv", store = memory() )) ## Use ncdf4 package to parse data info("erdMBchla1day") (res <- griddap("erdMBchla1day", time = c('2015-01-01','2015-01-03'), latitude = c(14, 15), longitude = c(125, 126) )) # Get data in csv format ## by default, we get netcdf format data (res <- griddap('erdMBchla1day', time = c('2015-01-01','2015-01-03'), latitude = c(14, 
15), longitude = c(125, 126), fmt = "csv" )) # Use a different ERDDAP server url ## NOAA IOOS PacIOOS url = "https://cwcgom.aoml.noaa.gov/erddap/" out <- info("miamiacidification", url = url) (res <- griddap(out, time = c('2019-11-01','2019-11-03'), latitude = c(15, 16), longitude = c(-90, -88) )) ## pass directly into griddap() - if you pass a datasetid string directly ## you must pass in the url or you'll be querying the default ERDDAP url, ## which isn't the one you want if you're not using the default ERDDAP url griddap("miamiacidification", url = url, time = c('2019-11-01','2019-11-03'), latitude = c(15, 16), longitude = c(-90, -88) ) # Using 'last' ## with time griddap('erdVHNchlamday', time = c('last-5','last'), latitude = c(18, 21), longitude = c(-120, -119) ) ## with latitude griddap('erdVHNchlamday', time = c('2015-04-01','2015-04-10'), latitude = c('last', 'last'), longitude = c(-120, -119) ) ## with longitude griddap('erdVHNchlamday', time = c('2015-04-01','2015-04-10'), latitude = c(18, 21), longitude = c('last', 'last') ) # datasets without lat/lon grid and with fmt=nc # FIXME: this dataset is gone # (x <- info('glos_tds_5912_ca66_3f41')) # res <- griddap(x, # time = c('2018-04-01','2018-04-10'), # ny = c(1, 2), # nx = c(3, 5) # ) ## data.frame is empty # res$data ## read in from the nc file path # ncdf4::nc_open(res$summary$filename) ## End(Not run)
## Not run: # single variable dataset ## You can pass in the outpu of a call to info (out <- info('erdVHNchlamday')) ## Or, pass in a dataset id (res <- griddap('erdVHNchlamday', time = c('2015-04-01','2015-04-10'), latitude = c(18, 21), longitude = c(-120, -119) )) # multi-variable dataset (out <- info('erdQMekm14day')) (res <- griddap(out, time = c('2015-12-28','2016-01-01'), latitude = c(24, 23), longitude = c(88, 90) )) (res <- griddap(out, time = c('2015-12-28','2016-01-01'), latitude = c(24, 23), longitude = c(88, 90), fields = 'mod_current')) (res <- griddap(out, time = c('2015-12-28','2016-01-01'), latitude = c(24, 23), longitude = c(88, 90), fields = 'mod_current', stride = c(1,2,1,2))) (res <- griddap(out, time = c('2015-12-28','2016-01-01'), latitude = c(24, 23), longitude = c(88, 90), fields = c('mod_current','u_current'))) # Write to memory (within R), or to disk (out <- info('erdQSwindmday')) ## disk, by default (to prevent bogging down system w/ large datasets) ## you can also pass in path and overwrite options to disk() (res <- griddap(out, time = c('2006-07-11','2006-07-20'), longitude = c(166, 170), store = disk() )) ## the 2nd call is much faster as it's mostly just the time of reading in ## the table from disk system.time( griddap(out, time = c('2006-07-11','2006-07-15'), longitude = c(10, 15), store = disk() ) ) system.time( griddap(out, time = c('2006-07-11','2006-07-15'), longitude = c(10, 15), store = disk() ) ) ## memory - you have to choose fmt="csv" if you use memory (res <- griddap("erdMBchla1day", time = c('2015-01-01','2015-01-03'), latitude = c(14, 15), longitude = c(125, 126), fmt = "csv", store = memory() )) ## Use ncdf4 package to parse data info("erdMBchla1day") (res <- griddap("erdMBchla1day", time = c('2015-01-01','2015-01-03'), latitude = c(14, 15), longitude = c(125, 126) )) # Get data in csv format ## by default, we get netcdf format data (res <- griddap('erdMBchla1day', time = c('2015-01-01','2015-01-03'), latitude = c(14, 
15), longitude = c(125, 126), fmt = "csv" )) # Use a different ERDDAP server url ## NOAA IOOS PacIOOS url = "https://cwcgom.aoml.noaa.gov/erddap/" out <- info("miamiacidification", url = url) (res <- griddap(out, time = c('2019-11-01','2019-11-03'), latitude = c(15, 16), longitude = c(-90, -88) )) ## pass directly into griddap() - if you pass a datasetid string directly ## you must pass in the url or you'll be querying the default ERDDAP url, ## which isn't the one you want if you're not using the default ERDDAP url griddap("miamiacidification", url = url, time = c('2019-11-01','2019-11-03'), latitude = c(15, 16), longitude = c(-90, -88) ) # Using 'last' ## with time griddap('erdVHNchlamday', time = c('last-5','last'), latitude = c(18, 21), longitude = c(-120, -119) ) ## with latitude griddap('erdVHNchlamday', time = c('2015-04-01','2015-04-10'), latitude = c('last', 'last'), longitude = c(-120, -119) ) ## with longitude griddap('erdVHNchlamday', time = c('2015-04-01','2015-04-10'), latitude = c(18, 21), longitude = c('last', 'last') ) # datasets without lat/lon grid and with fmt=nc # FIXME: this dataset is gone # (x <- info('glos_tds_5912_ca66_3f41')) # res <- griddap(x, # time = c('2018-04-01','2018-04-10'), # ny = c(1, 2), # nx = c(3, 5) # ) ## data.frame is empty # res$data ## read in from the nc file path # ncdf4::nc_open(res$summary$filename) ## End(Not run)
Get information on an ERDDAP dataset.
info(datasetid, url = eurl(), ...) as.info(x, url)
info(datasetid, url = eurl(), ...) as.info(x, url)
datasetid |
Dataset id |
url |
A URL for an ERDDAP server. Default:
https://upwell.pfeg.noaa.gov/erddap/ - See |
... |
Further args passed on to crul::verb-GET (must be a named parameter) |
x |
A datasetid or the output of |
Prints a summary of the data on return, but you can index to various information.
The data is a list of length two with:
variables - Data.frame of variables and their types
alldata - List of data variables and their full attributes
Where alldata
element has many data.frame's, one for each variable,
with metadata for that variable. E.g., for griddap dataset
noaa_pfeg_696e_ec99_6fa6
, alldata
has:
NC_GLOBAL
time
latitude
longitude
sss
https://upwell.pfeg.noaa.gov/erddap/index.html
## Not run: # grid dap datasets info('erdATastnhday') (out <- ed_search(query='temperature')) info(out$info$dataset_id[5]) info(out$info$dataset_id[15]) info(out$info$dataset_id[25]) info(out$info$dataset_id[150]) info(out$info$dataset_id[400]) info(out$info$dataset_id[678]) out <- info(datasetid='erdMBchla1day') ## See brief overview of the variables and range of possible values, if given out$variables ## all information on longitude out$alldata$longitude ## all information on chlorophyll out$alldata$chlorophyll # table dap datasets (out <- ed_search(query='temperature', which = "table")) info(out$info$dataset_id[1]) info(out$info$dataset_id[2]) info(out$info$dataset_id[3]) info(out$info$dataset_id[4]) info('erdCinpKfmBT') out <- info('erdCinpKfmBT') ## See brief overview of the variables and range of possible values, if given out$variables ## all information on longitude out$alldata$longitude ## all information on Haliotis_corrugata_Mean_Density out$alldata$Haliotis_corrugata_Mean_Density # use a different ERDDAP server ## Marine Institute (Ireland) info("IMI_CONN_2D", url = "http://erddap.marine.ie/erddap/") ## End(Not run)
## Not run: # grid dap datasets info('erdATastnhday') (out <- ed_search(query='temperature')) info(out$info$dataset_id[5]) info(out$info$dataset_id[15]) info(out$info$dataset_id[25]) info(out$info$dataset_id[150]) info(out$info$dataset_id[400]) info(out$info$dataset_id[678]) out <- info(datasetid='erdMBchla1day') ## See brief overview of the variables and range of possible values, if given out$variables ## all information on longitude out$alldata$longitude ## all information on chlorophyll out$alldata$chlorophyll # table dap datasets (out <- ed_search(query='temperature', which = "table")) info(out$info$dataset_id[1]) info(out$info$dataset_id[2]) info(out$info$dataset_id[3]) info(out$info$dataset_id[4]) info('erdCinpKfmBT') out <- info('erdCinpKfmBT') ## See brief overview of the variables and range of possible values, if given out$variables ## all information on longitude out$alldata$longitude ## all information on Haliotis_corrugata_Mean_Density out$alldata$Haliotis_corrugata_Mean_Density # use a different ERDDAP server ## Marine Institute (Ireland) info("IMI_CONN_2D", url = "http://erddap.marine.ie/erddap/") ## End(Not run)
Convert a CF Standard Name to/from a GCMD Science Keyword
key_words(cf = NULL, gcmd = NULL, url = eurl(), ...)
key_words(cf = NULL, gcmd = NULL, url = eurl(), ...)
cf |
character; A cf standard name http://cfconventions.org/Data/cf-standard-names/27/build/cf-standard-name-table.html |
gcmd |
character; A GCMD science keyword http://gcmd.gsfc.nasa.gov/learn/keyword_list.html |
url |
A URL for an ERDDAP server. Default:
https://upwell.pfeg.noaa.gov/erddap/. See |
... |
Curl options passed on to crul::verb-GET |
## Not run: key_words(cf = "air_pressure") cat(key_words(cf = "air_pressure")) # a different ERDDAP server # key_words(cf = "air_pressure", url = servers()$url[6]) ## End(Not run)
## Not run: key_words(cf = "air_pressure") cat(key_words(cf = "air_pressure")) # a different ERDDAP server # key_words(cf = "air_pressure", url = servers()$url[6]) ## End(Not run)
ERDDAP server URLs and other info
servers(...)
servers(...)
... |
curl options passed on to crul::verb-GET |
data.frame with 3 columns:
name (character): ERDDAP name
url (character): ERDDAP url
public (logical): whether it's public or not
## Not run: servers() ## End(Not run)
## Not run: servers() ## End(Not run)
Get ERDDAP tabledap data.
tabledap( x, ..., fields = NULL, distinct = FALSE, orderby = NULL, orderbymax = NULL, orderbymin = NULL, orderbyminmax = NULL, units = NULL, url = eurl(), store = disk(), callopts = list() )
tabledap( x, ..., fields = NULL, distinct = FALSE, orderby = NULL, orderbymax = NULL, orderbymin = NULL, orderbyminmax = NULL, units = NULL, url = eurl(), store = disk(), callopts = list() )
x |
Anything coercible to an object of class info. So the output of
a call to |
... |
Any number of key-value pairs in quotes as query constraints. See Details & examples |
fields |
Columns to return, as a character vector |
distinct |
If |
orderby |
If used, ERDDAP will sort all of the rows in the results
table (starting with the first variable, then using the second variable
if the first variable has a tie, ...). Normally, the rows of data in the
response table are in the order they arrived from the data source. orderBy
allows you to request that the results table be sorted in a specific way.
For example, use |
orderbymax |
Give a vector of one or more fields, that must be included in the fields parameter as well. Gives back data given constraints. ERDDAP will sort all of the rows in the results table (starting with the first variable, then using the second variable if the first variable has a tie, ...) and then just keeps the rows where the value of the last sort variable is highest (for each combination of other values). |
orderbymin |
Same as |
orderbyminmax |
Same as |
units |
One of 'udunits' (units will be described via the UDUNITS standard (e.g., degrees_C)) or 'ucum' (units will be described via the UCUM standard (e.g., Cel)). |
url |
A URL for an ERDDAP server.
Default: https://upwell.pfeg.noaa.gov/erddap/ - See |
store |
One of |
callopts |
Curl options passed on to crul::verb-GET (must be named parameters) |
For key-value pair query constraints, the valid operators are =,
!= (not equals), =~ (a regular expression test), <, <=, >, and >= . For
regular expressions you need to add a regular expression. For others, nothing
more is needed. Construct the entry like 'time>=2001-07-07'
with the
parameter on the left, value on the right, and the operator in the middle,
all within a set of quotes. Since ERDDAP accepts operators other than =
,
we can't simply do time = '2001-07-07'
as we normally would.
Server-side functionality: Some tasks are done server side. You don't have
to worry about what that means. They are provided via parameters in this
function. See distinct
, orderby
, orderbymax
,
orderbymin
, orderbyminmax
, and units
.
Data is cached based on all parameters you use to get a dataset, including base url, query parameters. If you make the same exact call in the same or a different R session, as long as you don't clear the cache, the function only reads data from disk, and does not have to request the data from the web again.
If you run into an error like "HTTP Status 500 - There was a (temporary?)
problem. Wait a minute, then try again.", it's likely you are hitting
up against a size limit, and you should reduce the amount of data you
are requesting either via space, time, or variables. Pass in
config = verbose()
to the request, and paste the URL into your
browser to see if the output is garbled to examine if there's a problem
with servers or this package.
An object of class tabledap
. This class is a thin wrapper
around a data.frame, so the data you get back is a data.frame with metadata
attached as attributes (datasetid, path (path where the csv is stored on
your machine), url (url for the request))
https://upwell.pfeg.noaa.gov/erddap/index.html
## Not run: # Just passing the datasetid without fields gives all columns back tabledap('erdCinpKfmBT') # Pass time constraints tabledap('erdCinpKfmBT', 'time>=2006-08-24') # Pass in fields (i.e., columns to retrieve) & time constraints tabledap('erdCinpKfmBT', fields = c('longitude', 'latitude', 'Aplysia_californica_Mean_Density'), 'time>=2006-08-24' ) # Get info on a datasetid, then get data given information learned info('erdCalCOFIlrvsiz')$variables tabledap('erdCalCOFIlrvsiz', fields=c('latitude','longitude','larvae_size', 'itis_tsn'), 'time>=2011-10-25', 'time<=2011-10-31') # An example workflow ## Search for data (out <- ed_search(query='fish', which = 'table')) ## Using a datasetid, search for information on a datasetid id <- out$alldata[[1]]$dataset_id vars <- info(id)$variables ## Get data from the dataset vars$variable_name[1:3] tabledap(id, fields = vars$variable_name[1:3]) # Time constraint ## Limit by time with date only (info <- info('erdCinpKfmBT')) tabledap(info, fields = c( 'latitude','longitude','Haliotis_fulgens_Mean_Density'), 'time>=2001-07-14') # Use distinct parameter - compare to distinct = FALSE tabledap('sg114_3', fields=c('longitude','latitude','trajectory'), 'time>=2008-12-05', distinct = TRUE) # Use units parameter ## In this example, values are the same, but sometimes they can be different ## given the units value passed tabledap('erdCinpKfmT', fields=c('longitude','latitude','time','temperature'), 'time>=2007-09-19', 'time<=2007-09-21', units='udunits') tabledap('erdCinpKfmT', fields=c('longitude','latitude','time','temperature'), 'time>=2007-09-19', 'time<=2007-09-21', units='ucum') # Use orderby parameter tabledap('erdCinpKfmT', fields=c('longitude','latitude','time','temperature'), 'time>=2007-09-19', 'time<=2007-09-21', orderby='temperature') # Use orderbymax parameter tabledap('erdCinpKfmT', fields=c('longitude','latitude','time','temperature'), 'time>=2007-09-19', 'time<=2007-09-21', orderbymax='temperature') # Use orderbymin 
parameter tabledap('erdCinpKfmT', fields=c('longitude','latitude','time','temperature'), 'time>=2007-09-19', 'time<=2007-09-21', orderbymin='temperature') # Use orderbyminmax parameter tabledap('erdCinpKfmT', fields=c('longitude','latitude','time','temperature'), 'time>=2007-09-19', 'time<=2007-09-21', orderbyminmax='temperature') # Use orderbymax parameter with multiple values tabledap('erdCinpKfmT', fields=c('longitude','latitude','time','depth','temperature'), 'time>=2007-06-10', 'time<=2007-09-21', orderbymax=c('depth','temperature') ) # Integrate with taxize out <- tabledap('erdCalCOFIlrvcntHBtoHI', fields = c('latitude','longitude','scientific_name','itis_tsn'), 'time>=2007-06-10', 'time<=2007-09-21' ) tsns <- unique(out$itis_tsn[1:100]) library("taxize") classif <- classification(tsns, db = "itis") head(rbind(classif)); tail(rbind(classif)) # Write to memory (within R), or to disk (out <- info('erdCinpKfmBT')) ## disk, by default (to prevent bogging down system w/ large datasets) ## the 2nd call is much faster as it's mostly just the time of reading ## in the table from disk system.time( tabledap('erdCinpKfmBT', store = disk()) ) system.time( tabledap('erdCinpKfmBT', store = disk()) ) ## memory tabledap('erdCinpKfmBT', store = memory()) # use a different ERDDAP server ## NOAA IOOS NERACOOS url <- "http://www.neracoos.org/erddap/" tabledap("E01_optics_hist", url = url) ## End(Not run)
## Not run: # Just passing the datasetid without fields gives all columns back tabledap('erdCinpKfmBT') # Pass time constraints tabledap('erdCinpKfmBT', 'time>=2006-08-24') # Pass in fields (i.e., columns to retrieve) & time constraints tabledap('erdCinpKfmBT', fields = c('longitude', 'latitude', 'Aplysia_californica_Mean_Density'), 'time>=2006-08-24' ) # Get info on a datasetid, then get data given information learned info('erdCalCOFIlrvsiz')$variables tabledap('erdCalCOFIlrvsiz', fields=c('latitude','longitude','larvae_size', 'itis_tsn'), 'time>=2011-10-25', 'time<=2011-10-31') # An example workflow ## Search for data (out <- ed_search(query='fish', which = 'table')) ## Using a datasetid, search for information on a datasetid id <- out$alldata[[1]]$dataset_id vars <- info(id)$variables ## Get data from the dataset vars$variable_name[1:3] tabledap(id, fields = vars$variable_name[1:3]) # Time constraint ## Limit by time with date only (info <- info('erdCinpKfmBT')) tabledap(info, fields = c( 'latitude','longitude','Haliotis_fulgens_Mean_Density'), 'time>=2001-07-14') # Use distinct parameter - compare to distinct = FALSE tabledap('sg114_3', fields=c('longitude','latitude','trajectory'), 'time>=2008-12-05', distinct = TRUE) # Use units parameter ## In this example, values are the same, but sometimes they can be different ## given the units value passed tabledap('erdCinpKfmT', fields=c('longitude','latitude','time','temperature'), 'time>=2007-09-19', 'time<=2007-09-21', units='udunits') tabledap('erdCinpKfmT', fields=c('longitude','latitude','time','temperature'), 'time>=2007-09-19', 'time<=2007-09-21', units='ucum') # Use orderby parameter tabledap('erdCinpKfmT', fields=c('longitude','latitude','time','temperature'), 'time>=2007-09-19', 'time<=2007-09-21', orderby='temperature') # Use orderbymax parameter tabledap('erdCinpKfmT', fields=c('longitude','latitude','time','temperature'), 'time>=2007-09-19', 'time<=2007-09-21', orderbymax='temperature') # Use orderbymin 
parameter tabledap('erdCinpKfmT', fields=c('longitude','latitude','time','temperature'), 'time>=2007-09-19', 'time<=2007-09-21', orderbymin='temperature') # Use orderbyminmax parameter tabledap('erdCinpKfmT', fields=c('longitude','latitude','time','temperature'), 'time>=2007-09-19', 'time<=2007-09-21', orderbyminmax='temperature') # Use orderbymax parameter with multiple values tabledap('erdCinpKfmT', fields=c('longitude','latitude','time','depth','temperature'), 'time>=2007-06-10', 'time<=2007-09-21', orderbymax=c('depth','temperature') ) # Integrate with taxize out <- tabledap('erdCalCOFIlrvcntHBtoHI', fields = c('latitude','longitude','scientific_name','itis_tsn'), 'time>=2007-06-10', 'time<=2007-09-21' ) tsns <- unique(out$itis_tsn[1:100]) library("taxize") classif <- classification(tsns, db = "itis") head(rbind(classif)); tail(rbind(classif)) # Write to memory (within R), or to disk (out <- info('erdCinpKfmBT')) ## disk, by default (to prevent bogging down system w/ large datasets) ## the 2nd call is much faster as it's mostly just the time of reading ## in the table from disk system.time( tabledap('erdCinpKfmBT', store = disk()) ) system.time( tabledap('erdCinpKfmBT', store = disk()) ) ## memory tabledap('erdCinpKfmBT', store = memory()) # use a different ERDDAP server ## NOAA IOOS NERACOOS url <- "http://www.neracoos.org/erddap/" tabledap("E01_optics_hist", url = url) ## End(Not run)
Get ERDDAP version
version(url = eurl(), ...)
version(url = eurl(), ...)
url |
A URL for an ERDDAP server. Default:
https://upwell.pfeg.noaa.gov/erddap/ - See |
... |
Curl options passed on to crul::verb-GET |
## Not run: version() ss <- servers() version(ss$url[2]) version(ss$url[3]) ## End(Not run)
## Not run: version() ss <- servers() version(ss$url[2]) version(ss$url[3]) ## End(Not run)