Title: | Interface to the Global Biodiversity Information Facility API |
---|---|
Description: | A programmatic interface to the Web Service methods provided by the Global Biodiversity Information Facility (GBIF; <https://www.gbif.org/developer/summary>). GBIF is a database of species occurrence records from sources all over the globe. rgbif includes functions for searching for taxonomic names, retrieving information on data providers, getting species occurrence records, getting counts of occurrence records, and using the GBIF tile map service to make rasters summarizing huge amounts of data. |
Authors: | Scott Chamberlain [aut] , Damiano Oldoni [aut] , Vijay Barve [ctb] , Peter Desmet [ctb] , Laurens Geffert [ctb], Dan Mcglinn [ctb] , Karthik Ram [ctb] , rOpenSci [fnd] (https://ropensci.org/), John Waller [aut, cre] |
Maintainer: | John Waller <[email protected]> |
License: | MIT + file LICENSE |
Version: | 3.8.1.1 |
Built: | 2024-11-15 06:20:04 UTC |
Source: | https://github.com/ropensci/rgbif |
rgbif: A programmatic interface to the Web Service methods provided by the Global Biodiversity Information Facility.
This package gives you access to data from GBIF https://www.gbif.org/ via their API.
summary https://www.gbif.org/developer/summary - Summary of the GBIF API
registry https://www.gbif.org/developer/registry - Metadata on datasets, and contributing organizations
species names https://www.gbif.org/developer/species - Species names and metadata
occurrences https://www.gbif.org/developer/occurrence - Occurrences
maps https://www.gbif.org/developer/maps - Maps - these APIs are not implemented in rgbif, and are meant more for integration with web based maps.
Scott Chamberlain
Karthik Ram
Dan Mcglinn
Vijay Barve
John Waller
Check input WKT
check_wkt(wkt = NULL, skip_validate = FALSE)
check_wkt(wkt = NULL, skip_validate = FALSE)
wkt |
(character) one or more Well Known Text objects |
skip_validate |
(logical) whether to skip validation of the WKT input. Default: FALSE |
## Not run: check_wkt('POLYGON((30.1 10.1, 10 20, 20 60, 60 60, 30.1 10.1))') check_wkt('POINT(30.1 10.1)') check_wkt('LINESTRING(3 4,10 50,20 25)') # check many passed in at once check_wkt(c('POLYGON((30.1 10.1, 10 20, 20 60, 60 60, 30.1 10.1))', 'POINT(30.1 10.1)')) # bad WKT # wkt <- 'POLYGON((30.1 10.1, 10 20, 20 60, 60 60, 30.1 a))' # check_wkt(wkt) # many wkt's, semi-colon separated, for many repeated "geometry" args wkt <- "POLYGON((-102.2 46.0,-93.9 46.0,-93.9 43.7,-102.2 43.7,-102.2 46.0)) ;POLYGON((30.1 10.1, 10 20, 20 40, 40 40, 30.1 10.1))" check_wkt(gsub("\n", '', wkt)) ## End(Not run)
## Not run: check_wkt('POLYGON((30.1 10.1, 10 20, 20 60, 60 60, 30.1 10.1))') check_wkt('POINT(30.1 10.1)') check_wkt('LINESTRING(3 4,10 50,20 25)') # check many passed in at once check_wkt(c('POLYGON((30.1 10.1, 10 20, 20 60, 60 60, 30.1 10.1))', 'POINT(30.1 10.1)')) # bad WKT # wkt <- 'POLYGON((30.1 10.1, 10 20, 20 60, 60 60, 30.1 a))' # check_wkt(wkt) # many wkt's, semi-colon separated, for many repeated "geometry" args wkt <- "POLYGON((-102.2 46.0,-93.9 46.0,-93.9 43.7,-102.2 43.7,-102.2 46.0)) ;POLYGON((30.1 10.1, 10 20, 20 40, 40 40, 30.1 10.1))" check_wkt(gsub("\n", '', wkt)) ## End(Not run)
Facetted count occurrence search.
count_facet(keys = NULL, by = "country", countries = 10, removezeros = FALSE)
count_facet(keys = NULL, by = "country", countries = 10, removezeros = FALSE)
keys |
(numeric) GBIF keys, a vector. optional |
by |
(character) One of georeferenced, basisOfRecord, country, or publishingCountry. default: country |
countries |
(numeric) Number of countries to facet on, or a vector of country names. default: 10 |
removezeros |
(logical) remove zeros or not? default: FALSE |
## Not run: # Select number of countries to facet on count_facet(by='country', countries=3, removezeros = TRUE) # Or, pass in country names count_facet(by='country', countries='AR', removezeros = TRUE) spplist <- c('Geothlypis trichas','Tiaris olivacea','Pterodroma axillaris', 'Calidris ferruginea','Pterodroma macroptera', 'Gallirallus australis', 'Falco cenchroides','Telespiza cantans','Oreomystis bairdi', 'Cistothorus palustris') keys <- sapply(spplist, function(x) name_backbone(x, rank="species")$usageKey) count_facet(keys, by='country', countries=3, removezeros = TRUE) count_facet(keys, by='country', countries=3, removezeros = FALSE) count_facet(by='country', countries=20, removezeros = TRUE) count_facet(keys, by='basisOfRecord', countries=5, removezeros = TRUE) # get occurrences by georeferenced state ## across all records count_facet(by='georeferenced') ## by keys count_facet(keys, by='georeferenced') # by basisOfRecord count_facet(by="basisOfRecord") ## End(Not run)
## Not run: # Select number of countries to facet on count_facet(by='country', countries=3, removezeros = TRUE) # Or, pass in country names count_facet(by='country', countries='AR', removezeros = TRUE) spplist <- c('Geothlypis trichas','Tiaris olivacea','Pterodroma axillaris', 'Calidris ferruginea','Pterodroma macroptera', 'Gallirallus australis', 'Falco cenchroides','Telespiza cantans','Oreomystis bairdi', 'Cistothorus palustris') keys <- sapply(spplist, function(x) name_backbone(x, rank="species")$usageKey) count_facet(keys, by='country', countries=3, removezeros = TRUE) count_facet(keys, by='country', countries=3, removezeros = FALSE) count_facet(by='country', countries=20, removezeros = TRUE) count_facet(keys, by='basisOfRecord', countries=5, removezeros = TRUE) # get occurrences by georeferenced state ## across all records count_facet(by='georeferenced') ## by keys count_facet(keys, by='georeferenced') # by basisOfRecord count_facet(by="basisOfRecord") ## End(Not run)
Search for more obscure dataset metadata.
dataset( country = NULL, type = NULL, identifierType = NULL, identifier = NULL, machineTagNamespace = NULL, machineTagName = NULL, machineTagValue = NULL, modified = NULL, query = NULL, deleted = FALSE, limit = NULL, start = NULL, curlopts = list() )
dataset( country = NULL, type = NULL, identifierType = NULL, identifier = NULL, machineTagNamespace = NULL, machineTagName = NULL, machineTagValue = NULL, modified = NULL, query = NULL, deleted = FALSE, limit = NULL, start = NULL, curlopts = list() )
country |
The 2-letter country code (as per ISO-3166-1) of the country publishing the dataset. |
type |
The primary type of the dataset. Available values : OCCURRENCE, CHECKLIST, METADATA, SAMPLING_EVENT, MATERIAL_ENTITY. |
identifierType |
An identifier type for the identifier parameter. Available values : URL, LSID, HANDLER, DOI, UUID, FTP, URI, UNKNOWN, GBIF_PORTAL, GBIF_NODE, GBIF_PARTICIPANT, GRSCICOLL_ID, GRSCICOLL_URI, IH_IRN, ROR, GRID, CITES, SYMBIOTA_UUID, WIKIDATA, NCBI_BIOCOLLECTION. |
identifier |
An identifier of the type given by the identifierType parameter. |
machineTagNamespace |
Filters for entities with a machine tag in the specified namespace. |
machineTagName |
Filters for entities with a machine tag with the specified name (use in combination with the machineTagNamespace parameter). |
machineTagValue |
Filters for entities with a machine tag with the specified value (use in combination with the machineTagNamespace and machineTagName parameters). |
modified |
The modified date of the dataset. Accepts ranges, and a '*' can be used as a wildcard, e.g.: modified=2023-04-01,* |
query |
Simple full text search parameter. The value for this parameter can be a simple word or a phrase. Wildcards are not supported. |
deleted |
Logical specifying whether to return only deleted datasets. |
limit |
Controls the number of results in the page. |
start |
Determines the start for the search results. |
curlopts |
options passed on to crul::HttpClient. |
This function allows you to search for some more obscure dataset metadata
that might not be possible with dataset_search()
. For example, searching
through registry machinetags.
A list
.
## Not run: dataset(limit=3) dataset(country="US",limit=3) dataset(type="CHECKLIST",limit=3) dataset(identifierType = "URL",limit=3) dataset(identifier = 168,limit=3) dataset(machineTagNamespace = "metasync.gbif.org",limit=3) dataset(machineTagName = "datasetTitle",limit=3) dataset(machineTagValue = "Borkhart",limit=3) dataset(modified = "2023-04-01", limit=3) dataset(q = "dog", limit=3) dataset(deleted=TRUE,limit=3) ## End(Not run)
## Not run: dataset(limit=3) dataset(country="US",limit=3) dataset(type="CHECKLIST",limit=3) dataset(identifierType = "URL",limit=3) dataset(identifier = 168,limit=3) dataset(machineTagNamespace = "metasync.gbif.org",limit=3) dataset(machineTagName = "datasetTitle",limit=3) dataset(machineTagValue = "Borkhart",limit=3) dataset(modified = "2023-04-01", limit=3) dataset(q = "dog", limit=3) dataset(deleted=TRUE,limit=3) ## End(Not run)
Get a GBIF dataset from a doi
dataset_doi(doi = NULL, limit = 20, start = NULL, curlopts = list())
dataset_doi(doi = NULL, limit = 20, start = NULL, curlopts = list())
doi |
the doi of the dataset you wish to lookup. |
limit |
Controls the number of results in the page. |
start |
Determines the offset for the search results. |
curlopts |
options passed on to crul::HttpClient. |
This function allows for dataset lookup using a DOI. Be aware that some DOIs have more than one dataset associated with them.
A list
.
## Not run: dataset_doi('10.15468/igasai') ## End(Not run)
## Not run: dataset_doi('10.15468/igasai') ## End(Not run)
Check if a dataset is gridded
dataset_gridded( uuid = NULL, min_dis = 0.05, min_per = 50, min_dis_count = 30, return = "logical", warn = TRUE )
dataset_gridded( uuid = NULL, min_dis = 0.05, min_per = 50, min_dis_count = 30, return = "logical", warn = TRUE )
uuid |
(vector) A character vector of GBIF datasetkey uuids. |
min_dis |
(numeric) (default 0.05) Minimum distance in degrees to accept as gridded. |
min_per |
(integer)(default 50%) Minimum percentage of points having same nearest neighbor distance to be considered gridded. |
min_dis_count |
(default 30) Minimum number of unique points to accept an assessment of 'griddyness'. |
return |
(character) (default "logical"). Choice of "data" will return a data.frame of more information or "logical" will return just TRUE or FALSE indicating whether a dataset is considered 'gridded'. |
warn |
(logical) indicates whether to warn about missing values or bad values. |
Gridded datasets are a known problem at GBIF. Many datasets have equally-spaced points in a regular pattern. These datasets are usually systematic national surveys or data taken from some atlas (“so-called rasterized collection designs”). This function uses the percentage of unique lat-long points with the most common nearest neighbor distance to identify gridded datasets.
https://data-blog.gbif.org/post/finding-gridded-datasets/
I recommend keeping the default values for the parameters.
A logical vector
indicating whether a dataset is considered gridded.
Or if return="data"
, a data.frame
of more information.
## Not run: dataset_gridded("9070a460-0c6e-11dd-84d2-b8a03c50a862") dataset_gridded(c("9070a460-0c6e-11dd-84d2-b8a03c50a862", "13b70480-bd69-11dd-b15f-b8a03c50a862")) ## End(Not run)
## Not run: dataset_gridded("9070a460-0c6e-11dd-84d2-b8a03c50a862") dataset_gridded(c("9070a460-0c6e-11dd-84d2-b8a03c50a862", "13b70480-bd69-11dd-b15f-b8a03c50a862")) ## End(Not run)
List datasets that are deleted or have no endpoint.
dataset_duplicate(limit = 20, start = NULL, curlopts = list()) dataset_noendpoint(limit = 20, start = NULL, curlopts = list())
dataset_duplicate(limit = 20, start = NULL, curlopts = list()) dataset_noendpoint(limit = 20, start = NULL, curlopts = list())
limit |
Controls the number of results in the page. |
start |
Determines the start for the search results. |
curlopts |
options passed on to crul::HttpClient. |
Get a list of deleted datasets or datasets with no endpoint. You get the full list,
and no parameters aside from limit
and start
are accepted.
A list
.
## Not run: dataset_noendpoint(limit=3) ## End(Not run)
## Not run: dataset_noendpoint(limit=3) ## End(Not run)
Search for dataset metadata.
dataset_export( query = NULL, type = NULL, publishingCountry = NULL, subtype = NULL, license = NULL, keyword = NULL, publishingOrg = NULL, hostingOrg = NULL, endorsingNodeKey = NULL, decade = NULL, projectId = NULL, hostingCountry = NULL, networkKey = NULL, doi = NULL ) dataset_search( query = NULL, type = NULL, publishingCountry = NULL, subtype = NULL, license = NULL, keyword = NULL, publishingOrg = NULL, hostingOrg = NULL, endorsingNodeKey = NULL, decade = NULL, projectId = NULL, hostingCountry = NULL, networkKey = NULL, doi = NULL, facet = NULL, facetLimit = NULL, facetOffset = NULL, facetMincount = NULL, facetMultiselect = NULL, limit = 100, start = NULL, description = FALSE, curlopts = list() ) dataset_suggest( query = NULL, type = NULL, publishingCountry = NULL, subtype = NULL, license = NULL, keyword = NULL, publishingOrg = NULL, hostingOrg = NULL, endorsingNodeKey = NULL, decade = NULL, projectId = NULL, hostingCountry = NULL, networkKey = NULL, doi = NULL, limit = 100, start = NULL, description = FALSE, curlopts = list() )
dataset_export( query = NULL, type = NULL, publishingCountry = NULL, subtype = NULL, license = NULL, keyword = NULL, publishingOrg = NULL, hostingOrg = NULL, endorsingNodeKey = NULL, decade = NULL, projectId = NULL, hostingCountry = NULL, networkKey = NULL, doi = NULL ) dataset_search( query = NULL, type = NULL, publishingCountry = NULL, subtype = NULL, license = NULL, keyword = NULL, publishingOrg = NULL, hostingOrg = NULL, endorsingNodeKey = NULL, decade = NULL, projectId = NULL, hostingCountry = NULL, networkKey = NULL, doi = NULL, facet = NULL, facetLimit = NULL, facetOffset = NULL, facetMincount = NULL, facetMultiselect = NULL, limit = 100, start = NULL, description = FALSE, curlopts = list() ) dataset_suggest( query = NULL, type = NULL, publishingCountry = NULL, subtype = NULL, license = NULL, keyword = NULL, publishingOrg = NULL, hostingOrg = NULL, endorsingNodeKey = NULL, decade = NULL, projectId = NULL, hostingCountry = NULL, networkKey = NULL, doi = NULL, limit = 100, start = NULL, description = FALSE, curlopts = list() )
query |
Simple full text search parameter. The value for this parameter can be a simple word or a phrase. Wildcards are not supported. |
type |
The primary type of the dataset. Available values: "OCCURRENCE", "CHECKLIST", "METADATA", "SAMPLING_EVENT", "MATERIAL_ENTITY". |
publishingCountry |
Filters datasets by their owning organization's country given as an ISO 3166-1 (2 letter) country code. |
subtype |
The sub-type of the dataset. Available values: "TAXONOMIC_AUTHORITY", "NOMENCLATOR_AUTHORITY", "INVENTORY_THEMATIC", "INVENTORY_REGIONAL", "GLOBAL_SPECIES_DATASET", "DERIVED_FROM_OCCURRENCE", "SPECIMEN", "OBSERVATION". |
license |
The dataset's licence. Available values: "CC0_1_0", "CC_BY_4_0", "CC_BY_NC_4_0", "UNSPECIFIED", "UNSUPPORTED". |
keyword |
Filters datasets by a case insensitive plain text keyword. The search is done on the merged collection of tags, the dataset keywordCollections and temporalCoverages. |
publishingOrg |
Filters datasets by their publishing organization UUID key. |
hostingOrg |
Filters datasets by their hosting organization UUID key |
endorsingNodeKey |
Node UUID key that endorsed this dataset's publisher. |
decade |
Filters datasets by their temporal coverage broken down to decades. Decades are given as a full year, e.g. 1880, 1960, 2000, etc, and will return datasets wholly contained in the decade as well as those that cover the entire decade or more. Ranges can be used like this "1800,1900". |
projectId |
Filter or facet based on the project ID of a given dataset. A dataset can have a project id if it is the result of a project. Multiple datasets can have the same project id. |
hostingCountry |
Filters datasets by their hosting organization's country given as an ISO 3166-1 (2 letter) country code. |
networkKey |
Filters network UUID associated to a dataset. |
doi |
DOI of the dataset. |
facet |
A facet name used to retrieve the most frequent values for a field. |
facetLimit |
Facet parameters allow paging requests using the parameters facetOffset and facetLimit. |
facetOffset |
Facet parameters allow paging requests using the parameters facetOffset and facetLimit |
facetMincount |
Used in combination with the facet parameter. |
facetMultiselect |
Used in combination with the facet parameter. |
limit |
Controls the number of results in the page. Using too high a value will be overwritten with the default maximum threshold, depending on the service. Sensible defaults are used so this may be omitted. |
start |
Determines the offset for the search results. A limit of 20 and offset of 40 will get the third page of 20 results. Some services have a maximum offset. |
description |
Logical whether to return descriptions. |
curlopts |
options passed on to crul::HttpClient. |
dataset_search()
searches and returns metadata on GBIF datasets from the
registry. This function does not search occurrence data, only metadata on
the datasets that may contain occurrence data. It also searches over
other dataset types, such as checklist and metadata datasets. Only a sample of
results is returned.
dataset_export()
function will download a tibble
of the results of a
dataset_search()
. This function is primarily useful if you want the full results of a
dataset_search()
.
Use dataset_search(facet="x",limit=0)$facets
to get simple group by counts
for different parameters.
A list
for dataset_search()
. A tibble
for dataset_export()
.
https://techdocs.gbif.org/en/openapi/v1/registry#/Datasets/searchDatasets
## Not run: # search metadata on all datasets and return a sample dataset_search() # dataset_export() # download info on all +90K datasets dataset_search(publishingCountry = "US") dataset_search(type = "OCCURRENCE") dataset_search(keyword = "bird") dataset_search(subtype = "TAXONOMIC_AUTHORITY") dataset_search(license = "CC0_1_0") dataset_search(query = "frog") dataset_search(publishingCountry = "UA") dataset_search(publishingOrg = "e2e717bf-551a-4917-bdc9-4fa0f342c530") dataset_search(hostingOrg = "7ce8aef0-9e92-11dc-8738-b8a03c50a862") dataset_search(decade="1890,1990",limit=5) dataset_search(projectId = "GRIIS") dataset_search(hostingCountry = "NO") dataset_search(networkKey = "99d66b6c-9087-452f-a9d4-f15f2c2d0e7e") dataset_search(doi='10.15468/aomfnb') # search multiple values dataset_search(projectId = "GRIIS;BID-AF2020-140-REG") dataset_search(hostingCountry = "NO;SE") dataset_search(doi="10.15468/aomfnb;10.15468/igasai") # multiple filters dataset_search(license = "CC0_1_0",subtype = "TAXONOMIC_AUTHORITY") # dataset_export(license = "CC0_1_0",subtype = "TAXONOMIC_AUTHORITY") # using dataset export to get all datasets dataset_export(decade="1800,1900") dataset_export(projectId="GRIIS") # get simple group by counts dataset_search(facet="type",limit=0,facetLimit=5)$facets dataset_search(facet="publishingCountry",limit=0,facetLimit=5)$facets dataset_search(facet="license",limit=0,facetLimit=5, facetMincount=10000) ## End(Not run)
## Not run: # search metadata on all datasets and return a sample dataset_search() # dataset_export() # download info on all +90K datasets dataset_search(publishingCountry = "US") dataset_search(type = "OCCURRENCE") dataset_search(keyword = "bird") dataset_search(subtype = "TAXONOMIC_AUTHORITY") dataset_search(license = "CC0_1_0") dataset_search(query = "frog") dataset_search(publishingCountry = "UA") dataset_search(publishingOrg = "e2e717bf-551a-4917-bdc9-4fa0f342c530") dataset_search(hostingOrg = "7ce8aef0-9e92-11dc-8738-b8a03c50a862") dataset_search(decade="1890,1990",limit=5) dataset_search(projectId = "GRIIS") dataset_search(hostingCountry = "NO") dataset_search(networkKey = "99d66b6c-9087-452f-a9d4-f15f2c2d0e7e") dataset_search(doi='10.15468/aomfnb') # search multiple values dataset_search(projectId = "GRIIS;BID-AF2020-140-REG") dataset_search(hostingCountry = "NO;SE") dataset_search(doi="10.15468/aomfnb;10.15468/igasai") # multiple filters dataset_search(license = "CC0_1_0",subtype = "TAXONOMIC_AUTHORITY") # dataset_export(license = "CC0_1_0",subtype = "TAXONOMIC_AUTHORITY") # using dataset export to get all datasets dataset_export(decade="1800,1900") dataset_export(projectId="GRIIS") # get simple group by counts dataset_search(facet="type",limit=0,facetLimit=5)$facets dataset_search(facet="publishingCountry",limit=0,facetLimit=5)$facets dataset_search(facet="license",limit=0,facetLimit=5, facetMincount=10000) ## End(Not run)
Get dataset metadata using a datasetkey
dataset_get(uuid = NULL, curlopts = list()) dataset_process(uuid = NULL, limit = 20, start = NULL, curlopts = list()) dataset_networks(uuid = NULL, limit = 20, start = NULL, curlopts = list()) dataset_constituents(uuid = NULL, limit = 20, start = NULL, curlopts = list()) dataset_comment(uuid = NULL, curlopts = list()) dataset_contact(uuid = NULL, curlopts = list()) dataset_endpoint(uuid = NULL, curlopts = list()) dataset_identifier(uuid = NULL, curlopts = list()) dataset_machinetag(uuid = NULL, curlopts = list()) dataset_tag(uuid = NULL, curlopts = list()) dataset_metrics(uuid = NULL, curlopts = list())
dataset_get(uuid = NULL, curlopts = list()) dataset_process(uuid = NULL, limit = 20, start = NULL, curlopts = list()) dataset_networks(uuid = NULL, limit = 20, start = NULL, curlopts = list()) dataset_constituents(uuid = NULL, limit = 20, start = NULL, curlopts = list()) dataset_comment(uuid = NULL, curlopts = list()) dataset_contact(uuid = NULL, curlopts = list()) dataset_endpoint(uuid = NULL, curlopts = list()) dataset_identifier(uuid = NULL, curlopts = list()) dataset_machinetag(uuid = NULL, curlopts = list()) dataset_tag(uuid = NULL, curlopts = list()) dataset_metrics(uuid = NULL, curlopts = list())
uuid |
A GBIF datasetkey uuid. |
curlopts |
options passed on to crul::HttpClient. |
limit |
Number of records to return. |
start |
Record number to start at. |
dataset_metrics()
can only be used with checklist type datasets.
A tibble
or a list
.
https://techdocs.gbif.org/en/openapi/v1/registry
## Not run: dataset_get("38b4c89f-584c-41bb-bd8f-cd1def33e92f") dataset_process("38b4c89f-584c-41bb-bd8f-cd1def33e92f",limit=3) dataset_networks("3dab037f-a520-4bc3-b888-508755c2eb52") dataset_constituents("7ddf754f-d193-4cc9-b351-99906754a03b",limit=3) dataset_comment("2e4cc37b-302e-4f1b-bbbb-1f674ff90e14") dataset_contact("7ddf754f-d193-4cc9-b351-99906754a03b") dataset_endpoint("7ddf754f-d193-4cc9-b351-99906754a03b") dataset_identifier("7ddf754f-d193-4cc9-b351-99906754a03b") dataset_machinetag("7ddf754f-d193-4cc9-b351-99906754a03b") dataset_tag("c47f13c1-7427-45a0-9f12-237aad351040") dataset_metrics("7ddf754f-d193-4cc9-b351-99906754a03b") ## End(Not run)
## Not run: dataset_get("38b4c89f-584c-41bb-bd8f-cd1def33e92f") dataset_process("38b4c89f-584c-41bb-bd8f-cd1def33e92f",limit=3) dataset_networks("3dab037f-a520-4bc3-b888-508755c2eb52") dataset_constituents("7ddf754f-d193-4cc9-b351-99906754a03b",limit=3) dataset_comment("2e4cc37b-302e-4f1b-bbbb-1f674ff90e14") dataset_contact("7ddf754f-d193-4cc9-b351-99906754a03b") dataset_endpoint("7ddf754f-d193-4cc9-b351-99906754a03b") dataset_identifier("7ddf754f-d193-4cc9-b351-99906754a03b") dataset_machinetag("7ddf754f-d193-4cc9-b351-99906754a03b") dataset_tag("c47f13c1-7427-45a0-9f12-237aad351040") dataset_metrics("7ddf754f-d193-4cc9-b351-99906754a03b") ## End(Not run)
Search for datasets and dataset metadata.
datasets( data = "all", type = NULL, uuid = NULL, query = NULL, id = NULL, limit = 100, start = NULL, curlopts = list() )
datasets( data = "all", type = NULL, uuid = NULL, query = NULL, id = NULL, limit = 100, start = NULL, curlopts = list() )
data |
The type of data to get. One or more of: 'organization',
'contact', 'endpoint', 'identifier', 'tag', 'machinetag', 'comment',
'constituents', 'document', 'metadata', 'deleted', 'duplicate',
'subDataset', 'withNoEndpoint', or the special 'all'. Default: 'all' |
type |
Type of dataset. Options: include occurrence, checklist, metadata, or sampling_event. |
uuid |
UUID of the data node provider. This must be specified if data
is anything other than 'all'. |
query |
Query term(s). Only used when data = 'all'. |
id |
A metadata document id. |
limit |
Number of records to return. Default: 100. Maximum: 1000. |
start |
Record number to start at. Default: 0. Use in combination
with limit to page through results. |
curlopts |
list of named curl options passed on to
crul::HttpClient. |
A list.
https://www.gbif.org/developer/registry#datasets
## Not run: datasets(limit=5) datasets(type="occurrence", limit=10) datasets(uuid="a6998220-7e3a-485d-9cd6-73076bd85657") datasets(data='contact', uuid="a6998220-7e3a-485d-9cd6-73076bd85657") datasets(data='metadata', uuid="a6998220-7e3a-485d-9cd6-73076bd85657") datasets(data='metadata', uuid="a6998220-7e3a-485d-9cd6-73076bd85657", id=598) datasets(data=c('deleted','duplicate')) datasets(data=c('deleted','duplicate'), limit=1) # curl options datasets(data=c('deleted','duplicate'), curlopts = list(verbose=TRUE)) ## End(Not run)
## Not run: datasets(limit=5) datasets(type="occurrence", limit=10) datasets(uuid="a6998220-7e3a-485d-9cd6-73076bd85657") datasets(data='contact', uuid="a6998220-7e3a-485d-9cd6-73076bd85657") datasets(data='metadata', uuid="a6998220-7e3a-485d-9cd6-73076bd85657") datasets(data='metadata', uuid="a6998220-7e3a-485d-9cd6-73076bd85657", id=598) datasets(data=c('deleted','duplicate')) datasets(data=c('deleted','duplicate'), limit=1) # curl options datasets(data=c('deleted','duplicate'), curlopts = list(verbose=TRUE)) ## End(Not run)
Register a derived dataset for citation.
derived_dataset( citation_data = NULL, title = NULL, description = NULL, source_url = NULL, gbif_download_doi = NULL, user = NULL, pwd = NULL, curlopts = list() ) derived_dataset_prep( citation_data = NULL, title = NULL, description = NULL, source_url = NULL, gbif_download_doi = NULL, user = NULL, pwd = NULL, curlopts = list() )
derived_dataset( citation_data = NULL, title = NULL, description = NULL, source_url = NULL, gbif_download_doi = NULL, user = NULL, pwd = NULL, curlopts = list() ) derived_dataset_prep( citation_data = NULL, title = NULL, description = NULL, source_url = NULL, gbif_download_doi = NULL, user = NULL, pwd = NULL, curlopts = list() )
citation_data |
(required) A data.frame with two columns. The first column should be GBIF datasetkey uuids and the second column should be occurrence counts from each of your datasets, representing the contribution of each dataset to your final derived dataset. |
title |
(required) The title for your derived dataset. |
description |
(required) A description of the dataset. Perhaps describing how it was created. |
source_url |
(required) A link to where the dataset is stored. |
gbif_download_doi |
(optional) A DOI from an original GBIF download. |
user |
(required) Your GBIF username. |
pwd |
(required) Your GBIF password. |
curlopts |
a list of arguments to pass to curl. |
A list.
Create a citable DOI for a dataset derived from GBIF mediated occurrences.
Use-case (1) your dataset was obtained with occ_search()
and
never returned a citable DOI, but you want to cite the data in a
research paper.
Use-case (2) your dataset was obtained using occ_download()
and you
got a DOI, but the data underwent extensive filtering using
CoordinateCleaner
or some other cleaning pipeline. In this case be sure
to fill in your original gbif_download_doi
.
Use-case (3) your dataset was generated using a GBIF cloud export but you want a DOI to cite in your research paper.
Use derived_dataset
to create a custom citable meta-data description and
most importantly a DOI link between an external archive (e.g. Zenodo) and the
datasets involved in your research or analysis.
All fields (except gbif_download_doi
) are required for the registration to
work.
We recommend that you run derived_dataset_prep()
to check registration
details before making it final with derived_dataset()
.
Some rgbif
functions require your GBIF credentials.
For the user
and pwd
parameters, you can set them in one of
three ways:
Set them in your .Renviron
/.bash_profile
(or similar) file with the
names GBIF_USER
, GBIF_PWD
, and GBIF_EMAIL
Set them in your .Rprofile
file with the names gbif_user
and
gbif_pwd
.
Simply pass strings to each of the parameters in the function call.
We strongly recommend the first option - storing your details as environment variables - as it's the most widely used way to store secrets.
You can edit your .Renviron
with usethis::edit_r_environ()
.
After editing, your .Renviron
file should look something like this...
GBIF_USER="jwaller"
GBIF_PWD="fakepassword123"
GBIF_EMAIL="[email protected]"
See ?Startup
for help.
https://data-blog.gbif.org/post/derived-datasets/ https://www.gbif.org/derived-dataset/about
## Not run: data <- data.frame( datasetKey = c( "3ea36590-9b79-46a8-9300-c9ef0bfed7b8", "630eb55d-5169-4473-99d6-a93396aeae38", "806bf7d4-f762-11e1-a439-00145eb45e9a"), count = c(3, 1, 2781) ) ## If output looks ok, run derived_dataset to register the dataset derived_dataset_prep( citation_data = data, title = "Test for derived dataset", description = "This data was filtered using a fake protocol", source_url = "https://zenodo.org/record/4246090#.YPGS2OgzZPY" ) # derived_dataset( # citation_data = data, # title = "Test for derived dataset", # description = "This data was filtered using a fake protocol", # source_url = "https://zenodo.org/record/4246090#.YPGS2OgzZPY" # ) ## Example with occ_search and dplyr # library(dplyr) # citation_data <- occ_search(taxonKey=212, limit=20)$data %>% # group_by(datasetKey) %>% # count() # # You would still need to upload your data to Zenodo or something similar # derived_dataset_prep( # citation_data = citation_data, # title="Bird data downloaded for test", # description="This data was downloaded using rgbif::occ_search and was # later uploaded to Zenodo.", # source_url="https://zenodo.org/record/4246090#.YPGS2OgzZPY", # gbif_download_doi = NULL, # ) ## End(Not run)
## Not run: data <- data.frame( datasetKey = c( "3ea36590-9b79-46a8-9300-c9ef0bfed7b8", "630eb55d-5169-4473-99d6-a93396aeae38", "806bf7d4-f762-11e1-a439-00145eb45e9a"), count = c(3, 1, 2781) ) ## If output looks ok, run derived_dataset to register the dataset derived_dataset_prep( citation_data = data, title = "Test for derived dataset", description = "This data was filtered using a fake protocol", source_url = "https://zenodo.org/record/4246090#.YPGS2OgzZPY" ) # derived_dataset( # citation_data = data, # title = "Test for derived dataset", # description = "This data was filtered using a fake protocol", # source_url = "https://zenodo.org/record/4246090#.YPGS2OgzZPY" # ) ## Example with occ_search and dplyr # library(dplyr) # citation_data <- occ_search(taxonKey=212, limit=20)$data %>% # group_by(datasetKey) %>% # count() # # You would still need to upload your data to Zenodo or something similar # derived_dataset_prep( # citation_data = citation_data, # title="Bird data downloaded for test", # description="This data was downloaded using rgbif::occ_search and was # later uploaded to Zenodo.", # source_url="https://zenodo.org/record/4246090#.YPGS2OgzZPY", # gbif_download_doi = NULL, # ) ## End(Not run)
Download predicate DSL (domain specific language)
pred(key, value) pred_gt(key, value) pred_gte(key, value) pred_lt(key, value) pred_lte(key, value) pred_not(...) pred_like(key, value) pred_within(value) pred_isnull(key) pred_notnull(key) pred_or(..., .list = list()) pred_and(..., .list = list()) pred_in(key, value) pred_default()
pred(key, value) pred_gt(key, value) pred_gte(key, value) pred_lt(key, value) pred_lte(key, value) pred_not(...) pred_like(key, value) pred_within(value) pred_isnull(key) pred_notnull(key) pred_or(..., .list = list()) pred_and(..., .list = list()) pred_in(key, value) pred_default()
key |
(character) the key for the predicate. See "Keys" below |
value |
(various) the value for the predicate |
... , .list
|
For pred_or() or pred_and(), one or more predicates created by any pred* function. |
pred*
functions are named for the 'type' of operation they do, following
the terminology used by GBIF, see
https://www.gbif.org/developer/occurrence#predicates
Function names are given, with the equivalent GBIF type value (e.g.,
pred_gt
and greaterThan
)
The following functions take one key and one value:
pred
: equals
pred_lt
: lessThan
pred_lte
: lessThanOrEquals
pred_gt
: greaterThan
pred_gte
: greaterThanOrEquals
pred_like
: like
The following function is only for geospatial queries, and only accepts a WKT string:
pred_within
: within
The following function is only for stating that you don't want a key to be null, so it only accepts one key:
pred_notnull
: isNotNull
The following function is only for stating that you want a key to be null.
pred_isnull
: isNull
The following two functions accept multiple individual predicates, separating them by either "and" or "or":
pred_and
: and
pred_or
: or
The not predicate accepts one predicate; that is, this negates whatever predicate is passed in, e.g., not the taxonKey of 12345:
pred_not
: not
The following function is special in that it accepts a single key but many values; stating that you want to search for all the values:
pred_in
: in
The following function will apply commonly used defaults.
pred_default
Using pred_default()
is equivalent to running:
pred_and( pred("HAS_GEOSPATIAL_ISSUE",FALSE), pred("HAS_COORDINATE",TRUE), pred("OCCURRENCE_STATUS","PRESENT"), pred_not(pred_in("BASIS_OF_RECORD", c("FOSSIL_SPECIMEN","LIVING_SPECIMEN"))) )
Internally, the input to pred*
functions turns into JSON to be sent to
GBIF. For example ...
pred_in("taxonKey", c(2480946, 5229208))
gives:
{ "type": "in", "key": "TAXON_KEY", "values": ["2480946", "5229208"] }
pred_gt("elevation", 5000)
gives:
{ "type": "greaterThan", "key": "ELEVATION", "value": "5000" }
pred_or(pred("taxonKey", 2977832), pred("taxonKey", 2977901))
gives:
{ "type": "or", "predicates": [ { "type": "equals", "key": "TAXON_KEY", "value": "2977832" }, { "type": "equals", "key": "TAXON_KEY", "value": "2977901" } ] }
Acceptable arguments to the key
parameter are (with the version of
the key in parens that must be sent if you pass the query via the body
parameter; see below for examples). You can also use the 'ALL_CAPS' version
of a key if you prefer. Open an issue in the GitHub
repository for this package if you know of a key that should
be supported that is not yet.
taxonKey (TAXON_KEY)
acceptedTaxonKey (ACCEPTED_TAXON_KEY)
kingdomKey (KINGDOM_KEY)
phylumKey (PHYLUM_KEY)
classKey (CLASS_KEY)
orderKey (ORDER_KEY)
familyKey (FAMILY_KEY)
genusKey (GENUS_KEY)
subgenusKey (SUBGENUS_KEY)
speciesKey (SPECIES_KEY)
scientificName (SCIENTIFIC_NAME)
country (COUNTRY)
publishingCountry (PUBLISHING_COUNTRY)
hasCoordinate (HAS_COORDINATE)
hasGeospatialIssue (HAS_GEOSPATIAL_ISSUE)
typeStatus (TYPE_STATUS)
recordNumber (RECORD_NUMBER)
lastInterpreted (LAST_INTERPRETED)
modified (MODIFIED)
continent (CONTINENT)
geometry (GEOMETRY)
basisOfRecord (BASIS_OF_RECORD)
datasetKey (DATASET_KEY)
datasetID/datasetId (DATASET_ID)
eventDate (EVENT_DATE)
catalogNumber (CATALOG_NUMBER)
otherCatalogNumbers (OTHER_CATALOG_NUMBERS)
year (YEAR)
month (MONTH)
decimalLatitude (DECIMAL_LATITUDE)
decimalLongitude (DECIMAL_LONGITUDE)
elevation (ELEVATION)
depth (DEPTH)
institutionCode (INSTITUTION_CODE)
collectionCode (COLLECTION_CODE)
issue (ISSUE)
mediatype (MEDIA_TYPE)
recordedBy (RECORDED_BY)
recordedById/recordedByID (RECORDED_BY_ID)
establishmentMeans (ESTABLISHMENT_MEANS)
coordinateUncertaintyInMeters (COORDINATE_UNCERTAINTY_IN_METERS)
gadm (GADM_GID) (for the Database of Global Administrative Areas)
level0Gid (GADM_LEVEL_0_GID)
level1Gid (GADM_LEVEL_1_GID)
level2Gid (GADM_LEVEL_2_GID)
level3Gid (GADM_LEVEL_3_GID)
stateProvince (STATE_PROVINCE)
occurrenceStatus (OCCURRENCE_STATUS)
publishingOrg (PUBLISHING_ORG)
occurrenceId/occurrenceID (OCCURRENCE_ID)
eventId/eventID (EVENT_ID)
parentEventId/parentEventID (PARENT_EVENT_ID)
identifiedBy (IDENTIFIED_BY)
identifiedById/identifiedByID (IDENTIFIED_BY_ID)
license (LICENSE)
locality (LOCALITY)
pathway (PATHWAY)
preparations (PREPARATIONS)
networkKey (NETWORK_KEY)
organismId/organismID (ORGANISM_ID)
organismQuantity (ORGANISM_QUANTITY)
organismQuantityType (ORGANISM_QUANTITY_TYPE)
protocol (PROTOCOL)
relativeOrganismQuantity (RELATIVE_ORGANISM_QUANTITY)
repatriated (REPATRIATED)
sampleSizeUnit (SAMPLE_SIZE_UNIT)
sampleSizeValue (SAMPLE_SIZE_VALUE)
samplingProtocol (SAMPLING_PROTOCOL)
verbatimScientificName (VERBATIM_SCIENTIFIC_NAME)
taxonID/taxonId (TAXON_ID)
taxonomicStatus (TAXONOMIC_STATUS)
waterBody (WATER_BODY)
iucnRedListCategory (IUCN_RED_LIST_CATEGORY)
degreeOfEstablishment (DEGREE_OF_ESTABLISHMENT)
isInCluster (IS_IN_CLUSTER)
lifeStage (LIFE_STAGE)
distanceFromCentroidInMeters (DISTANCE_FROM_CENTROID_IN_METERS)
gbifId (GBIF_ID)
Download predicates docs: https://www.gbif.org/developer/occurrence#predicates
Other downloads:
occ_download_cached()
,
occ_download_cancel()
,
occ_download_dataset_activity()
,
occ_download_datasets()
,
occ_download_get()
,
occ_download_import()
,
occ_download_list()
,
occ_download_meta()
,
occ_download_queue()
,
occ_download_wait()
,
occ_download()
pred("taxonKey", 3119195) pred_gt("elevation", 5000) pred_gte("elevation", 5000) pred_lt("elevation", 1000) pred_lte("elevation", 1000) pred_within("POLYGON((-14 42, 9 38, -7 26, -14 42))") pred_and(pred_within("POLYGON((-14 42, 9 38, -7 26, -14 42))"), pred_gte("elevation", 5000)) pred_or(pred_lte("year", 1989), pred("year", 2000)) pred_and(pred_lte("year", 1989), pred("year", 2000)) pred_in("taxonKey", c(2977832, 2977901, 2977966, 2977835)) pred_in("basisOfRecord", c("MACHINE_OBSERVATION", "HUMAN_OBSERVATION")) pred_not(pred("taxonKey", 729)) pred_like("catalogNumber", "PAPS5-560%") pred_notnull("issue") pred("basisOfRecord", "LITERATURE") pred("hasCoordinate", TRUE) pred("stateProvince", "California") pred("hasGeospatialIssue", FALSE) pred_within("POLYGON((-14 42, 9 38, -7 26, -14 42))") pred_or(pred("taxonKey", 2977832), pred("taxonKey", 2977901), pred("taxonKey", 2977966)) pred_in("taxonKey", c(2977832, 2977901, 2977966, 2977835))
pred("taxonKey", 3119195) pred_gt("elevation", 5000) pred_gte("elevation", 5000) pred_lt("elevation", 1000) pred_lte("elevation", 1000) pred_within("POLYGON((-14 42, 9 38, -7 26, -14 42))") pred_and(pred_within("POLYGON((-14 42, 9 38, -7 26, -14 42))"), pred_gte("elevation", 5000)) pred_or(pred_lte("year", 1989), pred("year", 2000)) pred_and(pred_lte("year", 1989), pred("year", 2000)) pred_in("taxonKey", c(2977832, 2977901, 2977966, 2977835)) pred_in("basisOfRecord", c("MACHINE_OBSERVATION", "HUMAN_OBSERVATION")) pred_not(pred("taxonKey", 729)) pred_like("catalogNumber", "PAPS5-560%") pred_notnull("issue") pred("basisOfRecord", "LITERATURE") pred("hasCoordinate", TRUE) pred("stateProvince", "California") pred("hasGeospatialIssue", FALSE) pred_within("POLYGON((-14 42, 9 38, -7 26, -14 42))") pred_or(pred("taxonKey", 2977832), pred("taxonKey", 2977901), pred("taxonKey", 2977966)) pred_in("taxonKey", c(2977832, 2977901, 2977966, 2977835))
GBIF provides two ways to get occurrence data: through the
/occurrence/search
route (see occ_search()
),
or via the /occurrence/download
route (many functions, see below).
occ_search()
is more appropriate for smaller data, while
occ_download*()
functions are more appropriate for larger data requests.
You'll use occ_download()
to kick off a download. You'll need to
give that function settings from your GBIF profile: your user name, your
password, and your email. These three settings are required to use the
function. You can specify them in one of three ways:
Pass them to occ_download
as parameters
Use R options: As options either in the current R session using
the options()
function, or by setting them in your .Rprofile
file, after
which point they'll be read in automatically
Use environment variables: As env vars either in the current R session using
the Sys.setenv()
function, or by setting them in your
.Renviron
/.bash_profile
or similar files, after which point they'll be read
in automatically
You can not perform that many downloads, so plan wisely. See Rate limiting below.
If you try to launch too many downloads, you will receive a 420 "Enhance Your Calm" response. If there are fewer than 100 downloads in total across all GBIF users, then you can have 3 running at a time. If there are more than that, then each user is limited to 1 only. These numbers are subject to change.
occ_download()
- Start a download
occ_download_prep()
- Prepare a download request
occ_download_queue()
- Start many downloads in a queue
occ_download_cached()
- Check for downloads already in your GBIF account
occ_download_wait()
- Re-run occ_download_meta()
until ready
occ_download_meta()
- Get metadata progress on a single download
occ_download_list()
- List your downloads
occ_download_cancel()
- Cancel a download
occ_download_cancel_staged()
- Cancels any jobs with status RUNNING
or PREPARING
occ_download_get()
- Retrieve a download
occ_download_import()
- Import a download from local file system
occ_download_datasets()
- List datasets for a download
occ_download_dataset_activity()
- Lists the download activity
of a dataset
Download query composer methods:
GBIF has a limit of 12,000 characters for a download query. This means that you can have a pretty long query, but at some point it may lead to an error on GBIF's side and you'll have to split your query into a few.
The following statuses can be found with any download:
PREPARING: just submitted by user and awaiting processing (typically only a few seconds)
RUNNING: being created (takes typically 1-15 minutes)
FAILED: something unexpected went wrong
KILLED: user decided to abort the job while it was in PREPARING or RUNNING phase
SUCCEEDED: The download was created and the user was informed
FILE_ERASED: The download was deleted according to the retention policy, see https://www.gbif.org/faq?question=for-how-long-will-does-gbif-store-downloads
Uses the GeoNames web service
elevation( input = NULL, latitude = NULL, longitude = NULL, latlong = NULL, elevation_model = "srtm3", username = Sys.getenv("GEONAMES_USER"), key, curlopts, ... )
elevation( input = NULL, latitude = NULL, longitude = NULL, latlong = NULL, elevation_model = "srtm3", username = Sys.getenv("GEONAMES_USER"), key, curlopts, ... )
input |
A data.frame of lat/long data. There must be columns decimalLatitude and decimalLongitude. |
latitude |
A vector of latitudes. Must be the same length as the longitude vector. |
longitude |
A vector of longitudes. Must be the same length as the latitude vector. |
latlong |
A vector of lat/long pairs. See examples. |
elevation_model |
(character) one of srtm3 (default), srtm1, astergdem, or gtopo30. See "Elevation models" below for more |
username |
(character) Required. A GeoNames user name. See Details. |
key , curlopts
|
defunct. see docs |
... |
curl options passed on to crul::verb-GET
see |
A new column named elevation_geonames
in the supplied data.frame
or a vector with elevation of each location in meters. Note that data from
GBIF can already have a column named elevation
, thus the column we
add is named differently.
To get a GeoNames user name, register for an account at
http://www.geonames.org/login - then you can enable your account for the
GeoNames webservice on your account page
(http://www.geonames.org/manageaccount). Once you are enabled to use
the webservice, you can pass in your username to the username
parameter. Better yet, store your username in your .Renviron
file, or
similar (e.g., .zshrc or .bash_profile files) and read it in via
Sys.getenv()
as in the examples below. By default we do
Sys.getenv("GEONAMES_USER")
for the username
parameter.
srtm3:
sample area: ca 90m x 90m
result: a single number giving the elevation in meters according to srtm3, ocean areas have been masked as "no data" and have been assigned a value of -32768
srtm1:
sample area: ca 30m x 30m
result: a single number giving the elevation in meters according to srtm1, ocean areas have been masked as "no data" and have been assigned a value of -32768
astergdem (Aster Global Digital Elevation Model V2 2011):
sample area: ca 30m x 30m, between 83N and 65S latitude
result: a single number giving the elevation in meters according to aster gdem, ocean areas have been masked as "no data" and have been assigned a value of -32768
gtopo30:
sample area: ca 1km x 1km
result: a single number giving the elevation in meters according to gtopo30, ocean areas have been masked as "no data" and have been assigned a value of -9999
GeoNames http://www.geonames.org/export/web-services.html
## Not run: user <- Sys.getenv("GEONAMES_USER") occ_key <- name_suggest('Puma concolor')$key[1] dat <- occ_search(taxonKey = occ_key, limit = 300, hasCoordinate = TRUE) head( elevation(dat$data, username = user) ) # Pass in a vector of lat's and a vector of long's elevation(latitude = dat$data$decimalLatitude[1:10], longitude = dat$data$decimalLongitude[1:10], username = user, verbose = TRUE) # Pass in lat/long pairs in a single vector pairs <- list(c(31.8496,-110.576060), c(29.15503,-103.59828)) elevation(latlong=pairs, username = user) # Pass on curl options pairs <- list(c(31.8496,-110.576060), c(29.15503,-103.59828)) elevation(latlong=pairs, username = user, verbose = TRUE) # different elevation models lats <- dat$data$decimalLatitude[1:5] lons <- dat$data$decimalLongitude[1:5] elevation(latitude = lats, longitude = lons, elevation_model = "srtm3") elevation(latitude = lats, longitude = lons, elevation_model = "srtm1") elevation(latitude = lats, longitude = lons, elevation_model = "astergdem") elevation(latitude = lats, longitude = lons, elevation_model = "gtopo30") ## End(Not run)
## Not run: user <- Sys.getenv("GEONAMES_USER") occ_key <- name_suggest('Puma concolor')$key[1] dat <- occ_search(taxonKey = occ_key, limit = 300, hasCoordinate = TRUE) head( elevation(dat$data, username = user) ) # Pass in a vector of lat's and a vector of long's elevation(latitude = dat$data$decimalLatitude[1:10], longitude = dat$data$decimalLongitude[1:10], username = user, verbose = TRUE) # Pass in lat/long pairs in a single vector pairs <- list(c(31.8496,-110.576060), c(29.15503,-103.59828)) elevation(latlong=pairs, username = user) # Pass on curl options pairs <- list(c(31.8496,-110.576060), c(29.15503,-103.59828)) elevation(latlong=pairs, username = user, verbose = TRUE) # different elevation models lats <- dat$data$decimalLatitude[1:5] lons <- dat$data$decimalLongitude[1:5] elevation(latitude = lats, longitude = lons, elevation_model = "srtm3") elevation(latitude = lats, longitude = lons, elevation_model = "srtm1") elevation(latitude = lats, longitude = lons, elevation_model = "astergdem") elevation(latitude = lats, longitude = lons, elevation_model = "gtopo30") ## End(Not run)
Many parts of the GBIF API make use of enumerations, i.e. controlled vocabularies for specific topics - and are available via these functions
enumeration(x = NULL, curlopts = list()) enumeration_country(curlopts = list())
enumeration(x = NULL, curlopts = list()) enumeration_country(curlopts = list())
x |
A given enumeration. |
curlopts |
list of named curl options passed on to
|
enumeration
returns a character vector, while
enumeration_country
returns a data.frame.
## Not run: # basic enumeration enumeration() enumeration("NameType") enumeration("MetadataType") enumeration("TypeStatus") # country enumeration enumeration_country() # curl options enumeration(curlopts = list(verbose=TRUE)) ## End(Not run)
## Not run: # basic enumeration enumeration() enumeration("NameType") enumeration("MetadataType") enumeration("TypeStatus") # country enumeration enumeration_country() # curl options enumeration(curlopts = list(verbose=TRUE)) ## End(Not run)
Convert a bounding box to a Well Known Text polygon, and a WKT to a bounding box
gbif_bbox2wkt(minx = NA, miny = NA, maxx = NA, maxy = NA, bbox = NULL) gbif_wkt2bbox(wkt = NULL)
gbif_bbox2wkt(minx = NA, miny = NA, maxx = NA, maxy = NA, bbox = NULL) gbif_wkt2bbox(wkt = NULL)
minx |
(numeric) Minimum x value, or the most western longitude |
miny |
(numeric) Minimum y value, or the most southern latitude |
maxx |
(numeric) Maximum x value, or the most eastern longitude |
maxy |
(numeric) Maximum y value, or the most northern latitude |
bbox |
(numeric) A vector of length 4, with the elements: minx, miny, maxx, maxy |
wkt |
(character) A Well Known Text object. |
gbif_bbox2wkt returns an object of class character, a Well Known Text string of the form 'POLYGON((minx miny, maxx miny, maxx maxy, minx maxy, minx miny))'.
gbif_wkt2bbox returns a numeric vector of length 4, like c(minx, miny, maxx, maxy)
## Not run: # Convert a bounding box to a WKT ## Pass in a vector of length 4 with all values gbif_bbox2wkt(bbox=c(-125.0,38.4,-121.8,40.9)) ## Or pass in each value separately gbif_bbox2wkt(minx=-125.0, miny=38.4, maxx=-121.8, maxy=40.9) # Convert a WKT object to a bounding box wkt <- "POLYGON((-125 38.4,-125 40.9,-121.8 40.9,-121.8 38.4,-125 38.4))" gbif_wkt2bbox(wkt) ## End(Not run)
## Not run: # Convert a bounding box to a WKT ## Pass in a vector of length 4 with all values gbif_bbox2wkt(bbox=c(-125.0,38.4,-121.8,40.9)) ## Or pass in each value separately gbif_bbox2wkt(minx=-125.0, miny=38.4, maxx=-121.8, maxy=40.9) # Convert a WKT object to a bounding box wkt <- "POLYGON((-125 38.4,-125 40.9,-121.8 40.9,-121.8 38.4,-125 38.4))" gbif_wkt2bbox(wkt) ## End(Not run)
Get citation for datasets used
gbif_citation(x)
gbif_citation(x)
x |
(character) Result of call to |
The function is deprecated for use with occ_search()
and occ_data()
results, and is deprecated for use with datasetKeys and gbifids.
We encourage you to use derived_dataset()
instead.
occ_download_get()
and occ_download_meta()
results are still supported.
list with S3 class assigned, used by a print method to pretty print citation information. Though you can unclass the output or just index to the named items as needed.
## Not run: # Downloads ## occ_download_get() # d1 <- occ_download(pred("country", "BG"), pred_gte("year", 2020)) # occ_download_meta(d1) # wait until status = succeeded # d1 <- occ_download_get(d1, overwrite = TRUE) # gbif_citation(d1) ## occ_download_meta() # key <- "0000122-171020152545675" # res <- occ_download_meta(key) # gbif_citation(res) ## End(Not run)
## Not run: # Downloads ## occ_download_get() # d1 <- occ_download(pred("country", "BG"), pred_gte("year", 2020)) # occ_download_meta(d1) # wait until status = succeeded # d1 <- occ_download_get(d1, overwrite = TRUE) # gbif_citation(d1) ## occ_download_meta() # key <- "0000122-171020152545675" # res <- occ_download_meta(key) # gbif_citation(res) ## End(Not run)
Geocode lat-lon point(s) with GBIF's set of geo-polygons (experimental)
gbif_geocode(latitude = NULL, longitude = NULL)
gbif_geocode(latitude = NULL, longitude = NULL)
latitude |
a vector of numeric latitude values between -90 and 90. |
longitude |
a vector of numeric longitude values between -180 and 180. |
A data.frame of results from the GBIF geocoding service.
latitude : The input latitude
longitude : The input longitude
index : The original input rownumber
id : The id of the polygon from which the geocode comes
type : One of the following : "Political" (country codes), "IHO" (marine regions), "SeaVox" (marine regions), "WGSRPD" (tdwg regions), "EEZ" (in national waters), or "GADM0", "GADM1", "GADM2", "GADM3" (http://gadm.org/)
title : The name of the source polygon
distance : distance to the polygon border
This function uses the GBIF geocoder API which is not guaranteed to be stable and is undocumented. As such, this may return different data over time, may be rate-limited or may stop working if GBIF change the service. Use this function with caution.
http://gadm.org/ http://marineregions.org/ http://www.tdwg.org/standards/ http://api.gbif.org/v1/geocode/reverse?lat=0&lng=0
## Not run: # one pair gbif_geocode(0,0) # or multiple pairs of points gbif_geocode(c(0,50),c(0,20)) ## End(Not run)
## Not run: # one pair gbif_geocode(0,0) # or multiple pairs of points gbif_geocode(c(0,50),c(0,20)) ## End(Not run)
Returns a data.frame of all GBIF issues with the following columns:
code
: issue short code, e.g. gass84
issue
: issue full name, e.g. GEODETIC_DATUM_ASSUMED_WGS84
description
: issue description
type
: issue type, either related to occurrence
or name
gbif_issues()
gbif_issues()
https://gbif.github.io/gbif-api/apidocs/org/gbif/api/vocabulary/OccurrenceIssue.html https://gbif.github.io/gbif-api/apidocs/org/gbif/api/vocabulary/NameUsageIssue.html
Lookup issue definitions and short codes
gbif_issues_lookup(issue = NULL, code = NULL)
gbif_issues_lookup(issue = NULL, code = NULL)
issue |
Full name of issue, e.g, CONTINENT_COUNTRY_MISMATCH |
code |
An issue short code, e.g. 'ccm' |
gbif_issues_lookup(issue = 'CONTINENT_COUNTRY_MISMATCH') gbif_issues_lookup(code = 'ccm') gbif_issues_lookup(issue = 'COORDINATE_INVALID') gbif_issues_lookup(code = 'cdiv')
gbif_issues_lookup(issue = 'CONTINENT_COUNTRY_MISMATCH') gbif_issues_lookup(code = 'ccm') gbif_issues_lookup(issue = 'COORDINATE_INVALID') gbif_issues_lookup(code = 'cdiv')
View highlighted terms in name results from GBIF.
gbif_names(input, output = NULL, browse = TRUE)
gbif_names(input, output = NULL, browse = TRUE)
input |
Input, the output from name_lookup() |
output |
Output folder path. If not given uses temporary folder. |
browse |
(logical) Browse output (default: |
## Not run: # browse=FALSE returns path to file gbif_names(name_lookup(query='snake', hl=TRUE), browse=FALSE) (out <- name_lookup(query='canada', hl=TRUE, limit=5)) gbif_names(out) gbif_names(name_lookup(query='snake', hl=TRUE)) gbif_names(name_lookup(query='bird', hl=TRUE)) # or not highlight gbif_names(name_lookup(query='bird', limit=200)) ## End(Not run)
## Not run: # browse=FALSE returns path to file gbif_names(name_lookup(query='snake', hl=TRUE), browse=FALSE) (out <- name_lookup(query='canada', hl=TRUE, limit=5)) gbif_names(out) gbif_names(name_lookup(query='snake', hl=TRUE)) gbif_names(name_lookup(query='bird', hl=TRUE)) # or not highlight gbif_names(name_lookup(query='bird', limit=200)) ## End(Not run)
GBIF registry data via OAI-PMH
gbif_oai_identify(...) gbif_oai_list_identifiers( prefix = "oai_dc", from = NULL, until = NULL, set = NULL, token = NULL, as = "df", ... ) gbif_oai_list_records( prefix = "oai_dc", from = NULL, until = NULL, set = NULL, token = NULL, as = "df", ... ) gbif_oai_list_metadataformats(id = NULL, ...) gbif_oai_list_sets(token = NULL, as = "df", ...) gbif_oai_get_records(ids, prefix = "oai_dc", as = "parsed", ...)
gbif_oai_identify(...) gbif_oai_list_identifiers( prefix = "oai_dc", from = NULL, until = NULL, set = NULL, token = NULL, as = "df", ... ) gbif_oai_list_records( prefix = "oai_dc", from = NULL, until = NULL, set = NULL, token = NULL, as = "df", ... ) gbif_oai_list_metadataformats(id = NULL, ...) gbif_oai_list_sets(token = NULL, as = "df", ...) gbif_oai_get_records(ids, prefix = "oai_dc", as = "parsed", ...)
... |
Curl options passed on to |
prefix |
(character) A string to specify the metadata format in OAI-PMH
requests issued to the repository. The default ( |
from |
(character) string giving datestamp to be used as lower bound for datestamp-based selective harvesting (i.e., only harvest records with datestamps in the given range). Dates and times must be encoded using ISO 8601. The trailing Z must be used when including time. OAI-PMH implies UTC for date/time specifications. |
until |
(character) Datestamp to be used as an upper bound, for datestamp-based selective harvesting (i.e., only harvest records with datestamps in the given range). |
set |
(character) A set to be used for selective harvesting (i.e., only harvest records in the given set). |
token |
(character) a token previously provided by the server to resume a request where it last left off. 50 is max number of records returned. We will loop for you internally to get all the records you asked for. |
as |
(character) What to return. One of "df" (for data.frame;
default), "list" (get a list), or "raw" (raw text). For
|
id , ids
|
(character) The OAI-PMH identifier for the record. Optional. |
These functions only work with GBIF registry data, and do so via the OAI-PMH protocol (https://www.openarchives.org/OAI/openarchivesprotocol.html)
raw text, list or data.frame, depending on requested output via
as
parameter
## Not run: gbif_oai_identify() today <- format(Sys.Date(), "%Y-%m-%d") gbif_oai_list_identifiers(from = today) gbif_oai_list_identifiers(set = "country:NL") gbif_oai_list_records(from = today) gbif_oai_list_records(set = "country:NL") gbif_oai_list_metadataformats() gbif_oai_list_metadataformats(id = "9c4e36c1-d3f9-49ce-8ec1-8c434fa9e6eb") gbif_oai_list_sets() gbif_oai_list_sets(as = "list") gbif_oai_get_records("9c4e36c1-d3f9-49ce-8ec1-8c434fa9e6eb") ids <- c("9c4e36c1-d3f9-49ce-8ec1-8c434fa9e6eb", "e0f1bb8a-2d81-4b2a-9194-d92848d3b82e") gbif_oai_get_records(ids) ## End(Not run)
## Not run: gbif_oai_identify() today <- format(Sys.Date(), "%Y-%m-%d") gbif_oai_list_identifiers(from = today) gbif_oai_list_identifiers(set = "country:NL") gbif_oai_list_records(from = today) gbif_oai_list_records(set = "country:NL") gbif_oai_list_metadataformats() gbif_oai_list_metadataformats(id = "9c4e36c1-d3f9-49ce-8ec1-8c434fa9e6eb") gbif_oai_list_sets() gbif_oai_list_sets(as = "list") gbif_oai_get_records("9c4e36c1-d3f9-49ce-8ec1-8c434fa9e6eb") ids <- c("9c4e36c1-d3f9-49ce-8ec1-8c434fa9e6eb", "e0f1bb8a-2d81-4b2a-9194-d92848d3b82e") gbif_oai_get_records(ids) ## End(Not run)
View photos from GBIF.
gbif_photos(input, output = NULL, which = "table", browse = TRUE)
gbif_photos(input, output = NULL, which = "table", browse = TRUE)
input |
Input, the output from occ_search() |
output |
Output folder path. If not given uses temporary folder. |
which |
One of map or table (default). |
browse |
(logical) Browse output (default: |
The max number of photos you can see when which="map" is ~160, so cycle through if you have more than that.
The maps in the table view may not show up correctly if you are using RStudio
## Not run: res <- occ_search(mediaType = 'StillImage', limit = 100) gbif_photos(res) gbif_photos(res, which='map') res <- occ_search(scientificName = "Aves", mediaType = 'StillImage', limit=150) gbif_photos(res) gbif_photos(res, output = '~/barfoo') ## End(Not run)
## Not run: res <- occ_search(mediaType = 'StillImage', limit = 100) gbif_photos(res) gbif_photos(res, which='map') res <- occ_search(scientificName = "Aves", mediaType = 'StillImage', limit=150) gbif_photos(res) gbif_photos(res, output = '~/barfoo') ## End(Not run)
Installations metadata.
installations( data = "all", uuid = NULL, query = NULL, identifier = NULL, identifierType = NULL, limit = 100, start = NULL, curlopts = list() )
installations( data = "all", uuid = NULL, query = NULL, identifier = NULL, identifierType = NULL, limit = 100, start = NULL, curlopts = list() )
data |
The type of data to get. One or more of: 'contact', 'endpoint',
'dataset', 'comment', 'deleted', 'nonPublishing', or the special 'all'.
Default: |
uuid |
UUID of the data node provider. This must be specified if data is anything other than 'all'. |
query |
Query nodes. Only used when |
identifier |
The value for this parameter can be a simple string or
integer, e.g. |
identifierType |
Used in combination with the identifier parameter to filter identifiers by identifier type. See details. This parameter doesn't seem to work right now. |
limit |
Number of records to return. Default: 100. Maximum: 1000. |
start |
Record number to start at. Default: 0. Use in combination
with |
curlopts |
list of named curl options passed on to
|
identifierType options:
DOI No description.
FTP No description.
GBIF_NODE Identifies the node (e.g: DK
for Denmark, sp2000
for Species 2000).
GBIF_PARTICIPANT Participant identifier from the GBIF IMS Filemaker system.
GBIF_PORTAL Indicates the identifier originated from an auto_increment column in the portal.data_provider or portal.data_resource table respectively.
HANDLER No description.
LSID Reference controlled by a separate system, used for example by DOI.
SOURCE_ID No description.
UNKNOWN No description.
URI No description.
URL No description.
UUID No description.
https://www.gbif.org/developer/registry#installations
## Not run: installations(limit=5) installations(query="france", limit = 25) installations(uuid="b77901f9-d9b0-47fa-94e0-dd96450aa2b4") installations(data='contact', uuid="2e029a0c-87af-42e6-87d7-f38a50b78201") installations(data='endpoint', uuid="b77901f9-d9b0-47fa-94e0-dd96450aa2b4") installations(data='dataset', uuid="b77901f9-d9b0-47fa-94e0-dd96450aa2b4") installations(data='deleted', limit = 25) installations(data='deleted', limit=2) installations(data=c('deleted','nonPublishing'), limit=2) installations(identifierType='DOI', limit=2) # Pass on curl options installations(data='deleted', curlopts = list(verbose=TRUE)) ## End(Not run)
## Not run: installations(limit=5) installations(query="france", limit = 25) installations(uuid="b77901f9-d9b0-47fa-94e0-dd96450aa2b4") installations(data='contact', uuid="2e029a0c-87af-42e6-87d7-f38a50b78201") installations(data='endpoint', uuid="b77901f9-d9b0-47fa-94e0-dd96450aa2b4") installations(data='dataset', uuid="b77901f9-d9b0-47fa-94e0-dd96450aa2b4") installations(data='deleted', limit = 25) installations(data='deleted', limit=2) installations(data=c('deleted','nonPublishing'), limit=2) installations(identifierType='DOI', limit=2) # Pass on curl options installations(data='deleted', curlopts = list(verbose=TRUE)) ## End(Not run)
Search for literature that cites GBIF mediated data
lit_search( q = NULL, countriesOfResearcher = NULL, countriesOfCoverage = NULL, literatureType = NULL, relevance = NULL, year = NULL, topics = NULL, datasetKey = NULL, publishingOrg = NULL, peerReview = NULL, openAccess = NULL, downloadKey = NULL, doi = NULL, journalSource = NULL, journalPublisher = NULL, flatten = TRUE, limit = NULL, curlopts = list() ) lit_count(...)
lit_search( q = NULL, countriesOfResearcher = NULL, countriesOfCoverage = NULL, literatureType = NULL, relevance = NULL, year = NULL, topics = NULL, datasetKey = NULL, publishingOrg = NULL, peerReview = NULL, openAccess = NULL, downloadKey = NULL, doi = NULL, journalSource = NULL, journalPublisher = NULL, flatten = TRUE, limit = NULL, curlopts = list() ) lit_count(...)
q |
(character) Simple full text search parameter. The value for this parameter can be a simple word or a phrase. Wildcards are not supported. |
countriesOfResearcher |
(character) Country of institution with which author is affiliated, e.g. DK (for Denmark). Country codes are listed in enumeration_country(). |
countriesOfCoverage |
(character) Country of focus of study, e.g. BR (for Brazil). Country codes are listed in enumeration_country(). |
literatureType |
(character) Type of literature ("JOURNAL", "BOOK_SECTION", "WORKING_PAPER", "REPORT", "GENERIC", "THESIS", "CONFERENCE_PROCEEDINGS", "WEB_PAGE"). |
relevance |
(character) How the publication relates to GBIF. See details ("GBIF_USED", "GBIF_MENTIONED", "GBIF_PUBLISHED", "GBIF_CITED", "GBIF_ACKNOWLEDGED", "GBIF_AUTHOR"). |
year |
(integer) Year of publication. |
topics |
(character) Topic of publication. |
datasetKey |
(character) GBIF dataset uuid referenced in publication. |
publishingOrg |
(character) Publisher uuid whose dataset is referenced in publication. |
peerReview |
(logical) Has publication undergone peer-review? |
openAccess |
(logical) Is publication Open Access? |
downloadKey |
(character) Download referenced in publication. |
doi |
(character) Digital Object Identifier (DOI). |
journalSource |
(character) Journal of publication. |
journalPublisher |
(character) Publisher of journal. |
flatten |
(logical) should any lists in the resulting data be flattened into comma-separated strings? |
limit |
how many records to return. limit=NULL will fetch up to 10,000. |
curlopts |
list of named curl options passed on to HttpClient. see curl::curl_options for curl options. |
... |
additional parameters passed to lit_search |
This function enables you to search for literature indexed by GBIF, including peer-reviewed papers, citing GBIF datasets and downloads. The literature API powers the literature search on GBIF.
The GBIF Secretariat maintains an ongoing literature tracking programme, which identifies research uses and citations of biodiversity information accessed through GBIF’s global infrastructure.
In the literature database, relevance refers to how publications relate to GBIF following these definitions:
GBIF_USED : makes substantive use of data in a quantitative analysis (e.g. ecological niche modelling)
GBIF_CITED : cites a qualitative fact derived from data (e.g. a given species is found in a given country)
GBIF_DISCUSSED : discusses GBIF as an infrastructure or the use of data
GBIF_PRIMARY : GBIF is the primary source of data (no longer applied)
GBIF_ACKNOWLEDGED : acknowledges GBIF (but doesn't use or cite data)
GBIF_PUBLISHED : describes or talks about data published to GBIF
GBIF_AUTHOR : authored by GBIF staff
GBIF_MENTIONED : unspecifically mentions GBIF or the GBIF portal
GBIF_FUNDED : funded by GBIF or a GBIF-managed funding programme
The following arguments can take multiple values:
relevance
countriesOfResearcher
countriesOfCoverage
literatureType
topics
datasetKey
publishingOrg
downloadKey
doi
journalSource
journalPublisher
If flatten=TRUE
, then data will be returned as flat
data.frame with no complex column types (i.e. no lists or data.frames).
limit=NULL
will return up to 10,000 records. The maximum value for
limit
is 10,000. If no filters are used, only the first 1,000 records
will be returned; limit must be explicitly set to limit=10000
, to get
the first 10,000 records in this case.
lit_count()
is a convenience wrapper, which will return the number of
literature references for a certain lit_search()
query. This is the
same as running lit_search()$meta$count
.
A named list with two values: $data
and $meta
. $data
is
a data.frame
of literature references.
## Not run: lit_search(q="bats")$data lit_search(datasetKey="50c9509d-22c7-4a22-a47d-8c48425ef4a7") lit_search(year=2020) lit_search(year="2011,2020") # year ranges lit_search(relevance=c("GBIF_CITED","GBIF_USED")) # multiple values lit_search(relevance=c("GBIF_USED","GBIF_CITED"), topics=c("EVOLUTION","PHYLOGENETICS")) lit_count() # total number of literature referencing GBIF lit_count(peerReview=TRUE) # number of citations of iNaturalist lit_count(datasetKey="50c9509d-22c7-4a22-a47d-8c48425ef4a7") # number of peer-reviewed articles used GBIF mediated data lit_count(peerReview=TRUE,literatureType="JOURNAL",relevance="GBIF_USED") # Typically what is meant by "literature that uses GBIF" lit_search(peerReview=TRUE,literatureType="JOURNAL",relevance="GBIF_USED") lit_count(peerReview=TRUE,literatureType="JOURNAL",relevance="GBIF_USED") ## End(Not run)
## Not run: lit_search(q="bats")$data lit_search(datasetKey="50c9509d-22c7-4a22-a47d-8c48425ef4a7") lit_search(year=2020) lit_search(year="2011,2020") # year ranges lit_search(relevance=c("GBIF_CITED","GBIF_USED")) # multiple values lit_search(relevance=c("GBIF_USED","GBIF_CITED"), topics=c("EVOLUTION","PHYLOGENETICS")) lit_count() # total number of literature referencing GBIF lit_count(peerReview=TRUE) # number of citations of iNaturalist lit_count(datasetKey="50c9509d-22c7-4a22-a47d-8c48425ef4a7") # number of peer-reviewed articles used GBIF mediated data lit_count(peerReview=TRUE,literatureType="JOURNAL",relevance="GBIF_USED") # Typically what is meant by "literature that uses GBIF" lit_search(peerReview=TRUE,literatureType="JOURNAL",relevance="GBIF_USED") lit_count(peerReview=TRUE,literatureType="JOURNAL",relevance="GBIF_USED") ## End(Not run)
This function is a wrapper for the GBIF mapping api version 2.0. The mapping API is a web map tile service making it straightforward to visualize GBIF content on interactive maps, and overlay content from other sources. It returns tile maps with number of GBIF records per area unit that can be used in a variety of ways, for example in interactive leaflet web maps. Map details are specified by a number of query parameters, some of them optional. Full documentation of the GBIF mapping api can be found at https://www.gbif.org/developer/maps
map_fetch( source = "density", x = 0:1, y = 0, z = 0, format = "@1x.png", srs = "EPSG:4326", bin = NULL, hexPerTile = NULL, squareSize = NULL, style = NULL, taxonKey = NULL, datasetKey = NULL, country = NULL, publishingOrg = NULL, publishingCountry = NULL, year = NULL, basisOfRecord = NULL, return = "png", base_style = NULL, plot_terra = TRUE, curlopts = list(http_version = 2), ... )
map_fetch( source = "density", x = 0:1, y = 0, z = 0, format = "@1x.png", srs = "EPSG:4326", bin = NULL, hexPerTile = NULL, squareSize = NULL, style = NULL, taxonKey = NULL, datasetKey = NULL, country = NULL, publishingOrg = NULL, publishingCountry = NULL, year = NULL, basisOfRecord = NULL, return = "png", base_style = NULL, plot_terra = TRUE, curlopts = list(http_version = 2), ... )
source |
(character) Either "density" or "adhoc". Default: "density" |
x |
(integer sequence) the column. Default: 0:1 |
y |
(integer sequence) the row. Default: 0 |
z |
(integer) the zoom. Default: 0 |
format |
(character) The data format, one of:
|
srs |
(character) Spatial reference system. One of:
|
bin |
(character) |
hexPerTile |
(integer) sets the size of the hexagons (the number horizontally across a tile). |
squareSize |
(integer) sets the size of the squares. Choose a factor of 4096 so they tessellate correctly: probably from 8, 16, 32, 64, 128, 256, 512. |
style |
(character) for raster tiles, choose from the available styles. Defaults to classic.point for source="density" and "scaled.circle" for source="adhoc". |
taxonKey |
(integer/numeric/character) search by taxon key, can only supply 1. |
datasetKey |
(character) search by dataset key, can only supply 1. |
country |
(character) search by country, can only supply 1. |
publishingOrg |
(character) search by publishing organization key, can only supply 1. |
publishingCountry |
(character) search by publishing country, can only supply 1. |
year |
(integer) integer that limits the search to a certain year or,
if passing a vector of integers, multiple years, for example
|
basisOfRecord |
(character) one or more basis of record states to
include records with that basis of record. The full list is: |
return |
(character) Either "png" or "terra". |
base_style |
(character) The style of the base map. |
plot_terra |
(logical) Set whether the terra map should be plotted by default. |
curlopts |
options passed on to crul::HttpClient |
... |
additional arguments passed to the adhoc interface. |
The default settings, return='png'
, will return a magick-image
png. This image will be a composite image of the occurrence tiles fetched
and a base map. This map is primarily useful as a high quality image of
occurrence records.
The args x
and y
can both be integer sequences. For example, x=0:3
or
y=0:1
. Note that the tile index starts at 0. Higher values of z
, will
produce more tiles that can be fetched and stitched together. Selecting
a too high value for x
or y
will produce a blank image.
Setting return='terra'
will return a terra::SpatRaster
object. This
is primarily useful if you were interested in the underlying aggregated
occurrence density data.
See the article
a magick-image
or terra::SpatRaster
object.
John Waller and Laurens Geffert [email protected]
https://www.gbif.org/developer/maps
https://api.gbif.org/v2/map/demo.html
https://api.gbif.org/v2/map/demo13.html
## Not run: # all occurrences map_fetch() # get artic map map_fetch(srs='EPSG:3031') # only preserved specimens map_fetch(basisOfRecord="PRESERVED_SPECIMEN") # Map of occ in Great Britain map_fetch(z=3,y=1,x=7:8,country="GB") # Peguins with artic projection map_fetch(srs='EPSG:3031',taxonKey=2481660,style='glacier.point', base_style="gbif-dark") # occ from a long time ago map_fetch(year=1600) # polygon style map_fetch(style="iNaturalist.poly",bin="hex") # iNaturalist dataset plotted map_fetch(datasetKey="50c9509d-22c7-4a22-a47d-8c48425ef4a7", style="iNaturalist.poly") # use source="adhoc" for more filters map_fetch(z=1, source="adhoc", iucn_red_list_category="CR", style="scaled.circles", base_style='gbif-light') # cropped map of Hawaii map_fetch(z=5,x=3:4,y=12,source="adhoc",gadmGid="USA.12_1") ## End(Not run)
## Not run: # all occurrences map_fetch() # get artic map map_fetch(srs='EPSG:3031') # only preserved specimens map_fetch(basisOfRecord="PRESERVED_SPECIMEN") # Map of occ in Great Britain map_fetch(z=3,y=1,x=7:8,country="GB") # Peguins with artic projection map_fetch(srs='EPSG:3031',taxonKey=2481660,style='glacier.point', base_style="gbif-dark") # occ from a long time ago map_fetch(year=1600) # polygon style map_fetch(style="iNaturalist.poly",bin="hex") # iNaturalist dataset plotted map_fetch(datasetKey="50c9509d-22c7-4a22-a47d-8c48425ef4a7", style="iNaturalist.poly") # use source="adhoc" for more filters map_fetch(z=1, source="adhoc", iucn_red_list_category="CR", style="scaled.circles", base_style='gbif-light') # cropped map of Hawaii map_fetch(z=5,x=3:4,y=12,source="adhoc",gadmGid="USA.12_1") ## End(Not run)
This function is a wrapper for the GBIF mapping api version 2.0. The mapping API is a web map tile service making it straightforward to visualize GBIF content on interactive maps, and overlay content from other sources. It returns map vector tiles with the number of GBIF records per area unit that can be used in a variety of ways, for example in interactive leaflet web maps. Map details are specified by a number of query parameters, some of them optional. Full documentation of the GBIF mapping api can be found at https://www.gbif.org/developer/maps
mvt_fetch( source = "density", x = 0, y = 0, z = 0, srs = "EPSG:4326", bin = NULL, hexPerTile = NULL, squareSize = NULL, style = "classic.point", taxonKey = NULL, datasetKey = NULL, country = NULL, publishingOrg = NULL, publishingCountry = NULL, year = NULL, basisOfRecord = NULL, ... )
mvt_fetch( source = "density", x = 0, y = 0, z = 0, srs = "EPSG:4326", bin = NULL, hexPerTile = NULL, squareSize = NULL, style = "classic.point", taxonKey = NULL, datasetKey = NULL, country = NULL, publishingOrg = NULL, publishingCountry = NULL, year = NULL, basisOfRecord = NULL, ... )
source |
(character) Either |
x |
(integer) the column. Default: 0 |
y |
(integer) the row. Default: 0 |
z |
(integer) the zoom. Default: 0 |
srs |
(character) Spatial reference system for the output (input srs for mvt
from GBIF is always
|
bin |
(character) |
hexPerTile |
(integer) sets the size of the hexagons (the number horizontally across a tile). optional |
squareSize |
(integer) sets the size of the squares. Choose a factor of 4096 so they tessellate correctly: probably from 8, 16, 32, 64, 128, 256, 512. optional |
style |
(character) for raster tiles, choose from the available styles. Defaults to classic.point. optional. THESE DON'T WORK YET. |
taxonKey |
(integer/numeric/character) search by taxon key, can only supply 1. optional |
datasetKey |
(character) search by dataset key, can only supply 1. optional |
country |
(character) search by country, can only supply 1. optional |
publishingOrg |
(character) search by publishing organization key, can only supply 1. optional |
publishingCountry |
(character) search by publishing country, can only supply 1. optional |
year |
(integer) integer that limits the search to a certain year or,
if passing a vector of integers, multiple years, for example
|
basisOfRecord |
(character) one or more basis of record states to
include records with that basis of record. The full list is: |
... |
curl options passed on to crul::HttpClient |
This function uses the arguments passed on to generate a query to the GBIF web map API. The API returns a web tile object as png that is read and converted into an R raster object. The break values or nbreaks generate a custom colour palette for the web tile, with each bin corresponding to one grey value. After retrieval, the raster is reclassified to the actual break values. This is a somewhat hacky but nonetheless functional solution in the absence of a GBIF raster API implementation.
We add extent and set the projection for the output. You can reproject after retrieving the output.
an sf object
https://www.gbif.org/developer/maps
## Not run: if ( requireNamespace("sf", quietly = TRUE) && requireNamespace("protolite", quietly = TRUE) ) { x <- mvt_fetch(taxonKey = 2480498, year = 2007:2011) x # gives an sf object class(x) # different srs ## 3857 y <- mvt_fetch(taxonKey = 2480498, year = 2010, srs = "EPSG:3857") y ## 3031 z <- mvt_fetch(taxonKey = 2480498, year = 2010, srs = "EPSG:3031", verbose = TRUE) z # 3575 z <- mvt_fetch(taxonKey = 2480498, year = 2010, srs = "EPSG:3575") z # bin x <- mvt_fetch(taxonKey = 212, year = 1998, bin = "hex", hexPerTile = 30, style = "classic-noborder.poly") x # query with basisOfRecord mvt_fetch(taxonKey = 2480498, year = 2010, basisOfRecord = "HUMAN_OBSERVATION") mvt_fetch(taxonKey = 2480498, year = 2010, basisOfRecord = c("HUMAN_OBSERVATION", "LIVING_SPECIMEN")) } ## End(Not run)
## Not run: if ( requireNamespace("sf", quietly = TRUE) && requireNamespace("protolite", quietly = TRUE) ) { x <- mvt_fetch(taxonKey = 2480498, year = 2007:2011) x # gives an sf object class(x) # different srs ## 3857 y <- mvt_fetch(taxonKey = 2480498, year = 2010, srs = "EPSG:3857") y ## 3031 z <- mvt_fetch(taxonKey = 2480498, year = 2010, srs = "EPSG:3031", verbose = TRUE) z # 3575 z <- mvt_fetch(taxonKey = 2480498, year = 2010, srs = "EPSG:3575") z # bin x <- mvt_fetch(taxonKey = 212, year = 1998, bin = "hex", hexPerTile = 30, style = "classic-noborder.poly") x # query with basisOfRecord mvt_fetch(taxonKey = 2480498, year = 2010, basisOfRecord = "HUMAN_OBSERVATION") mvt_fetch(taxonKey = 2480498, year = 2010, basisOfRecord = c("HUMAN_OBSERVATION", "LIVING_SPECIMEN")) } ## End(Not run)
Lookup names in the GBIF backbone taxonomy.
name_backbone( name, rank = NULL, kingdom = NULL, phylum = NULL, class = NULL, order = NULL, family = NULL, genus = NULL, strict = FALSE, verbose = FALSE, start = NULL, limit = 100, curlopts = list() ) name_backbone_verbose( name, rank = NULL, kingdom = NULL, phylum = NULL, class = NULL, order = NULL, family = NULL, genus = NULL, strict = FALSE, start = NULL, limit = 100, curlopts = list() )
name_backbone( name, rank = NULL, kingdom = NULL, phylum = NULL, class = NULL, order = NULL, family = NULL, genus = NULL, strict = FALSE, verbose = FALSE, start = NULL, limit = 100, curlopts = list() ) name_backbone_verbose( name, rank = NULL, kingdom = NULL, phylum = NULL, class = NULL, order = NULL, family = NULL, genus = NULL, strict = FALSE, start = NULL, limit = 100, curlopts = list() )
name |
(character) Full scientific name potentially with authorship (required) |
rank |
(character) The rank given as our rank enum. (optional) |
kingdom |
(character) If provided default matching will also try to match against this if no direct match is found for the name alone. (optional) |
phylum |
(character) If provided default matching will also try to match against this if no direct match is found for the name alone. (optional) |
class |
(character) If provided default matching will also try to match against this if no direct match is found for the name alone. (optional) |
order |
(character) If provided default matching will also try to match against this if no direct match is found for the name alone. (optional) |
family |
(character) If provided default matching will also try to match against this if no direct match is found for the name alone. (optional) |
genus |
(character) If provided default matching will also try to match against this if no direct match is found for the name alone. (optional) |
strict |
(logical) If |
verbose |
(logical) should the function give back more (less reliable)
results. See function |
start |
Record number to start at. Default: 0. Use in combination
with |
limit |
Number of records to return. Default: 100. Maximum: 1000. |
curlopts |
list of named curl options passed on to
|
If you don't get a match, GBIF gives back a data.frame with columns
synonym
, confidence
, and matchType='NONE'
.
For name_backbone
, a data.frame for a single taxon with many
columns. For name_backbone_verbose
, a larger number of results in a
data.frame resulting from fuzzy matching.
You will also get back your input name, rank, kingdom, phylum etc. as
columns input_name, input_rank, input_kingdom etc. so you can check the
results.
https://www.gbif.org/developer/species#searching
## Not run: name_backbone(name='Helianthus annuus', kingdom='plants') name_backbone(name='Helianthus', rank='genus', kingdom='plants') name_backbone(name='Poa', rank='genus', family='Poaceae') # Verbose - gives back alternatives ## Strictness name_backbone_verbose(name='Poa', kingdom='plants', strict=FALSE) name_backbone_verbose(name='Helianthus annuus', kingdom='plants', strict=TRUE) # Non-existent name - returns list of lenght 3 stating no match name_backbone(name='Aso') name_backbone(name='Oenante') # Pass on curl options name_backbone(name='Oenante', curlopts = list(verbose=TRUE)) ## End(Not run)
## Not run: name_backbone(name='Helianthus annuus', kingdom='plants') name_backbone(name='Helianthus', rank='genus', kingdom='plants') name_backbone(name='Poa', rank='genus', family='Poaceae') # Verbose - gives back alternatives ## Strictness name_backbone_verbose(name='Poa', kingdom='plants', strict=FALSE) name_backbone_verbose(name='Helianthus annuus', kingdom='plants', strict=TRUE) # Non-existent name - returns list of lenght 3 stating no match name_backbone(name='Aso') name_backbone(name='Oenante') # Pass on curl options name_backbone(name='Oenante', curlopts = list(verbose=TRUE)) ## End(Not run)
Lookup names in the GBIF backbone taxonomy in a checklist.
name_backbone_checklist( name_data = NULL, rank = NULL, kingdom = NULL, phylum = NULL, class = NULL, order = NULL, family = NULL, genus = NULL, strict = FALSE, verbose = FALSE, curlopts = list() )
name_backbone_checklist( name_data = NULL, rank = NULL, kingdom = NULL, phylum = NULL, class = NULL, order = NULL, family = NULL, genus = NULL, strict = FALSE, verbose = FALSE, curlopts = list() )
name_data |
(data.frame or vector) see details. |
rank |
(character) default value (optional). |
kingdom |
(character) default value (optional). |
phylum |
(character) default value (optional). |
class |
(character) default value (optional). |
order |
(character) default value (optional). |
family |
(character) default value (optional). |
genus |
(character) default value (optional). |
strict |
(logical) strict=TRUE will not attempt to fuzzy match or return higher rank matches. |
verbose |
(logical) If true it shows alternative matches which were considered but then rejected. |
curlopts |
list of named curl options passed on to
|
This function is an alternative for name_backbone()
, which will work with
a list of names (a vector or a data.frame). The data.frame should have the
following column names, but only the 'name' column is required. If only
one column is present, then that column is assumed to be the 'name' column.
name : (required)
rank : (optional)
kingdom : (optional)
phylum : (optional)
class : (optional)
order : (optional)
family : (optional)
genus : (optional)
The input columns will be returned as "verbatim_name","verbatim_rank", "verbatim_phylum" etc. A column of "verbatim_index" will also be returned giving the index of the input.
The following aliases for the 'name' column will work (any case or with '_' will work) :
"scientificName", "ScientificName", "scientific_name" ...
"sci_name", "sciname", "SCI_NAME" ...
"names", "NAMES" ...
"species", "SPECIES" ...
"species_name", "speciesname" ...
"sp_name", "SP_NAME", "spname" ...
"taxon_name", "taxonname", "TAXON NAME" ...
If more than one alias is present and no column is named 'name', then the left-most column with an acceptable aliased name above is used.
If verbose=TRUE
, a column called is_alternative
will be returned,
which specifies whether a name was originally a first choice or not.
is_alternative=TRUE
means the name is not considered to be
the best match by GBIF.
Default values for rank, kingdom, phylum, class, order, family, and genus can be supplied. If a default value is supplied, the values for these fields are ignored in name_data, and the default value is used instead. This is most useful if you have a list of names and you know they are all plants, insects, birds, etc. You can also input multiple values, if they are the same length as the list of names you are trying to match.
This function can also be used with a character vector of names. In that case no column names are needed of course.
This function is very similar to the GBIF species-lookup tool. https://www.gbif.org/tools/species-lookup.
If you have 1000s of names to match, it can take some minutes to get back all of the matches. I have tested it with 60K names. Scientific names with author details usually get better matches.
See also article Working With Taxonomic Names.
A data.frame
of matched names.
## Not run: library(rgbif) name_data <- data.frame( scientificName = c( "Cirsium arvense (L.) Scop.", # a plant "Calopteryx splendens (Harris, 1780)", # an insect "Puma concolor (Linnaeus, 1771)", # a big cat "Ceylonosticta alwisi (Priyadarshana & Wijewardhane, 2016)", # newly discovered insect "Puma concuolor (Linnaeus, 1771)", # a mis-spelled big cat "Fake species (John Waller 2021)", # a fake species "Calopteryx" # Just a Genus ), description = c( "a plant", "an insect", "a big cat", "newly discovered insect", "a mis-spelled big cat", "a fake species", "just a GENUS" ), kingdom = c( "Plantae", "Animalia", "Animalia", "Animalia", "Animalia", "Johnlia", "Animalia" )) name_backbone_checklist(name_data) # return more than 1 result per name name_backbone_checklist(name_data,verbose=TRUE) # works with just vectors too name_list <- c( "Cirsium arvense (L.) Scop.", "Calopteryx splendens (Harris, 1780)", "Puma concolor (Linnaeus, 1771)", "Ceylonosticta alwisi (Priyadarshana & Wijewardhane, 2016)", "Puma concuolor", "Fake species (John Waller 2021)", "Calopteryx") name_backbone_checklist(name_list) name_backbone_checklist(name_list,verbose=TRUE) name_backbone_checklist(name_list,strict=TRUE) # default values name_backbone_checklist(c("Aloe arborecens Mill.", "Cirsium arvense (L.) Scop."),kingdom="Plantae") name_backbone_checklist(c("Aloe arborecens Mill.", "Calopteryx splendens (Harris, 1780)"),kingdom=c("Plantae","Animalia")) ## End(Not run)
## Not run: library(rgbif) name_data <- data.frame( scientificName = c( "Cirsium arvense (L.) Scop.", # a plant "Calopteryx splendens (Harris, 1780)", # an insect "Puma concolor (Linnaeus, 1771)", # a big cat "Ceylonosticta alwisi (Priyadarshana & Wijewardhane, 2016)", # newly discovered insect "Puma concuolor (Linnaeus, 1771)", # a mis-spelled big cat "Fake species (John Waller 2021)", # a fake species "Calopteryx" # Just a Genus ), description = c( "a plant", "an insect", "a big cat", "newly discovered insect", "a mis-spelled big cat", "a fake species", "just a GENUS" ), kingdom = c( "Plantae", "Animalia", "Animalia", "Animalia", "Animalia", "Johnlia", "Animalia" )) name_backbone_checklist(name_data) # return more than 1 result per name name_backbone_checklist(name_data,verbose=TRUE) # works with just vectors too name_list <- c( "Cirsium arvense (L.) Scop.", "Calopteryx splendens (Harris, 1780)", "Puma concolor (Linnaeus, 1771)", "Ceylonosticta alwisi (Priyadarshana & Wijewardhane, 2016)", "Puma concuolor", "Fake species (John Waller 2021)", "Calopteryx") name_backbone_checklist(name_list) name_backbone_checklist(name_list,verbose=TRUE) name_backbone_checklist(name_list,strict=TRUE) # default values name_backbone_checklist(c("Aloe arborecens Mill.", "Cirsium arvense (L.) Scop."),kingdom="Plantae") name_backbone_checklist(c("Aloe arborecens Mill.", "Calopteryx splendens (Harris, 1780)"),kingdom=c("Plantae","Animalia")) ## End(Not run)
Parse and examine further GBIF name issues on a dataset.
name_issues(.data, ..., mutate = NULL)
name_issues(.data, ..., mutate = NULL)
.data |
Output from a call to |
... |
Named parameters to only get back (e.g. bbmn), or to remove (e.g. -bbmn). |
mutate |
(character) One of:
For split and split_expand, values in cells become y ("yes") or n ("no") |
https://gbif.github.io/gbif-api/apidocs/org/gbif/api/vocabulary/NameUsageIssue.html
## Not run: # what do issues mean, can print whole table head(gbif_issues()) # or just name related issues gbif_issues()[which(gbif_issues()$type %in% c("name")),] # or search for matches gbif_issues()[gbif_issues()$code %in% c('bbmn','clasna','scina'),] # compare out data to after name_issues use (aa <- name_usage(name = "Lupus")) aa %>% name_issues("clasna") ## or parse issues in various ways ### remove data rows with certain issue classes aa %>% name_issues(-clasna, -scina) ### expand issues to more descriptive names aa %>% name_issues(mutate = "expand") ### split and expand aa %>% name_issues(mutate = "split_expand") ### split, expand, and remove an issue class aa %>% name_issues(-bbmn, mutate = "split_expand") ## Or you can use name_issues without %>% name_issues(aa, -bbmn, mutate = "split_expand") ## End(Not run)
## Not run: # what do issues mean, can print whole table head(gbif_issues()) # or just name related issues gbif_issues()[which(gbif_issues()$type %in% c("name")),] # or search for matches gbif_issues()[gbif_issues()$code %in% c('bbmn','clasna','scina'),] # compare out data to after name_issues use (aa <- name_usage(name = "Lupus")) aa %>% name_issues("clasna") ## or parse issues in various ways ### remove data rows with certain issue classes aa %>% name_issues(-clasna, -scina) ### expand issues to more descriptive names aa %>% name_issues(mutate = "expand") ### split and expand aa %>% name_issues(mutate = "split_expand") ### split, expand, and remove an issue class aa %>% name_issues(-bbmn, mutate = "split_expand") ## Or you can use name_issues without %>% name_issues(aa, -bbmn, mutate = "split_expand") ## End(Not run)
This service uses fuzzy lookup so that you can put in partial names and you should get back those things that match. See examples below.
Faceting: If facet=FALSE
or left to the default (NULL), no faceting
is done. And therefore, all parameters with facet in their name are
ignored (facetOnly, facetMincount, facetMultiselect).
name_lookup( query = NULL, rank = NULL, higherTaxonKey = NULL, status = NULL, isExtinct = NULL, habitat = NULL, nameType = NULL, datasetKey = NULL, origin = NULL, nomenclaturalStatus = NULL, limit = 100, start = 0, facet = NULL, facetMincount = NULL, facetMultiselect = NULL, type = NULL, hl = NULL, issue = NULL, constituentKey = NULL, verbose = FALSE, return = NULL, curlopts = list() )
name_lookup( query = NULL, rank = NULL, higherTaxonKey = NULL, status = NULL, isExtinct = NULL, habitat = NULL, nameType = NULL, datasetKey = NULL, origin = NULL, nomenclaturalStatus = NULL, limit = 100, start = 0, facet = NULL, facetMincount = NULL, facetMultiselect = NULL, type = NULL, hl = NULL, issue = NULL, constituentKey = NULL, verbose = FALSE, return = NULL, curlopts = list() )
query |
Query term(s) for full text search. |
rank |
CLASS, CULTIVAR, CULTIVAR_GROUP, DOMAIN, FAMILY, FORM, GENUS, INFORMAL, INFRAGENERIC_NAME, INFRAORDER, INFRASPECIFIC_NAME, INFRASUBSPECIFIC_NAME, KINGDOM, ORDER, PHYLUM, SECTION, SERIES, SPECIES, STRAIN, SUBCLASS, SUBFAMILY, SUBFORM, SUBGENUS, SUBKINGDOM, SUBORDER, SUBPHYLUM, SUBSECTION, SUBSERIES, SUBSPECIES, SUBTRIBE, SUBVARIETY, SUPERCLASS, SUPERFAMILY, SUPERORDER, SUPERPHYLUM, SUPRAGENERIC_NAME, TRIBE, UNRANKED, VARIETY |
higherTaxonKey |
Filters by any of the higher Linnean rank keys. Note this is within the respective checklist and not searching nub keys across all checklists. This parameter accepts many inputs in a vector (passed in the same request). |
status |
Filters by the taxonomic status as one of:
|
isExtinct |
(logical) Filters by extinction status (e.g.
|
habitat |
(character) Filters by habitat. One of: marine, freshwater, or terrestrial |
nameType |
Filters by the name type as one of:
|
datasetKey |
Filters by the dataset's key (a uuid) |
origin |
(character) Filters by origin. One of:
|
nomenclaturalStatus |
Not yet implemented, but will eventually allow for filtering by a nomenclatural status enum. |
limit |
Number of records to return. Hard maximum limit set by GBIF API: 99999. |
start |
Record number to start at. Default: 0. |
facet |
A vector/list of facet names used to retrieve the 100 most frequent values for a field. Allowed facets are: datasetKey, higherTaxonKey, rank, status, isExtinct, habitat, and nameType. Additionally threat and nomenclaturalStatus are legal values but not yet implemented, so data will not yet be returned for them. |
facetMincount |
Used in combination with the facet parameter. Set facetMincount to exclude facets with a count less than x, e.g. http://bit.ly/2osAUQB only shows the type values 'CHECKLIST' and 'OCCURRENCE' because the other types have counts less than 10000 |
facetMultiselect |
(logical) Used in combination with the facet
parameter. Set |
type |
Type of name. One of occurrence, checklist, or metadata. |
hl |
(logical) Set |
issue |
Filters by issue. Issue has to be related to names. Type
|
constituentKey |
Filters by the dataset's constituent key (a uuid). |
verbose |
(logical) If |
return |
Defunct. All components are returned; index to the one(s) you want |
curlopts |
list of named curl options passed on to
|
An object of class gbif, which is a S3 class list, with slots for
metadata (meta
), the data itself (data
), the taxonomic
hierarchy data (hierarchies
), and vernacular names (names
).
In addition, the object has attributes listing the user supplied arguments
and type of search, which, unlike for occurrence data, is always
equal to 'single' even if multiple values for some parameters are given.
meta
is a list of length four with offset, limit, endOfRecords and
count fields. data
is a tibble (aka data.frame) containing all
information about the found taxa. hierarchies
is a list of
data.frame's, one per GBIF key (taxon), containing its taxonomic
classification. Each data.frame contains two columns: rankkey
and
name
. names
returns a list of data.frame's, one per GBIF key
(taxon), containing all vernacular names. Each data.frame contains two
columns: vernacularName
and language
.
A list of length five:
metadata
data: either a data.frame (verbose=FALSE
, default) or a list (verbose=TRUE
).
facets
hierarchies
names
Some parameters can take many inputs, which are treated as 'OR' (e.g., a or b or c). The following take many inputs:
rank
higherTaxonKey
status
habitat
nameType
datasetKey
origin
https://www.gbif.org/developer/species#searching
## Not run: # Look up names like mammalia name_lookup(query='mammalia', limit = 20) # Start with an offset name_lookup(query='mammalia', limit=1) name_lookup(query='mammalia', limit=1, start=2) # large requests (paging is internally implemented). # hard maximum limit set by GBIF API: 99999 # name_lookup(query = "Carnivora", limit = 10000) # Get all data and parse it, removing descriptions which can be quite long out <- name_lookup('Helianthus annuus', rank="species", verbose=TRUE) lapply(out$data, function(x) { x[!names(x) %in% c("descriptions","descriptionsSerialized")] }) # Search for a genus name_lookup(query="Cnaemidophorus", rank="genus") # Limit records to certain number name_lookup('Helianthus annuus', rank="species", limit=2) # Query by habitat name_lookup(habitat = "terrestrial", limit=2) name_lookup(habitat = "marine", limit=2) name_lookup(habitat = "freshwater", limit=2) # Using faceting name_lookup(facet='status', limit=0, facetMincount='70000') name_lookup(facet=c('status','higherTaxonKey'), limit=0, facetMincount='700000') name_lookup(facet='nameType', limit=0) name_lookup(facet='habitat', limit=0) name_lookup(facet='datasetKey', limit=0) name_lookup(facet='rank', limit=0) name_lookup(facet='isExtinct', limit=0) name_lookup(isExtinct=TRUE, limit=0) # text highlighting ## turn on highlighting res <- name_lookup(query='canada', hl=TRUE, limit=5) res$data name_lookup(query='canada', hl=TRUE, limit=45) ## and you can pass the output to gbif_names() function res <- name_lookup(query='canada', hl=TRUE, limit=5) gbif_names(res) # Lookup by datasetKey (set up sufficient high limit, API maximum: 99999) # name_lookup(datasetKey='3f8a1297-3259-4700-91fc-acc4170b27ce', # limit = 50000) # Some parameters accept many inputs, treated as OR name_lookup(rank = c("family", "genus")) name_lookup(higherTaxonKey = c("119", "120", "121", "204")) name_lookup(status = c("misapplied", "synonym"))$data name_lookup(habitat = c("marine", "terrestrial")) name_lookup(nameType = 
c("cultivar", "doubtful")) name_lookup(datasetKey = c("73605f3a-af85-4ade-bbc5-522bfb90d847", "d7c60346-44b6-400d-ba27-8d3fbeffc8a5")) name_lookup(datasetKey = "289244ee-e1c1-49aa-b2d7-d379391ce265", origin = c("SOURCE", "DENORMED_CLASSIFICATION")) # Pass on curl options name_lookup(query='Cnaemidophorus', rank="genus", curlopts = list(verbose = TRUE)) ## End(Not run)
## Not run: # Look up names like mammalia name_lookup(query='mammalia', limit = 20) # Start with an offset name_lookup(query='mammalia', limit=1) name_lookup(query='mammalia', limit=1, start=2) # large requests (paging is internally implemented). # hard maximum limit set by GBIF API: 99999 # name_lookup(query = "Carnivora", limit = 10000) # Get all data and parse it, removing descriptions which can be quite long out <- name_lookup('Helianthus annuus', rank="species", verbose=TRUE) lapply(out$data, function(x) { x[!names(x) %in% c("descriptions","descriptionsSerialized")] }) # Search for a genus name_lookup(query="Cnaemidophorus", rank="genus") # Limit records to certain number name_lookup('Helianthus annuus', rank="species", limit=2) # Query by habitat name_lookup(habitat = "terrestrial", limit=2) name_lookup(habitat = "marine", limit=2) name_lookup(habitat = "freshwater", limit=2) # Using faceting name_lookup(facet='status', limit=0, facetMincount='70000') name_lookup(facet=c('status','higherTaxonKey'), limit=0, facetMincount='700000') name_lookup(facet='nameType', limit=0) name_lookup(facet='habitat', limit=0) name_lookup(facet='datasetKey', limit=0) name_lookup(facet='rank', limit=0) name_lookup(facet='isExtinct', limit=0) name_lookup(isExtinct=TRUE, limit=0) # text highlighting ## turn on highlighting res <- name_lookup(query='canada', hl=TRUE, limit=5) res$data name_lookup(query='canada', hl=TRUE, limit=45) ## and you can pass the output to gbif_names() function res <- name_lookup(query='canada', hl=TRUE, limit=5) gbif_names(res) # Lookup by datasetKey (set up sufficient high limit, API maximum: 99999) # name_lookup(datasetKey='3f8a1297-3259-4700-91fc-acc4170b27ce', # limit = 50000) # Some parameters accept many inputs, treated as OR name_lookup(rank = c("family", "genus")) name_lookup(higherTaxonKey = c("119", "120", "121", "204")) name_lookup(status = c("misapplied", "synonym"))$data name_lookup(habitat = c("marine", "terrestrial")) name_lookup(nameType = 
c("cultivar", "doubtful")) name_lookup(datasetKey = c("73605f3a-af85-4ade-bbc5-522bfb90d847", "d7c60346-44b6-400d-ba27-8d3fbeffc8a5")) name_lookup(datasetKey = "289244ee-e1c1-49aa-b2d7-d379391ce265", origin = c("SOURCE", "DENORMED_CLASSIFICATION")) # Pass on curl options name_lookup(query='Cnaemidophorus', rank="genus", curlopts = list(verbose = TRUE)) ## End(Not run)
Parse taxon names using the GBIF name parser.
name_parse(scientificname, curlopts = list())
name_parse(scientificname, curlopts = list())
scientificname |
A character vector of scientific names. |
curlopts |
list of named curl options passed on to
|
A data.frame
containing fields extracted from parsed
taxon names. Fields returned are the union of fields extracted from
all species names in scientificname
.
John Baumgartner ([email protected])
https://www.gbif.org/developer/species#parser
## Not run: name_parse(scientificname='x Agropogon littoralis') name_parse(c('Arrhenatherum elatius var. elatius', 'Secale cereale subsp. cereale', 'Secale cereale ssp. cereale', 'Vanessa atalanta (Linnaeus, 1758)')) name_parse("Ajuga pyramidata") name_parse("Ajuga pyramidata x reptans") # Pass on curl options # res <- name_parse(c('Arrhenatherum elatius var. elatius', # 'Secale cereale subsp. cereale', 'Secale cereale ssp. cereale', # 'Vanessa atalanta (Linnaeus, 1758)'), curlopts=list(verbose=TRUE)) ## End(Not run)
## Not run: name_parse(scientificname='x Agropogon littoralis') name_parse(c('Arrhenatherum elatius var. elatius', 'Secale cereale subsp. cereale', 'Secale cereale ssp. cereale', 'Vanessa atalanta (Linnaeus, 1758)')) name_parse("Ajuga pyramidata") name_parse("Ajuga pyramidata x reptans") # Pass on curl options # res <- name_parse(c('Arrhenatherum elatius var. elatius', # 'Secale cereale subsp. cereale', 'Secale cereale ssp. cereale', # 'Vanessa atalanta (Linnaeus, 1758)'), curlopts=list(verbose=TRUE)) ## End(Not run)
A quick and simple autocomplete service that returns up to 20 name usages by doing prefix matching against the scientific name. Results are ordered by relevance.
name_suggest( q = NULL, datasetKey = NULL, rank = NULL, fields = NULL, start = NULL, limit = 100, curlopts = list() )
name_suggest( q = NULL, datasetKey = NULL, rank = NULL, fields = NULL, start = NULL, limit = 100, curlopts = list() )
q |
(character, required) Simple search parameter. The value for this parameter can be a simple word or a phrase. Wildcards can be added to the simple word parameters only, e.g. q=*puma* |
datasetKey |
(character) Filters by the checklist dataset key (a uuid, see examples) |
rank |
(character) A taxonomic rank. One of class, cultivar, cultivar_group, domain, family, form, genus, informal, infrageneric_name, infraorder, infraspecific_name, infrasubspecific_name, kingdom, order, phylum, section, series, species, strain, subclass, subfamily, subform, subgenus, subkingdom, suborder, subphylum, subsection, subseries, subspecies, subtribe, subvariety, superclass, superfamily, superorder, superphylum, suprageneric_name, tribe, unranked, or variety. |
fields |
(character) Fields to return in output data.frame (simply prunes columns off) |
start |
Record number to start at. Default: 0. Use in combination
with |
limit |
Number of records to return. Default: 100. Maximum: 1000. |
curlopts |
list of named curl options passed on to
|
A list, with two elements data
(tibble) and hierarchy
(list of
data.frame's). If 'higherClassificationMap' is one of the fields
requested,
then hierarchy
is a list of data.frame's; if not included, hierarchy
is an empty list.
Some parameters can take many inputs, which are treated as 'OR' (e.g., a or b or c). The following take many inputs:
rank
datasetKey
https://www.gbif.org/developer/species#searching
## Not run: name_suggest(q='Puma concolor') name_suggest(q='Puma') name_suggest(q='Puma', rank="genus") name_suggest(q='Puma', rank="subspecies") name_suggest(q='Puma', rank="species") name_suggest(q='Puma', rank="infraspecific_name") name_suggest(q='Puma', limit=2) name_suggest(q='Puma', fields=c('key','canonicalName')) name_suggest(q='Puma', fields=c('key','canonicalName', 'higherClassificationMap')) # Some parameters accept many inputs, treated as OR name_suggest(rank = c("family", "genus")) name_suggest(datasetKey = c("73605f3a-af85-4ade-bbc5-522bfb90d847", "d7c60346-44b6-400d-ba27-8d3fbeffc8a5")) # If 'higherClassificationMap' in fields, a list is returned name_suggest(q='Puma', fields=c('key','higherClassificationMap')) # Pass on curl options name_suggest(q='Puma', limit=200, curlopts = list(verbose=TRUE)) ## End(Not run)
## Not run: name_suggest(q='Puma concolor') name_suggest(q='Puma') name_suggest(q='Puma', rank="genus") name_suggest(q='Puma', rank="subspecies") name_suggest(q='Puma', rank="species") name_suggest(q='Puma', rank="infraspecific_name") name_suggest(q='Puma', limit=2) name_suggest(q='Puma', fields=c('key','canonicalName')) name_suggest(q='Puma', fields=c('key','canonicalName', 'higherClassificationMap')) # Some parameters accept many inputs, treated as OR name_suggest(rank = c("family", "genus")) name_suggest(datasetKey = c("73605f3a-af85-4ade-bbc5-522bfb90d847", "d7c60346-44b6-400d-ba27-8d3fbeffc8a5")) # If 'higherClassificationMap' in fields, a list is returned name_suggest(q='Puma', fields=c('key','higherClassificationMap')) # Pass on curl options name_suggest(q='Puma', limit=200, curlopts = list(verbose=TRUE)) ## End(Not run)
Lookup details for specific names in all taxonomies in GBIF.
name_usage( key = NULL, name = NULL, data = "all", language = NULL, datasetKey = NULL, uuid = NULL, rank = NULL, shortname = NULL, start = 0, limit = 100, return = NULL, curlopts = list() )
name_usage( key = NULL, name = NULL, data = "all", language = NULL, datasetKey = NULL, uuid = NULL, rank = NULL, shortname = NULL, start = 0, limit = 100, return = NULL, curlopts = list() )
key |
(numeric or character) A GBIF key for a taxon |
name |
(character) Filters by a case insensitive, canonical namestring, e.g. 'Puma concolor' |
data |
(character) Specify an option to select what data is returned. See Description below. |
language |
(character) Language, default is English |
datasetKey |
(character) Filters by the dataset's key (a uuid). Must be length=1 |
uuid |
(character) A dataset key |
rank |
(character) Taxonomic rank. Filters by taxonomic rank as one of: CLASS, CULTIVAR, CULTIVAR_GROUP, DOMAIN, FAMILY, FORM, GENUS, INFORMAL, INFRAGENERIC_NAME, INFRAORDER, INFRASPECIFIC_NAME, INFRASUBSPECIFIC_NAME, KINGDOM, ORDER, PHYLUM, SECTION, SERIES, SPECIES, STRAIN, SUBCLASS, SUBFAMILY, SUBFORM, SUBGENUS, SUBKINGDOM, SUBORDER, SUBPHYLUM, SUBSECTION, SUBSERIES, SUBSPECIES, SUBTRIBE, SUBVARIETY, SUPERCLASS, SUPERFAMILY, SUPERORDER, SUPERPHYLUM, SUPRAGENERIC_NAME, TRIBE, UNRANKED, VARIETY |
shortname |
(character) A short name for a dataset - it may not do anything |
start |
Record number to start at. Default: 0. |
limit |
Number of records to return. Default: 100. |
return |
Defunct. All components are returned; index to the one(s) you want |
curlopts |
list of named curl options passed on to
|
This service uses fuzzy lookup so that you can put in partial names and you should get back those things that match. See examples below.
This function is different from name_lookup()
in that that function
searches for names. This function encompasses a bunch of API endpoints,
most of which require that you already have a taxon key, but there is one
endpoint that allows name searches (see examples below).
Note that data="verbatim"
hasn't been working.
Options for the data parameter are: 'all', 'verbatim', 'name', 'parents', 'children', 'related', 'synonyms', 'descriptions','distributions', 'media', 'references', 'speciesProfiles', 'vernacularNames', 'typeSpecimens', 'root', 'iucnRedListCategory'
This function used to be vectorized with respect to the data
parameter, where you could pass in multiple values and the function
internally loops over each option making separate requests. This has been
removed. You can still loop over many options for the data
parameter,
just use an lapply
family function, or a for loop, etc.
See name_issues()
for more information about issues in issues
column.
An object of class gbif, which is a S3 class list, with slots for
metadata (meta
) and the data itself (data
). In addition, the
object has attributes listing the user supplied arguments and type of
search, which, unlike for occurrence data, is always equal to
'single' even if multiple values for some parameters are given. meta
is a list of length four with offset, limit, endOfRecords and count fields.
data
is a tibble (aka data.frame) containing all information about
the found taxa.
These parameters used to accept many inputs, but no longer do:
rank
name
language
datasetKey
https://www.gbif.org/developer/species#nameUsages
## Not run: # A single name usage name_usage(key=1) # Name usage for a taxonomic name name_usage(name='Puma', rank="GENUS") # Name usage for all taxa in a dataset # (set sufficient high limit, but less than 100000) # name_usage(datasetKey = "9ff7d317-609b-4c08-bd86-3bc404b77c42", # limit = 10000) # All name usages name_usage() # References for a name usage name_usage(key=2435099, data='references') # Species profiles, descriptions name_usage(key=3119195, data='speciesProfiles') name_usage(key=3119195, data='descriptions') name_usage(key=2435099, data='children') # Vernacular names for a name usage name_usage(key=3119195, data='vernacularNames') # Limit number of results returned name_usage(key=3119195, data='vernacularNames', limit=3) # Search for names by dataset with datasetKey parameter name_usage(datasetKey="d7dddbf4-2cf0-4f39-9b2a-bb099caae36c") # Search for a particular language name_usage(key=3119195, language="FRENCH", data='vernacularNames') # get root usage with a uuid name_usage(data = "root", uuid = "73605f3a-af85-4ade-bbc5-522bfb90d847") # search by language name_usage(language = "spanish") # Pass on curl options name_usage(name='Puma concolor', limit=300, curlopts = list(verbose=TRUE)) # look up iucn red list category name_usage(key = 7707728, data = 'iucnRedListCategory') ## End(Not run)
## Not run: # A single name usage name_usage(key=1) # Name usage for a taxonomic name name_usage(name='Puma', rank="GENUS") # Name usage for all taxa in a dataset # (set sufficient high limit, but less than 100000) # name_usage(datasetKey = "9ff7d317-609b-4c08-bd86-3bc404b77c42", # limit = 10000) # All name usages name_usage() # References for a name usage name_usage(key=2435099, data='references') # Species profiles, descriptions name_usage(key=3119195, data='speciesProfiles') name_usage(key=3119195, data='descriptions') name_usage(key=2435099, data='children') # Vernacular names for a name usage name_usage(key=3119195, data='vernacularNames') # Limit number of results returned name_usage(key=3119195, data='vernacularNames', limit=3) # Search for names by dataset with datasetKey parameter name_usage(datasetKey="d7dddbf4-2cf0-4f39-9b2a-bb099caae36c") # Search for a particular language name_usage(key=3119195, language="FRENCH", data='vernacularNames') # get root usage with a uuid name_usage(data = "root", uuid = "73605f3a-af85-4ade-bbc5-522bfb90d847") # search by language name_usage(language = "spanish") # Pass on curl options name_usage(name='Puma concolor', limit=300, curlopts = list(verbose=TRUE)) # look up iucn red list category name_usage(key = 7707728, data = 'iucnRedListCategory') ## End(Not run)
Get data about GBIF networks
network( data = "all", uuid = NULL, query = NULL, identifier = NULL, identifierType = NULL, limit = 100, start = NULL, curlopts = list() ) network_constituents(uuid = NULL, limit = 100, start = 0)
network( data = "all", uuid = NULL, query = NULL, identifier = NULL, identifierType = NULL, limit = 100, start = NULL, curlopts = list() ) network_constituents(uuid = NULL, limit = 100, start = 0)
data |
The type of data to get. One or more of: 'contact', 'endpoint',
'identifier', 'tag', 'machineTag', 'comment', 'constituents', or the
special 'all'. Default: |
uuid |
UUID of the data network provider. This must be specified if data is anything other than 'all'. Only 1 can be passed in |
query |
Query nodes. Only used when |
identifier |
The value for this parameter can be a simple string or
integer, e.g. |
identifierType |
Used in combination with the identifier parameter to filter identifiers by identifier type. See details. This parameter doesn't seem to work right now. |
limit |
Number of records to return. Default: 100. Maximum: 1000. |
start |
Record number to start at. Default: 0. Use in combination
with |
curlopts |
list of named curl options passed on to
|
identifierType options:
DOI No description.
FTP No description.
GBIF_NODE Identifies the node (e.g: DK
for Denmark, sp2000
for Species 2000).
GBIF_PARTICIPANT Participant identifier from the GBIF IMS Filemaker system.
GBIF_PORTAL Indicates the identifier originated from an auto_increment column in the portal.data_provider or portal.data_resource table respectively.
HANDLER No description.
LSID Reference controlled by a separate system, used for example by DOI.
SOURCE_ID No description.
UNKNOWN No description.
URI No description.
URL No description.
UUID No description.
Get various information about GBIF networks. network_constituents()
is a
convenience function that allows you to get all the datasets in a network.
network()
returns a list
network_constituents()
returns a data.frame of datasets in the network
https://www.gbif.org/developer/registry#networks
## Not run: network() network(uuid='2b7c7b4f-4d4f-40d3-94de-c28b6fa054a6') network_constituents('2b7c7b4f-4d4f-40d3-94de-c28b6fa054a6') # curl options network(curlopts = list(verbose=TRUE)) ## End(Not run)
## Not run: network() network(uuid='2b7c7b4f-4d4f-40d3-94de-c28b6fa054a6') network_constituents('2b7c7b4f-4d4f-40d3-94de-c28b6fa054a6') # curl options network(curlopts = list(verbose=TRUE)) ## End(Not run)
Networks metadata.
networks( data = "all", uuid = NULL, query = NULL, identifier = NULL, identifierType = NULL, limit = 100, start = NULL, curlopts = list() )
networks( data = "all", uuid = NULL, query = NULL, identifier = NULL, identifierType = NULL, limit = 100, start = NULL, curlopts = list() )
data |
The type of data to get. One or more of: 'contact', 'endpoint',
'identifier', 'tag', 'machineTag', 'comment', 'constituents', or the
special 'all'. Default: |
uuid |
UUID of the data network provider. This must be specified if data is anything other than 'all'. Only 1 can be passed in |
query |
Query nodes. Only used when |
identifier |
The value for this parameter can be a simple string or
integer, e.g. |
identifierType |
Used in combination with the identifier parameter to filter identifiers by identifier type. See details. This parameter doesn't seem to work right now. |
limit |
Number of records to return. Default: 100. Maximum: 1000. |
start |
Record number to start at. Default: 0. Use in combination
with |
curlopts |
list of named curl options passed on to
|
identifierType options:
DOI No description.
FTP No description.
GBIF_NODE Identifies the node (e.g: DK
for Denmark, sp2000
for Species 2000).
GBIF_PARTICIPANT Participant identifier from the GBIF IMS Filemaker system.
GBIF_PORTAL Indicates the identifier originated from an auto_increment column in the portal.data_provider or portal.data_resource table respectively.
HANDLER No description.
LSID Reference controlled by a separate system, used for example by DOI.
SOURCE_ID No description.
UNKNOWN No description.
URI No description.
URL No description.
UUID No description.
https://www.gbif.org/developer/registry#networks
## Not run: networks() networks(uuid='2b7c7b4f-4d4f-40d3-94de-c28b6fa054a6') # curl options networks(curlopts = list(verbose=TRUE)) ## End(Not run)
## Not run: networks() networks(uuid='2b7c7b4f-4d4f-40d3-94de-c28b6fa054a6') # curl options networks(curlopts = list(verbose=TRUE)) ## End(Not run)
Nodes metadata.
nodes( data = "all", uuid = NULL, query = NULL, identifier = NULL, identifierType = NULL, limit = 100, start = NULL, isocode = NULL, curlopts = list() )
nodes( data = "all", uuid = NULL, query = NULL, identifier = NULL, identifierType = NULL, limit = 100, start = NULL, isocode = NULL, curlopts = list() )
data |
The type of data to get. One or more of: 'organization',
'endpoint', 'identifier', 'tag', 'machineTag', 'comment',
'pendingEndorsement', 'country', 'dataset', 'installation', or the
special 'all'. Default: |
uuid |
UUID of the data node provider. This must be specified if data is anything other than 'all'. |
query |
Query nodes. Only used when |
identifier |
The value for this parameter can be a simple string or
integer, e.g. |
identifierType |
Used in combination with the identifier parameter to filter identifiers by identifier type. See details. This parameter doesn't seem to work right now. |
limit |
Number of records to return. Default: 100. Maximum: 1000. |
start |
Record number to start at. Default: 0. Use in combination
with |
isocode |
A 2 letter country code. Only used if data='country'. |
curlopts |
list of named curl options passed on to
|
identifierType options:
DOI No description.
FTP No description.
GBIF_NODE Identifies the node (e.g: DK
for Denmark, sp2000
for Species 2000).
GBIF_PARTICIPANT Participant identifier from the GBIF IMS Filemaker system.
GBIF_PORTAL Indicates the identifier originated from an auto_increment column in the portal.data_provider or portal.data_resource table respectively.
HANDLER No description.
LSID Reference controlled by a separate system, used for example by DOI.
SOURCE_ID No description.
UNKNOWN No description.
URI No description.
URL No description.
UUID No description.
https://www.gbif.org/developer/registry#nodes
## Not run: nodes(limit=5) nodes(uuid="1193638d-32d1-43f0-a855-8727c94299d8") nodes(data='identifier', uuid="03e816b3-8f58-49ae-bc12-4e18b358d6d9") nodes(data=c('identifier','organization','comment'), uuid="03e816b3-8f58-49ae-bc12-4e18b358d6d9") uuids = c("8cb55387-7802-40e8-86d6-d357a583c596", "02c40d2a-1cba-4633-90b7-e36e5e97aba8", "7a17efec-0a6a-424c-b743-f715852c3c1f", "b797ce0f-47e6-4231-b048-6b62ca3b0f55", "1193638d-32d1-43f0-a855-8727c94299d8", "d3499f89-5bc0-4454-8cdb-60bead228a6d", "cdc9736d-5ff7-4ece-9959-3c744360cdb3", "a8b16421-d80b-4ef3-8f22-098b01a89255", "8df8d012-8e64-4c8a-886e-521a3bdfa623", "b35cf8f1-748d-467a-adca-4f9170f20a4e", "03e816b3-8f58-49ae-bc12-4e18b358d6d9", "073d1223-70b1-4433-bb21-dd70afe3053b", "07dfe2f9-5116-4922-9a8a-3e0912276a72", "086f5148-c0a8-469b-84cc-cce5342f9242", "0909d601-bda2-42df-9e63-a6d51847ebce", "0e0181bf-9c78-4676-bdc3-54765e661bb8", "109aea14-c252-4a85-96e2-f5f4d5d088f4", "169eb292-376b-4cc6-8e31-9c2c432de0ad", "1e789bc9-79fc-4e60-a49e-89dfc45a7188", "1f94b3ca-9345-4d65-afe2-4bace93aa0fe") res <- lapply(uuids, function(x) nodes(x, data='identifier')$data) res <- res[!sapply(res, NROW)==0] res[1] # Pass on curl options nodes(limit=20, curlopts=list(verbose=TRUE)) ## End(Not run)
## Not run: nodes(limit=5) nodes(uuid="1193638d-32d1-43f0-a855-8727c94299d8") nodes(data='identifier', uuid="03e816b3-8f58-49ae-bc12-4e18b358d6d9") nodes(data=c('identifier','organization','comment'), uuid="03e816b3-8f58-49ae-bc12-4e18b358d6d9") uuids = c("8cb55387-7802-40e8-86d6-d357a583c596", "02c40d2a-1cba-4633-90b7-e36e5e97aba8", "7a17efec-0a6a-424c-b743-f715852c3c1f", "b797ce0f-47e6-4231-b048-6b62ca3b0f55", "1193638d-32d1-43f0-a855-8727c94299d8", "d3499f89-5bc0-4454-8cdb-60bead228a6d", "cdc9736d-5ff7-4ece-9959-3c744360cdb3", "a8b16421-d80b-4ef3-8f22-098b01a89255", "8df8d012-8e64-4c8a-886e-521a3bdfa623", "b35cf8f1-748d-467a-adca-4f9170f20a4e", "03e816b3-8f58-49ae-bc12-4e18b358d6d9", "073d1223-70b1-4433-bb21-dd70afe3053b", "07dfe2f9-5116-4922-9a8a-3e0912276a72", "086f5148-c0a8-469b-84cc-cce5342f9242", "0909d601-bda2-42df-9e63-a6d51847ebce", "0e0181bf-9c78-4676-bdc3-54765e661bb8", "109aea14-c252-4a85-96e2-f5f4d5d088f4", "169eb292-376b-4cc6-8e31-9c2c432de0ad", "1e789bc9-79fc-4e60-a49e-89dfc45a7188", "1f94b3ca-9345-4d65-afe2-4bace93aa0fe") res <- lapply(uuids, function(x) nodes(x, data='identifier')$data) res <- res[!sapply(res, NROW)==0] res[1] # Pass on curl options nodes(limit=20, curlopts=list(verbose=TRUE)) ## End(Not run)
Get number of occurrence records.
occ_count(..., occurrenceStatus = "PRESENT", curlopts = list())
occ_count(..., occurrenceStatus = "PRESENT", curlopts = list())
... |
parameters passed to |
occurrenceStatus |
(character) Default is "PRESENT". Specify whether search should return "PRESENT" or "ABSENT" data. |
curlopts |
(list) curl options. |
occ_count()
is a short convenience wrapper for
occ_search(limit=0)$meta$count
.
The current version (since rgbif 3.7.6) of occ_count()
uses a different
GBIF API endpoint from previous versions. This change greatly improves
the usability of occ_count()
. Legacy parameters georeferenced
, type
,
date
, to
, from
are no longer supported and not guaranteed to work
correctly.
Multiple values of the type c("a","b")
will give an error,
but "a;b"
will work.
The occurrence count of the occ_search()
query.
occ_count_year()
, occ_count_country()
, occ_count_pub_country()
,
occ_count_basis_of_record()
## Not run: # total occurrences mediated by GBIF occ_count() # should be > 2 billion! # number of plant occurrences occ_count(kingdomKey=name_backbone("Plantea")$usageKey) occ_count(scientificName = 'Ursus americanus') occ_count(country="DK") # found in Denmark occ_count(country="DK;US") # found in Denmark and United States occ_count(publishingCountry="US") # published by the United States # number of repatriated eBird records in India occ_count(repatriated = TRUE,country="IN") occ_count(taxonKey=212) # number of bird occurrences # between years 1800-1900 occ_count(basisOfRecord="PRESERVED_SPECIMEN", year="1800,1900") occ_count(recordedBy="John Waller") # recorded by John Waller occ_count(decimalLatitude=0, decimalLongitude=0) # exactly on 0,0 # close to a known iso2 centroid occ_count(distanceFromCentroidInMeters="0,2000") # close to a known iso2 centroid in Sweden occ_count(distanceFromCentroidInMeters="0,2000",country="SE") occ_count(hasCoordinate=TRUE) # with coordinates occ_count(protocol = "DIGIR") # published using DIGIR format occ_count(mediaType = 'StillImage') # with images # number of occurrences iucn status "critically endangered" occ_count(iucnRedListCategory="CR") occ_count(verbatimScientificName="Calopteryx splendens;Calopteryx virgo") occ_count( geometry="POLYGON((24.70938 48.9221,24.71056 48.92175,24.71107 48.92296,24.71002 48.92318,24.70938 48.9221))") # getting a table of counts using the facets interface # occurrence counts by year occ_count(facet="year") occ_count(facet="year",facetLimit=400) # top scientificNames from Japan occ_count(facet="scientificName",country="JP") # top countries publishing specimen bird records between 1850 and 1880 occ_count(facet="scientificName",taxonKey=212,basisOfRecord="PRESERVED_SPECIMEN" ,year="1850,1880") # Number of present or absence records of Elephants occ_count(facet="occurrenceStatus",scientificName="Elephantidae") # top 100 datasets publshing occurrences to GBIF 
occ_count(facet="datasetKey",facetLimit=100) # top datasets publishing country centroids on GBIF occ_count(facet="datasetKey",distanceFromCentroidInMeters="0") # common values for coordinateUncertaintyInMeters for museum specimens occ_count(facet="coordinateUncertaintyInMeters",basisOfRecord="PRESERVED_SPECIMEN") # number of iucn listed bird and insect occurrences in Mexico occ_count(facet="iucnRedListCategory",taxonKey="212;216",country="MX") # most common latitude values mediated by GBIF occ_count(facet="decimalLatitude") # top iNaturalist users publishing research-grade obs to GBIF occ_count(facet="recordedBy",datasetKey="50c9509d-22c7-4a22-a47d-8c48425ef4a7") # top 100 iNaturalist users from Ukraine occ_count(facet="recordedBy",datasetKey="50c9509d-22c7-4a22-a47d-8c48425ef4a7" ,country="UA",facetLimit=100) # top institutions publishing specimen occurrences to GBIF occ_count(facet="institutionCode",basisOfRecord="PRESERVED_SPECIMEN") ## End(Not run)
## Not run: # total occurrences mediated by GBIF occ_count() # should be > 2 billion! # number of plant occurrences occ_count(kingdomKey=name_backbone("Plantea")$usageKey) occ_count(scientificName = 'Ursus americanus') occ_count(country="DK") # found in Denmark occ_count(country="DK;US") # found in Denmark and United States occ_count(publishingCountry="US") # published by the United States # number of repatriated eBird records in India occ_count(repatriated = TRUE,country="IN") occ_count(taxonKey=212) # number of bird occurrences # between years 1800-1900 occ_count(basisOfRecord="PRESERVED_SPECIMEN", year="1800,1900") occ_count(recordedBy="John Waller") # recorded by John Waller occ_count(decimalLatitude=0, decimalLongitude=0) # exactly on 0,0 # close to a known iso2 centroid occ_count(distanceFromCentroidInMeters="0,2000") # close to a known iso2 centroid in Sweden occ_count(distanceFromCentroidInMeters="0,2000",country="SE") occ_count(hasCoordinate=TRUE) # with coordinates occ_count(protocol = "DIGIR") # published using DIGIR format occ_count(mediaType = 'StillImage') # with images # number of occurrences iucn status "critically endangered" occ_count(iucnRedListCategory="CR") occ_count(verbatimScientificName="Calopteryx splendens;Calopteryx virgo") occ_count( geometry="POLYGON((24.70938 48.9221,24.71056 48.92175,24.71107 48.92296,24.71002 48.92318,24.70938 48.9221))") # getting a table of counts using the facets interface # occurrence counts by year occ_count(facet="year") occ_count(facet="year",facetLimit=400) # top scientificNames from Japan occ_count(facet="scientificName",country="JP") # top countries publishing specimen bird records between 1850 and 1880 occ_count(facet="scientificName",taxonKey=212,basisOfRecord="PRESERVED_SPECIMEN" ,year="1850,1880") # Number of present or absence records of Elephants occ_count(facet="occurrenceStatus",scientificName="Elephantidae") # top 100 datasets publshing occurrences to GBIF 
occ_count(facet="datasetKey",facetLimit=100) # top datasets publishing country centroids on GBIF occ_count(facet="datasetKey",distanceFromCentroidInMeters="0") # common values for coordinateUncertaintyInMeters for museum specimens occ_count(facet="coordinateUncertaintyInMeters",basisOfRecord="PRESERVED_SPECIMEN") # number of iucn listed bird and insect occurrences in Mexico occ_count(facet="iucnRedListCategory",taxonKey="212;216",country="MX") # most common latitude values mediated by GBIF occ_count(facet="decimalLatitude") # top iNaturalist users publishing research-grade obs to GBIF occ_count(facet="recordedBy",datasetKey="50c9509d-22c7-4a22-a47d-8c48425ef4a7") # top 100 iNaturalist users from Ukraine occ_count(facet="recordedBy",datasetKey="50c9509d-22c7-4a22-a47d-8c48425ef4a7" ,country="UA",facetLimit=100) # top institutions publishing specimen occurrences to GBIF occ_count(facet="institutionCode",basisOfRecord="PRESERVED_SPECIMEN") ## End(Not run)
Get quick pre-computed occurrence counts of a limited number of dimensions.
occ_count_country(publishingCountry = NULL) occ_count_pub_country(country = NULL) occ_count_year(year = NULL) occ_count_basis_of_record(curlopts = list())
occ_count_country(publishingCountry = NULL) occ_count_pub_country(country = NULL) occ_count_year(year = NULL) occ_count_basis_of_record(curlopts = list())
publishingCountry |
The 2-letter country code (as per ISO-3166-1) of the country from which the occurrence was published. |
country |
(character) The 2-letter country code (ISO-3166-1) in which the occurrence was recorded. |
year |
The 4 digit year. Supports range queries, 'smaller,larger' (e.g., '1990,1991', whereas '1991,1990' wouldn't work). |
curlopts |
(list) curl options. |
Get quick pre-computed counts of a limited number of dimensions.
occ_count_country()
will return a data.frame with occurrence counts by
country. Using occ_count_country(publishingCountry="DK")
will
return the occurrence contributions Denmark has made to each country.
occ_count_pub_country()
will return a data.frame with occurrence counts by
publishing country. Using occ_count_pub_country(country="DK")
, will return
the occurrence contributions each country has made to that focal country=DK
.
occ_count_year()
will return a data.frame with the total occurrences
mediated by GBIF for each year. Using occ_count_year(year="1800,1900")
will only return counts for that range.
occ_count_basis_of_record()
will return a data.frame with total occurrences
mediated by GBIF for each basis of record.
A data.frame
of counts.
## Not run: # total occurrence counts for all countries and iso2 places occ_count_country() # the occurrences Mexico has published in other countries occ_count_country("MX") # the occurrences Denmark has published in other countries occ_count_country("DK") # the occurrences other countries have published in Denmark occ_count_pub_country("DK") # the occurrences other countries have published in Mexico occ_count_pub_country("MX") # total occurrence counts for each year that an occurrence was # recorded or collected. occ_count_year() # supports ranges occ_count_year("1800,1900") # table of occurrence counts by basis of record occ_count_basis_of_record() ## End(Not run)
## Not run: # total occurrence counts for all countries and iso2 places occ_count_country() # the occurrences Mexico has published in other countries occ_count_country("MX") # the occurrences Denmark has published in other countries occ_count_country("DK") # the occurrences other countries have published in Denmark occ_count_pub_country("DK") # the occurrences other countries have published in Mexico occ_count_pub_country("MX") # total occurrence counts for each year that an occurrence was # recorded or collected. occ_count_year() # supports ranges occ_count_year("1800,1900") # table of occurrence counts by basis of record occ_count_basis_of_record() ## End(Not run)
Legacy alternative to occ_search
occ_data( taxonKey = NULL, scientificName = NULL, country = NULL, publishingCountry = NULL, hasCoordinate = NULL, typeStatus = NULL, recordNumber = NULL, lastInterpreted = NULL, continent = NULL, geometry = NULL, geom_big = "asis", geom_size = 40, geom_n = 10, recordedBy = NULL, recordedByID = NULL, identifiedByID = NULL, basisOfRecord = NULL, datasetKey = NULL, eventDate = NULL, catalogNumber = NULL, year = NULL, month = NULL, decimalLatitude = NULL, decimalLongitude = NULL, elevation = NULL, depth = NULL, institutionCode = NULL, collectionCode = NULL, hasGeospatialIssue = NULL, issue = NULL, search = NULL, mediaType = NULL, subgenusKey = NULL, repatriated = NULL, phylumKey = NULL, kingdomKey = NULL, classKey = NULL, orderKey = NULL, familyKey = NULL, genusKey = NULL, speciesKey = NULL, establishmentMeans = NULL, degreeOfEstablishment = NULL, protocol = NULL, license = NULL, organismId = NULL, publishingOrg = NULL, stateProvince = NULL, waterBody = NULL, locality = NULL, occurrenceStatus = "PRESENT", gadmGid = NULL, coordinateUncertaintyInMeters = NULL, verbatimScientificName = NULL, eventId = NULL, identifiedBy = NULL, networkKey = NULL, verbatimTaxonId = NULL, occurrenceId = NULL, organismQuantity = NULL, organismQuantityType = NULL, relativeOrganismQuantity = NULL, iucnRedListCategory = NULL, lifeStage = NULL, isInCluster = NULL, distanceFromCentroidInMeters = NULL, skip_validate = TRUE, limit = 500, start = 0, curlopts = list(http_version = 2) )
occ_data( taxonKey = NULL, scientificName = NULL, country = NULL, publishingCountry = NULL, hasCoordinate = NULL, typeStatus = NULL, recordNumber = NULL, lastInterpreted = NULL, continent = NULL, geometry = NULL, geom_big = "asis", geom_size = 40, geom_n = 10, recordedBy = NULL, recordedByID = NULL, identifiedByID = NULL, basisOfRecord = NULL, datasetKey = NULL, eventDate = NULL, catalogNumber = NULL, year = NULL, month = NULL, decimalLatitude = NULL, decimalLongitude = NULL, elevation = NULL, depth = NULL, institutionCode = NULL, collectionCode = NULL, hasGeospatialIssue = NULL, issue = NULL, search = NULL, mediaType = NULL, subgenusKey = NULL, repatriated = NULL, phylumKey = NULL, kingdomKey = NULL, classKey = NULL, orderKey = NULL, familyKey = NULL, genusKey = NULL, speciesKey = NULL, establishmentMeans = NULL, degreeOfEstablishment = NULL, protocol = NULL, license = NULL, organismId = NULL, publishingOrg = NULL, stateProvince = NULL, waterBody = NULL, locality = NULL, occurrenceStatus = "PRESENT", gadmGid = NULL, coordinateUncertaintyInMeters = NULL, verbatimScientificName = NULL, eventId = NULL, identifiedBy = NULL, networkKey = NULL, verbatimTaxonId = NULL, occurrenceId = NULL, organismQuantity = NULL, organismQuantityType = NULL, relativeOrganismQuantity = NULL, iucnRedListCategory = NULL, lifeStage = NULL, isInCluster = NULL, distanceFromCentroidInMeters = NULL, skip_validate = TRUE, limit = 500, start = 0, curlopts = list(http_version = 2) )
taxonKey |
(numeric) A taxon key from the GBIF backbone. All included
and synonym taxa are included in the search, so a search for aves with
taxonKey=212 will match all birds, no matter which species. You can pass
many keys to |
scientificName |
A scientific name from the GBIF backbone. All included and synonym taxa are included in the search. |
country |
(character) The 2-letter country code (ISO-3166-1)
in which the occurrence was recorded. |
publishingCountry |
The 2-letter country code (as per ISO-3166-1) of
the country from which the occurrence was published. See
|
hasCoordinate |
(logical) Return only occurrence records with lat/long
data ( |
typeStatus |
Type status of the specimen. One of many options. |
recordNumber |
Number recorded by collector of the data, different from GBIF record number. |
lastInterpreted |
Date the record was last modified in GBIF, in ISO 8601 format: yyyy, yyyy-MM, yyyy-MM-dd, or MM-dd. Supports range queries, 'smaller,larger' (e.g., '1990,1991', whereas '1991,1990' wouldn't work). |
continent |
The source supplied continent.
Continent is not inferred but only populated if provided by the dataset publisher. Applying this filter may exclude many relevant records. |
geometry |
(character) Searches for occurrences inside a polygon in Well Known Text (WKT) format. A WKT shape written as either
For example, "POLYGON((37.08 46.86,38.06 46.86,38.06 47.28,37.08 47.28,37.08 46.86))". See also the section WKT below. |
geom_big |
(character) One of "bbox" or "asis" (default). |
geom_size |
(integer) An integer indicating size of the cell. Default: 40. |
geom_n |
(integer) An integer indicating number of cells in each dimension. Default: 10. |
recordedBy |
(character) The person who recorded the occurrence. |
recordedByID |
(character) Identifier (e.g. ORCID) for the person who recorded the occurrence |
identifiedByID |
(character) Identifier (e.g. ORCID) for the person who provided the taxonomic identification of the occurrence. |
basisOfRecord |
(character) The specific nature of the data record. See here.
|
datasetKey |
(character) The occurrence dataset uuid key. That can be found in the dataset page url. For example, "7e380070-f762-11e1-a439-00145eb45e9a" is the key for Natural History Museum (London) Collection Specimens. |
eventDate |
(character) Occurrence date in ISO 8601 format: yyyy, yyyy-MM, yyyy-MM-dd, or MM-dd. Supports range queries, 'smaller,larger' (e.g., '1990,1991', whereas '1991,1990' wouldn't work). |
catalogNumber |
(character) An identifier of any form assigned by the source within a physical collection or digital dataset for the record which may not be unique, but should be fairly unique in combination with the institution and collection code. |
year |
The 4 digit year. A year of 98 will be interpreted as AD 98. Supports range queries, 'smaller,larger' (e.g., '1990,1991', whereas '1991,1990' wouldn't work). |
month |
The month of the year, starting with 1 for January. Supports range queries, 'smaller,larger' (e.g., '1,2', whereas '2,1' wouldn't work). |
decimalLatitude |
Latitude in decimals between -90 and 90 based on WGS84. Supports range queries, 'smaller,larger' (e.g., '25,30', whereas '30,25' wouldn't work). |
decimalLongitude |
Longitude in decimals between -180 and 180 based on WGS84. Supports range queries (e.g., '-0.4,-0.2', whereas '-0.2,-0.4' wouldn't work). |
elevation |
Elevation in meters above sea level. Supports range queries, 'smaller,larger' (e.g., '5,30', whereas '30,5' wouldn't work). |
depth |
Depth in meters relative to elevation. For example 10 meters below a lake surface with given elevation. Supports range queries, 'smaller,larger' (e.g., '5,30', whereas '30,5' wouldn't work). |
institutionCode |
An identifier of any form assigned by the source to identify the institution the record belongs to. |
collectionCode |
(character) An identifier of any form assigned by the source to identify the physical collection or digital dataset uniquely within the context of an institution. |
hasGeospatialIssue |
(logical) Includes/excludes occurrence records
which contain spatial issues (as determined in our record interpretation),
i.e. |
issue |
(character) One or more of many possible issues with each occurrence record. Issues passed to this parameter filter results by the issue. One of many options. See here for definitions. |
search |
(character) Query terms. The value for this parameter can be a simple word or a phrase. For example, search="puma" |
mediaType |
(character) Media type of "MovingImage", "Sound", or "StillImage". |
subgenusKey |
(numeric) Subgenus classification key. |
repatriated |
(character) Searches for records whose publishing country is different from the country in which the record was recorded. |
phylumKey |
(numeric) Phylum classification key. |
kingdomKey |
(numeric) Kingdom classification key. |
classKey |
(numeric) Class classification key. |
orderKey |
(numeric) Order classification key. |
familyKey |
(numeric) Family classification key. |
genusKey |
(numeric) Genus classification key. |
speciesKey |
(numeric) Species classification key. |
establishmentMeans |
(character) provides information about whether an organism or organisms have been introduced to a given place and time through the direct or indirect activity of modern humans.
|
degreeOfEstablishment |
(character) Provides information about degree to which an Organism survives, reproduces, and expands its range at the given place and time. One of many options. |
protocol |
(character) Protocol or mechanism used to provide the occurrence record. One of many options. |
license |
(character) The type of license applied to the dataset or record.
|
organismId |
(numeric) An identifier for the Organism instance (as opposed to a particular digital record of the Organism). May be a globally unique identifier or an identifier specific to the data set. |
publishingOrg |
(character) The publishing organization key (a UUID). |
stateProvince |
(character) The name of the next smaller administrative region than country (state, province, canton, department, region, etc.) in which the Location occurs. |
waterBody |
(character) The name of the water body in which the locations occur |
locality |
(character) The specific description of the place. |
occurrenceStatus |
(character) Default is "PRESENT". Specify whether search should return "PRESENT" or "ABSENT" data. |
gadmGid |
(character) The gadm id of the area occurrences are desired from. https://gadm.org/. |
coordinateUncertaintyInMeters |
A number or range between 0-1,000,000 which specifies the desired coordinate uncertainty. A coordinateUncertaintyInMeters=1000 will be interpreted as all records with exactly 1000m. Supports range queries, 'smaller,larger' (e.g., '1000,10000', whereas '10000,1000' wouldn't work). |
verbatimScientificName |
(character) Scientific name as provided by the source. |
eventId |
(character) identifier(s) for a sampling event. |
identifiedBy |
(character) names of people, groups, or organizations who assigned the Taxon to the subject. |
networkKey |
(character) The occurrence network key (a uuid). |
verbatimTaxonId |
(character) The taxon identifier provided to GBIF by the data publisher. |
occurrenceId |
(character) occurrence id from source. |
organismQuantity |
A number or range which specifies the desired organism quantity. An organismQuantity=5 will be interpreted as all records with exactly 5. Supports range queries, 'smaller,larger' (e.g., '5,20', whereas '20,5' wouldn't work). |
organismQuantityType |
(character) The type of quantification system used for the quantity of organisms. For example, "individuals" or "biomass". |
relativeOrganismQuantity |
(numeric) The relative measurement of the quantity of the organism (a number between 0-1). A relativeOrganismQuantity=0.1 will be interpreted as all records with exactly 0.1. Supports range queries, 'smaller,larger' (e.g., '0.1,0.5', whereas '0.5,0.1' wouldn't work). |
iucnRedListCategory |
(character) The IUCN threat status category.
|
lifeStage |
(character) the life stage of the occurrence. One of many options. |
isInCluster |
(logical) identify potentially related records on GBIF. |
distanceFromCentroidInMeters |
A number or range. A value of "2000,*" means at least 2km from known centroids. A value of "0" would mean occurrences exactly on known centroids. A value of "0,2000" would mean within 2km of centroids. Max value is 5000. |
skip_validate |
(logical) whether to skip wellknown::validate_wkt call or not. passed down to check_wkt(). Default: TRUE |
limit |
Number of records to return. Default: 500. Note that the per
request maximum is 300, but since we set it at 500 for the function, we
do two requests to get you the 500 records (if there are that many).
Note that there is a hard maximum of 100,000, which is calculated as the
|
start |
Record number to start at. Use in combination with limit to
page through results. Note that we do the paging internally for you, but
you can manually set the |
curlopts |
(list) |
This function is a legacy alternative to occ_search()
. It is not
recommended to use occ_data()
as it is not as flexible as occ_search()
.
New search terms will not be added to this function and it is only supported
for legacy reasons.
An object of class gbif_data
, which is a S3 class list, with
slots for metadata (meta
) and the occurrence data itself
(data
), and with attributes listing the user supplied arguments
and whether it was a "single" or "many" search; that is, if you supply
two values of the datasetKey
parameter, two searches are done, and
it's a "many". meta
is a list of length four with offset, limit,
endOfRecords and count fields. data
is a tibble (aka data.frame)
Spin up a download request for GBIF occurrence data.
occ_download( ..., body = NULL, type = "and", format = "DWCA", user = NULL, pwd = NULL, email = NULL, curlopts = list() ) occ_download_prep( ..., body = NULL, type = "and", format = "DWCA", user = NULL, pwd = NULL, email = NULL, curlopts = list() )
occ_download( ..., body = NULL, type = "and", format = "DWCA", user = NULL, pwd = NULL, email = NULL, curlopts = list() ) occ_download_prep( ..., body = NULL, type = "and", format = "DWCA", user = NULL, pwd = NULL, email = NULL, curlopts = list() )
... |
For |
body |
if you prefer to pass in the payload yourself, use this parameter. If you use this, don't pass anything to the dots. Accepts either an R list, or JSON. JSON is likely easier, since the JSON library jsonlite requires that you unbox strings that shouldn't be auto-converted to arrays, which is a bit tedious for large queries. optional |
type |
(character) One of equals (=), and (&), or (|), lessThan (<), lessThanOrEquals (<=), greaterThan (>), greaterThanOrEquals (>=), in, within, not (!), like, isNotNull |
format |
(character) The download format. One of 'DWCA' (default), 'SIMPLE_CSV', or 'SPECIES_LIST' |
user |
(character) User name within GBIF's website. Required. See "Authentication" below |
pwd |
(character) User password within GBIF's website. Required. See "Authentication" below |
email |
(character) Email address to which the download-complete notification is sent. Required. See "Authentication" below |
curlopts |
list of named curl options passed on to
|
When using the geometry parameter, make sure that your well known text
(WKT) is formatted as GBIF expects it. They expect WKT to have a
counter-clockwise winding order. For example, the following is clockwise
POLYGON((-19.5 34.1, -25.3 68.1, 35.9 68.1, 27.8 34.1, -19.5 34.1))
,
whereas they expect the other order:
POLYGON((-19.5 34.1, 27.8 34.1, 35.9 68.1, -25.3 68.1, -19.5 34.1))
note that coordinate pairs are longitude latitude
, longitude first, then
latitude
you should not get any results if you supply WKT that has clockwise winding order.
also note that occ_search()
/occ_data()
behave differently with
respect to WKT in that you can supply clockwise WKT to those
functions but they treat it as an exclusion, so get all data not
inside the WKT area.
occ_download_prep
: prepares a download request, but DOES NOT execute it.
meant for use with occ_download_queue()
occ_download
: prepares a download request and DOES execute it
For user
, pwd
, and email
parameters, you can set them in one of
three ways:
Set them in your .Rprofile
file with the names gbif_user
,
gbif_pwd
, and gbif_email
Set them in your .Renviron
/.bash_profile
(or similar) file with the
names GBIF_USER
, GBIF_PWD
, and GBIF_EMAIL
Simply pass strings to each of the parameters in the function call
We strongly recommend the second option - storing your details as environment variables as it's the most widely used way to store secrets.
See ?Startup
for help.
GBIF has a limit of 12,000 characters for a download query. This means that you can have a pretty long query, but at some point it may lead to an error on GBIF's side and you'll have to split your query into a few.
see downloads for an overview of GBIF downloads methods
See the API docs https://www.gbif.org/developer/occurrence#download for more info, and the predicates docs https://www.gbif.org/developer/occurrence#predicates
Other downloads:
download_predicate_dsl
,
occ_download_cached()
,
occ_download_cancel()
,
occ_download_dataset_activity()
,
occ_download_datasets()
,
occ_download_get()
,
occ_download_import()
,
occ_download_list()
,
occ_download_meta()
,
occ_download_queue()
,
occ_download_wait()
## Not run: # occ_download(pred("basisOfRecord", "LITERATURE")) # occ_download(pred("taxonKey", 3119195), pred_gt("elevation", 5000)) # occ_download(pred_gt("decimalLatitude", 50)) # occ_download(pred_gte("elevation", 9000)) # occ_download(pred_gte('decimalLatitude", 65)) # occ_download(pred("country", "US")) # occ_download(pred("institutionCode", "TLMF")) # occ_download(pred("catalogNumber", 217880)) # occ_download(pred("gbifId", 142317604)) # download format # z <- occ_download(pred_gte("decimalLatitude", 75), # format = "SPECIES_LIST") # res <- occ_download(pred("taxonKey", 7264332), pred("hasCoordinate", TRUE)) # pass output directly, or later, to occ_download_meta for more information # occ_download(pred_gt('decimalLatitude', 75)) %>% occ_download_meta # Multiple queries # occ_download(pred_gte("decimalLatitude", 65), # pred_lte("decimalLatitude", -65), type="or") # gg <- occ_download(pred("depth", 80), pred("taxonKey", 2343454), # type="or") # x <- occ_download(pred_and(pred_within("POLYGON((-14 42, 9 38, -7 26, -14 42))"), # pred_gte("elevation", 5000))) # complex example with many predicates # shows example of how to do date ranges for both year and month # res <- occ_download( # pred_gt("elevation", 5000), # pred_in("basisOfRecord", c('HUMAN_OBSERVATION','OBSERVATION','MACHINE_OBSERVATION')), # pred("country", "US"), # pred("hasCoordinate", TRUE), # pred("hasGeospatialIssue", FALSE), # pred_gte("year", 1999), # pred_lte("year", 2011), # pred_gte("month", 3), # pred_lte("month", 8) # ) # Using body parameter - pass in your own complete query ## as JSON query1 <- '{"creator":"sckott", "notification_address":["[email protected]"], "predicate":{"type":"and","predicates":[ {"type":"equals","key":"TAXON_KEY","value":"7264332"}, {"type":"equals","key":"HAS_COORDINATE","value":"TRUE"}]} }' # res <- occ_download(body = query1, curlopts=list(verbose=TRUE)) ## as a list library(jsonlite) query <- list( creator = unbox("sckott"), notification_address = "[email 
protected]", predicate = list( type = unbox("and"), predicates = list( list(type = unbox("equals"), key = unbox("TAXON_KEY"), value = unbox("7264332")), list(type = unbox("equals"), key = unbox("HAS_COORDINATE"), value = unbox("TRUE")) ) ) ) # res <- occ_download(body = query, curlopts = list(verbose = TRUE)) # Prepared query occ_download_prep(pred("basisOfRecord", "LITERATURE")) occ_download_prep(pred("basisOfRecord", "LITERATURE"), format = "SIMPLE_CSV") occ_download_prep(pred("basisOfRecord", "LITERATURE"), format = "SPECIES_LIST") occ_download_prep(pred_in("taxonKey", c(2977832, 2977901, 2977966, 2977835))) occ_download_prep(pred_within("POLYGON((-14 42, 9 38, -7 26, -14 42))")) ## a complicated example occ_download_prep( pred_in("basisOfRecord", c("MACHINE_OBSERVATION", "HUMAN_OBSERVATION")), pred_in("taxonKey", c(2498343, 2481776, 2481890)), pred_in("country", c("GB", "IE")), pred_or(pred_lte("year", 1989), pred("year", 2000)) ) # x = occ_download( # pred_in("basisOfRecord", c("MACHINE_OBSERVATION", "HUMAN_OBSERVATION")), # pred_in("taxonKey", c(9206251, 3112648)), # pred_in("country", c("US", "MX")), # pred_and(pred_gte("year", 1989), pred_lte("year", 1991)) # ) # occ_download_meta(x) # z <- occ_download_get(x) # df <- occ_download_import(z) # str(df) # library(dplyr) # unique(df$basisOfRecord) # unique(df$taxonKey) # unique(df$countryCode) # sort(unique(df$year)) ## End(Not run)
## Not run: # occ_download(pred("basisOfRecord", "LITERATURE")) # occ_download(pred("taxonKey", 3119195), pred_gt("elevation", 5000)) # occ_download(pred_gt("decimalLatitude", 50)) # occ_download(pred_gte("elevation", 9000)) # occ_download(pred_gte('decimalLatitude", 65)) # occ_download(pred("country", "US")) # occ_download(pred("institutionCode", "TLMF")) # occ_download(pred("catalogNumber", 217880)) # occ_download(pred("gbifId", 142317604)) # download format # z <- occ_download(pred_gte("decimalLatitude", 75), # format = "SPECIES_LIST") # res <- occ_download(pred("taxonKey", 7264332), pred("hasCoordinate", TRUE)) # pass output directly, or later, to occ_download_meta for more information # occ_download(pred_gt('decimalLatitude', 75)) %>% occ_download_meta # Multiple queries # occ_download(pred_gte("decimalLatitude", 65), # pred_lte("decimalLatitude", -65), type="or") # gg <- occ_download(pred("depth", 80), pred("taxonKey", 2343454), # type="or") # x <- occ_download(pred_and(pred_within("POLYGON((-14 42, 9 38, -7 26, -14 42))"), # pred_gte("elevation", 5000))) # complex example with many predicates # shows example of how to do date ranges for both year and month # res <- occ_download( # pred_gt("elevation", 5000), # pred_in("basisOfRecord", c('HUMAN_OBSERVATION','OBSERVATION','MACHINE_OBSERVATION')), # pred("country", "US"), # pred("hasCoordinate", TRUE), # pred("hasGeospatialIssue", FALSE), # pred_gte("year", 1999), # pred_lte("year", 2011), # pred_gte("month", 3), # pred_lte("month", 8) # ) # Using body parameter - pass in your own complete query ## as JSON query1 <- '{"creator":"sckott", "notification_address":["[email protected]"], "predicate":{"type":"and","predicates":[ {"type":"equals","key":"TAXON_KEY","value":"7264332"}, {"type":"equals","key":"HAS_COORDINATE","value":"TRUE"}]} }' # res <- occ_download(body = query1, curlopts=list(verbose=TRUE)) ## as a list library(jsonlite) query <- list( creator = unbox("sckott"), notification_address = "[email 
protected]", predicate = list( type = unbox("and"), predicates = list( list(type = unbox("equals"), key = unbox("TAXON_KEY"), value = unbox("7264332")), list(type = unbox("equals"), key = unbox("HAS_COORDINATE"), value = unbox("TRUE")) ) ) ) # res <- occ_download(body = query, curlopts = list(verbose = TRUE)) # Prepared query occ_download_prep(pred("basisOfRecord", "LITERATURE")) occ_download_prep(pred("basisOfRecord", "LITERATURE"), format = "SIMPLE_CSV") occ_download_prep(pred("basisOfRecord", "LITERATURE"), format = "SPECIES_LIST") occ_download_prep(pred_in("taxonKey", c(2977832, 2977901, 2977966, 2977835))) occ_download_prep(pred_within("POLYGON((-14 42, 9 38, -7 26, -14 42))")) ## a complicated example occ_download_prep( pred_in("basisOfRecord", c("MACHINE_OBSERVATION", "HUMAN_OBSERVATION")), pred_in("taxonKey", c(2498343, 2481776, 2481890)), pred_in("country", c("GB", "IE")), pred_or(pred_lte("year", 1989), pred("year", 2000)) ) # x = occ_download( # pred_in("basisOfRecord", c("MACHINE_OBSERVATION", "HUMAN_OBSERVATION")), # pred_in("taxonKey", c(9206251, 3112648)), # pred_in("country", c("US", "MX")), # pred_and(pred_gte("year", 1989), pred_lte("year", 1991)) # ) # occ_download_meta(x) # z <- occ_download_get(x) # df <- occ_download_import(z) # str(df) # library(dplyr) # unique(df$basisOfRecord) # unique(df$taxonKey) # unique(df$countryCode) # sort(unique(df$year)) ## End(Not run)
Check for downloads already in your GBIF account
occ_download_cached( ..., body = NULL, type = "and", format = "DWCA", user = NULL, pwd = NULL, email = NULL, refresh = FALSE, age = 30, curlopts = list() )
occ_download_cached( ..., body = NULL, type = "and", format = "DWCA", user = NULL, pwd = NULL, email = NULL, refresh = FALSE, age = 30, curlopts = list() )
... |
For |
body |
if you prefer to pass in the payload yourself, use this parameter. If you use this, don't pass anything to the dots. Accepts either an R list, or JSON. JSON is likely easier, since the JSON library jsonlite requires that you unbox strings that shouldn't be auto-converted to arrays, which is a bit tedious for large queries. optional |
type |
(character) One of equals (=), and (&), or (|), lessThan (<), lessThanOrEquals (<=), greaterThan (>), greaterThanOrEquals (>=), in, within, not (!), like, isNotNull |
format |
(character) The download format. One of 'DWCA' (default), 'SIMPLE_CSV', or 'SPECIES_LIST' |
user |
(character) User name within GBIF's website. Required. See "Authentication" below |
pwd |
(character) User password within GBIF's website. Required. See "Authentication" below |
email |
(character) Email address to which the download-complete notification is sent. Required. See "Authentication" below |
refresh |
(logical) refresh your list of downloads. on the first
request of each R session we'll cache your stored GBIF occurrence
downloads locally. you can refresh this list by setting |
age |
(integer) number of days after which you want a new download. default: 30 |
curlopts |
list of named curl options passed on to
|
see downloads for an overview of GBIF downloads methods
Other downloads:
download_predicate_dsl
,
occ_download_cancel()
,
occ_download_dataset_activity()
,
occ_download_datasets()
,
occ_download_get()
,
occ_download_import()
,
occ_download_list()
,
occ_download_meta()
,
occ_download_queue()
,
occ_download_wait()
,
occ_download()
## Not run: # these are examples from the package maintainer's account; # outcomes will vary by user occ_download_cached(pred_gte("elevation", 12000L)) occ_download_cached(pred("catalogNumber", 217880)) occ_download_cached(pred_gte("decimalLatitude", 65), pred_lte("decimalLatitude", -65), type="or") occ_download_cached(pred_gte("elevation", 12000L)) occ_download_cached(pred_gte("elevation", 12000L), refresh = TRUE) ## End(Not run)
## Not run: # these are examples from the package maintainer's account; # outcomes will vary by user occ_download_cached(pred_gte("elevation", 12000L)) occ_download_cached(pred("catalogNumber", 217880)) occ_download_cached(pred_gte("decimalLatitude", 65), pred_lte("decimalLatitude", -65), type="or") occ_download_cached(pred_gte("elevation", 12000L)) occ_download_cached(pred_gte("elevation", 12000L), refresh = TRUE) ## End(Not run)
Cancel a download creation process.
occ_download_cancel(key, user = NULL, pwd = NULL, curlopts = list()) occ_download_cancel_staged( user = NULL, pwd = NULL, limit = 20, start = 0, curlopts = list() )
occ_download_cancel(key, user = NULL, pwd = NULL, curlopts = list()) occ_download_cancel_staged( user = NULL, pwd = NULL, limit = 20, start = 0, curlopts = list() )
key |
(character) A key generated from a request, like that from
|
user |
(character) User name within GBIF's website. Required. See Details. |
pwd |
(character) User password within GBIF's website. Required. See Details. |
curlopts |
list of named curl options passed on to
|
limit |
Number of records to return. Default: 20 |
start |
Record number to start at. Default: 0 |
Note, these functions only cancel a job in progress. If your download is already prepared for you, this won't do anything to change that.
occ_download_cancel
cancels a specific job by download key - returns
success message
occ_download_cancel_staged
cancels all jobs with status RUNNING
or PREPARING
- if none are found, returns a message saying so -
if some found, they are cancelled, returning message saying so
see downloads for an overview of GBIF downloads methods
Other downloads:
download_predicate_dsl
,
occ_download_cached()
,
occ_download_dataset_activity()
,
occ_download_datasets()
,
occ_download_get()
,
occ_download_import()
,
occ_download_list()
,
occ_download_meta()
,
occ_download_queue()
,
occ_download_wait()
,
occ_download()
## Not run: # occ_download_cancel(key="0003984-140910143529206") # occ_download_cancel_staged() ## End(Not run)
## Not run: # occ_download_cancel(key="0003984-140910143529206") # occ_download_cancel_staged() ## End(Not run)
Lists the downloads activity of a dataset
occ_download_dataset_activity( dataset, limit = 20, start = 0, curlopts = list() )
occ_download_dataset_activity( dataset, limit = 20, start = 0, curlopts = list() )
dataset |
(character) A dataset key |
limit |
(integer/numeric) Number of records to return. Default: 20, Max: 1000 |
start |
(integer/numeric) Record number to start at. Default: 0 |
curlopts |
list of named curl options passed on to
|
a list with two slots:
meta: a single row data.frame with columns: offset
, limit
,
endofrecords
, count
results: a tibble with the nested data flattened, with many
columns with the same download.
or download.request.
prefixes
see downloads for an overview of GBIF downloads methods
Other downloads:
download_predicate_dsl
,
occ_download_cached()
,
occ_download_cancel()
,
occ_download_datasets()
,
occ_download_get()
,
occ_download_import()
,
occ_download_list()
,
occ_download_meta()
,
occ_download_queue()
,
occ_download_wait()
,
occ_download()
## Not run: res <- occ_download_dataset_activity("7f2edc10-f762-11e1-a439-00145eb45e9a") res res$meta res$meta$count # pagination occ_download_dataset_activity("7f2edc10-f762-11e1-a439-00145eb45e9a", limit = 3000) occ_download_dataset_activity("7f2edc10-f762-11e1-a439-00145eb45e9a", limit = 3, start = 10) ## End(Not run)
## Not run: res <- occ_download_dataset_activity("7f2edc10-f762-11e1-a439-00145eb45e9a") res res$meta res$meta$count # pagination occ_download_dataset_activity("7f2edc10-f762-11e1-a439-00145eb45e9a", limit = 3000) occ_download_dataset_activity("7f2edc10-f762-11e1-a439-00145eb45e9a", limit = 3, start = 10) ## End(Not run)
List datasets for a download
occ_download_datasets(key, limit = 20, start = 0, curlopts = list())
occ_download_datasets(key, limit = 20, start = 0, curlopts = list())
key |
A key generated from a request, like that from |
limit |
(integer/numeric) Number of records to return. Default: 20, Max: 1000 |
start |
(integer/numeric) Record number to start at. Default: 0 |
curlopts |
list of named curl options passed on to
|
a list with two slots:
meta: a single row data.frame with columns: offset
, limit
,
endofrecords
, count
results: a tibble with the results, of three columns: downloadKey
,
datasetKey
, numberRecords
see downloads for an overview of GBIF downloads methods
Other downloads:
download_predicate_dsl
,
occ_download_cached()
,
occ_download_cancel()
,
occ_download_dataset_activity()
,
occ_download_get()
,
occ_download_import()
,
occ_download_list()
,
occ_download_meta()
,
occ_download_queue()
,
occ_download_wait()
,
occ_download()
## Not run: occ_download_datasets(key="0003983-140910143529206") occ_download_datasets(key="0003983-140910143529206", limit = 3) occ_download_datasets(key="0003983-140910143529206", limit = 3, start = 10) ## End(Not run)
## Not run: occ_download_datasets(key="0003983-140910143529206") occ_download_datasets(key="0003983-140910143529206", limit = 3) occ_download_datasets(key="0003983-140910143529206", limit = 3, start = 10) ## End(Not run)
Describes the fields available in GBIF downloads
occ_download_describe(x = "dwca")
occ_download_describe(x = "dwca")
x |
a character string (default: "dwca"). Accepted values: "dwca", "simpleCsv", "simpleAvro", "simpleParquet", "speciesList". |
The function returns a list with the fields available in GBIF downloads. It is considered experimental by GBIF, so the output might change in the future.
a list.
## Not run: occ_download_describe("dwca")$verbatimFields occ_download_describe("dwca")$verbatimExtensions occ_download_describe("simpleCsv")$fields ## End(Not run)
## Not run: occ_download_describe("dwca")$verbatimFields occ_download_describe("dwca")$verbatimExtensions occ_download_describe("simpleCsv")$fields ## End(Not run)
Get a download from GBIF.
occ_download_get(key, path = ".", overwrite = FALSE, ...)
occ_download_get(key, path = ".", overwrite = FALSE, ...)
key |
A key generated from a request, like that from |
path |
Path to write zip file to. Default: |
overwrite |
Will only overwrite existing path if TRUE. |
... |
named curl options passed on to
crul::verb-GET. see |
Downloads the zip file to a directory you specify on your machine.
crul::HttpClient()
is used internally to write the zip file to
disk. See crul::writing-options. This function only downloads the file.
See occ_download_import
to open a downloaded file in your R session.
The speed of this function is of course proportional to the size of the
file to download. For example, a 58 MB file on my machine took about
26 seconds.
see downloads for an overview of GBIF downloads methods
This function used to check for HTTP response content type, but it has changed enough that we no longer check it. If you run into issues with this function, open an issue in the GitHub repository.
Other downloads:
download_predicate_dsl
,
occ_download_cached()
,
occ_download_cancel()
,
occ_download_dataset_activity()
,
occ_download_datasets()
,
occ_download_import()
,
occ_download_list()
,
occ_download_meta()
,
occ_download_queue()
,
occ_download_wait()
,
occ_download()
## Not run: occ_download_get("0000066-140928181241064") occ_download_get("0003983-140910143529206", overwrite = TRUE) ## End(Not run)
## Not run: occ_download_get("0000066-140928181241064") occ_download_get("0003983-140910143529206", overwrite = TRUE) ## End(Not run)
Import a downloaded file from GBIF.
occ_download_import( x = NULL, key = NULL, path = ".", fill = FALSE, encoding = "UTF-8", ... ) as.download(path = ".", key = NULL) ## S3 method for class 'character' as.download(path = ".", key = NULL) ## S3 method for class 'download' as.download(path = ".", key = NULL)
occ_download_import( x = NULL, key = NULL, path = ".", fill = FALSE, encoding = "UTF-8", ... ) as.download(path = ".", key = NULL) ## S3 method for class 'character' as.download(path = ".", key = NULL) ## S3 method for class 'download' as.download(path = ".", key = NULL)
x |
The output of a call to |
key |
A key generated from a request, like that from
|
path |
Path to unzip file to. Default: |
fill |
(logical) (default: |
encoding |
(character) encoding to read in data; passed to
|
... |
parameters passed on to |
You can provide either x as input, or both key and path. We use
data.table::fread()
internally to read data.
a tibble (data.frame)
You may run into errors when using occ_download_import()
; most often
these are due to data.table::fread()
not being able to parse the
occurrence.txt
file correctly. The fill
parameter passes down to
data.table::fread()
and the ...
allows you to pass on any other
parameters that data.table::fread()
accepts. Read the docs for fread
for help.
The country code for Namibia is "NA"
. Unfortunately in R an "NA"
string
will be read in to R as an NA/missing. To avoid this, in this function
we read in the data, then convert any NA/missing values to the character
string "NA"
. When a country code is truly missing it will be an empty
string.
see downloads for an overview of GBIF downloads methods
Other downloads:
download_predicate_dsl
,
occ_download_cached()
,
occ_download_cancel()
,
occ_download_dataset_activity()
,
occ_download_datasets()
,
occ_download_get()
,
occ_download_list()
,
occ_download_meta()
,
occ_download_queue()
,
occ_download_wait()
,
occ_download()
## Not run: # First, kick off at least 1 download, then wait for the job to be complete # Then use your download keys res <- occ_download_get(key="0000066-140928181241064", overwrite=TRUE) occ_download_import(res) occ_download_get(key="0000066-140928181241064", overwrite = TRUE) %>% occ_download_import # coerce a file path to the right class to feed to occ_download_import # as.download("0000066-140928181241064.zip") # as.download(key = "0000066-140928181241064") # occ_download_import(as.download("0000066-140928181241064.zip")) # download a dump that has a CSV file # res <- occ_download_get(key = "0001369-160509122628363", overwrite=TRUE) # occ_download_import(res) # occ_download_import(key = "0001369-160509122628363") # download and import a species list (in csv format) # x <- occ_download_get("0000172-190415153152247") # occ_download_import(x) ## End(Not run)
## Not run: # First, kick off at least 1 download, then wait for the job to be complete # Then use your download keys res <- occ_download_get(key="0000066-140928181241064", overwrite=TRUE) occ_download_import(res) occ_download_get(key="0000066-140928181241064", overwrite = TRUE) %>% occ_download_import # coerce a file path to the right class to feed to occ_download_import # as.download("0000066-140928181241064.zip") # as.download(key = "0000066-140928181241064") # occ_download_import(as.download("0000066-140928181241064.zip")) # download a dump that has a CSV file # res <- occ_download_get(key = "0001369-160509122628363", overwrite=TRUE) # occ_download_import(res) # occ_download_import(key = "0001369-160509122628363") # download and import a species list (in csv format) # x <- occ_download_get("0000172-190415153152247") # occ_download_import(x) ## End(Not run)
Lists the downloads created by a user.
occ_download_list( user = NULL, pwd = NULL, limit = 20, start = 0, curlopts = list() )
occ_download_list( user = NULL, pwd = NULL, limit = 20, start = 0, curlopts = list() )
user |
(character) User name within GBIF's website. Required. See Details. |
pwd |
(character) User password within GBIF's website. Required. See Details. |
limit |
(integer/numeric) Number of records to return. Default: 20, Max: 1000 |
start |
(integer/numeric) Record number to start at. Default: 0 |
curlopts |
list of named curl options passed on to
|
a list with two slots:
meta: a single row data.frame with columns: offset
, limit
,
endofrecords
, count
results: a tibble with the nested data flattened, with many
columns with the same request.
prefix
see downloads for an overview of GBIF downloads methods
Other downloads:
download_predicate_dsl
,
occ_download_cached()
,
occ_download_cancel()
,
occ_download_dataset_activity()
,
occ_download_datasets()
,
occ_download_get()
,
occ_download_import()
,
occ_download_meta()
,
occ_download_queue()
,
occ_download_wait()
,
occ_download()
## Not run: occ_download_list(user="sckott") occ_download_list(user="sckott", limit = 5) occ_download_list(user="sckott", start = 21) ## End(Not run)
## Not run: occ_download_list(user="sckott") occ_download_list(user="sckott", limit = 5) occ_download_list(user="sckott", start = 21) ## End(Not run)
Retrieves the occurrence download metadata by its unique key.
occ_download_meta(key, curlopts = list())
occ_download_meta(key, curlopts = list())
key |
A key generated from a request, like that from
|
curlopts |
list of named curl options passed on to
|
an object of class occ_download_meta
, a list with slots for
the download key, the DOI assigned to the download, license link,
the request details you sent in the occ_download()
request,
and metadata about the size and date/time of the request
see downloads for an overview of GBIF downloads methods
Other downloads:
download_predicate_dsl
,
occ_download_cached()
,
occ_download_cancel()
,
occ_download_dataset_activity()
,
occ_download_datasets()
,
occ_download_get()
,
occ_download_import()
,
occ_download_list()
,
occ_download_queue()
,
occ_download_wait()
,
occ_download()
## Not run: occ_download_meta(key="0003983-140910143529206") occ_download_meta("0000066-140928181241064") ## End(Not run)
## Not run: occ_download_meta(key="0003983-140910143529206") occ_download_meta("0000066-140928181241064") ## End(Not run)
Download requests in a queue
occ_download_queue(..., .list = list(), status_ping = 10)
occ_download_queue(..., .list = list(), status_ping = 10)
... |
any number of |
.list |
any number of |
status_ping |
(integer) Seconds between pings checking the status of the download request. Generally, use larger numbers for larger requests. Default: 10 (i.e., 10 seconds). Must be 10 or greater. |
This function is a convenience wrapper around occ_download()
,
allowing the user to kick off any number of requests, while abiding by
GBIF rules of 3 concurrent requests per user.
a list of occ_download
class objects, see occ_download_get()
to fetch data
It works by using lazy evaluation to collect your requests into a queue
(but does not use lazy evaluation if you use the .list
parameter).
Then it kicks off the first 3 requests. Then in a while loop, we check
status of those requests, and when any request finishes (see
When is a job done?
below), we kick off the
next, and so on. So in theory, there may not always strictly be 3 running
concurrently, but the function will usually provide for 3 running
concurrently.
We mark a job as done by checking the /occurrence/download/
API route
with our occ_download_meta()
function. If the status of the job is
any of "succeeded", "killed", or "cancelled", then we mark the job as done
and move on to other jobs in the queue.
This function is still in development. There's a lot of complexity to this problem. We'll be rolling out fixes and improvements in future versions of the package, so expect to have to adjust your code with new versions.
see downloads for an overview of GBIF downloads methods
Other downloads:
download_predicate_dsl
,
occ_download_cached()
,
occ_download_cancel()
,
occ_download_dataset_activity()
,
occ_download_datasets()
,
occ_download_get()
,
occ_download_import()
,
occ_download_list()
,
occ_download_meta()
,
occ_download_wait()
,
occ_download()
## Not run: if (interactive()) { # dont run in automated example runs, too costly # passing occ_download() requests via ... out <- occ_download_queue( occ_download(pred('taxonKey', 3119195), pred("year", 1976)), occ_download(pred('taxonKey', 3119195), pred("year", 2001)), occ_download(pred('taxonKey', 3119195), pred("year", 2001), pred_lte("month", 8)), occ_download(pred('taxonKey', 5229208), pred("year", 2011)), occ_download(pred('taxonKey', 2480946), pred("year", 2015)), occ_download(pred("country", "NZ"), pred("year", 1999), pred("month", 3)), occ_download(pred("catalogNumber", "Bird.27847588"), pred("year", 1998), pred("month", 2)) ) # supports <= 3 requests too out <- occ_download_queue( occ_download(pred("country", "NZ"), pred("year", 1999), pred("month", 3)), occ_download(pred("catalogNumber", "Bird.27847588"), pred("year", 1998), pred("month", 2)) ) # using pre-prepared requests via .list keys <- c(7905507, 5384395, 8911082) queries <- list() for (i in seq_along(keys)) { queries[[i]] <- occ_download_prep( pred("taxonKey", keys[i]), pred_in("basisOfRecord", c("HUMAN_OBSERVATION","OBSERVATION")), pred("hasCoordinate", TRUE), pred("hasGeospatialIssue", FALSE), pred("year", 1993) ) } out <- occ_download_queue(.list = queries) out # another pre-prepared example yrs <- 1930:1934 queries <- list() for (i in seq_along(yrs)) { queries[[i]] <- occ_download_prep( pred("taxonKey", 2877951), pred_in("basisOfRecord", c("HUMAN_OBSERVATION","OBSERVATION")), pred("hasCoordinate", TRUE), pred("hasGeospatialIssue", FALSE), pred("year", yrs[i]) ) } out <- occ_download_queue(.list = queries) out } ## End(Not run)
## Not run: if (interactive()) { # dont run in automated example runs, too costly # passing occ_download() requests via ... out <- occ_download_queue( occ_download(pred('taxonKey', 3119195), pred("year", 1976)), occ_download(pred('taxonKey', 3119195), pred("year", 2001)), occ_download(pred('taxonKey', 3119195), pred("year", 2001), pred_lte("month", 8)), occ_download(pred('taxonKey', 5229208), pred("year", 2011)), occ_download(pred('taxonKey', 2480946), pred("year", 2015)), occ_download(pred("country", "NZ"), pred("year", 1999), pred("month", 3)), occ_download(pred("catalogNumber", "Bird.27847588"), pred("year", 1998), pred("month", 2)) ) # supports <= 3 requests too out <- occ_download_queue( occ_download(pred("country", "NZ"), pred("year", 1999), pred("month", 3)), occ_download(pred("catalogNumber", "Bird.27847588"), pred("year", 1998), pred("month", 2)) ) # using pre-prepared requests via .list keys <- c(7905507, 5384395, 8911082) queries <- list() for (i in seq_along(keys)) { queries[[i]] <- occ_download_prep( pred("taxonKey", keys[i]), pred_in("basisOfRecord", c("HUMAN_OBSERVATION","OBSERVATION")), pred("hasCoordinate", TRUE), pred("hasGeospatialIssue", FALSE), pred("year", 1993) ) } out <- occ_download_queue(.list = queries) out # another pre-prepared example yrs <- 1930:1934 queries <- list() for (i in seq_along(yrs)) { queries[[i]] <- occ_download_prep( pred("taxonKey", 2877951), pred_in("basisOfRecord", c("HUMAN_OBSERVATION","OBSERVATION")), pred("hasCoordinate", TRUE), pred("hasGeospatialIssue", FALSE), pred("year", yrs[i]) ) } out <- occ_download_queue(.list = queries) out } ## End(Not run)
Download occurrence data using a SQL query
occ_download_sql( q = NULL, format = "SQL_TSV_ZIP", user = NULL, pwd = NULL, email = NULL, validate = TRUE, curlopts = list() ) occ_download_sql_validate(q = NULL, user = NULL, pwd = NULL) occ_download_sql_prep( q = NULL, format = "SQL_TSV_ZIP", user = NULL, pwd = NULL, email = NULL, validate = TRUE, curlopts = list() )
occ_download_sql( q = NULL, format = "SQL_TSV_ZIP", user = NULL, pwd = NULL, email = NULL, validate = TRUE, curlopts = list() ) occ_download_sql_validate(q = NULL, user = NULL, pwd = NULL) occ_download_sql_prep( q = NULL, format = "SQL_TSV_ZIP", user = NULL, pwd = NULL, email = NULL, validate = TRUE, curlopts = list() )
q |
sql query |
format |
only "SQL_TSV_ZIP" is supported right now |
user |
your GBIF user name |
pwd |
your GBIF password |
email |
your email address |
validate |
should the query be validated before submission. Default is TRUE. |
curlopts |
list of curl options |
This is an experimental feature, and the implementation may change throughout
2024. The feature is currently only available for preview by invited users.
Contact [email protected]
to request access.
Please see the article here for more information: https://docs.ropensci.org/rgbif/articles/gbif_sql_downloads.html
an object of class 'occ_download_sql'
https://techdocs.gbif.org/en/data-use/api-sql-downloads
## Not run: occ_download_sql("SELECT gbifid,countryCode FROM occurrence WHERE genusKey = 2435098") ## End(Not run)
## Not run: occ_download_sql("SELECT gbifid,countryCode FROM occurrence WHERE genusKey = 2435098") ## End(Not run)
Wait for an occurrence download to be done
occ_download_wait( x, status_ping = 5, curlopts = list(http_version = 2), quiet = FALSE )
occ_download_wait( x, status_ping = 5, curlopts = list(http_version = 2), quiet = FALSE )
x |
an object of class |
status_ping |
(integer) seconds between each |
curlopts |
(list) curl options, as named list, passed on to
|
quiet |
(logical) suppress messages. default: |
an object of class occ_download_meta
, see occ_download_meta()
for details
occ_download_queue()
is similar, but handles many requests
at once; occ_download_wait
handles one request at a time
Other downloads:
download_predicate_dsl
,
occ_download_cached()
,
occ_download_cancel()
,
occ_download_dataset_activity()
,
occ_download_datasets()
,
occ_download_get()
,
occ_download_import()
,
occ_download_list()
,
occ_download_meta()
,
occ_download_queue()
,
occ_download()
## Not run: x <- occ_download( pred("taxonKey", 9206251), pred_in("country", c("US", "MX")), pred_gte("year", 1971) ) res <- occ_download_wait(x) occ_download_meta(x) # works also with a downloadkey occ_download_wait("0000066-140928181241064") ## End(Not run)
## Not run: x <- occ_download( pred("taxonKey", 9206251), pred_in("country", c("US", "MX")), pred_gte("year", 1971) ) res <- occ_download_wait(x) occ_download_meta(x) # works also with a downloadkey occ_download_wait("0000066-140928181241064") ## End(Not run)
Facet GBIF occurrences
occ_facet(facet, facetMincount = NULL, curlopts = list(), ...)
occ_facet(facet, facetMincount = NULL, curlopts = list(), ...)
facet |
(character) a character vector of length 1 or greater. Required. |
facetMincount |
(numeric) minimum number of records to be included in the faceting results |
curlopts |
list of named curl options passed on to
|
... |
Facet parameters, such as for paging based on each facet
variable, e.g., |
All fields can be faceted on except for "lastInterpreted", "eventDate", and "geometry"
If a faceted variable is not found, it is silently dropped, returning nothing for that query
A list of tibbles (data.frame's) for each facet (each element of the facet parameter).
occ_search()
also has faceting ability, but
can include occurrence data in addition to facets.
## Not run: occ_facet(facet = "country") # facetMincount - minimum number of records to be included # in the faceting results occ_facet(facet = "country", facetMincount = 30000000L) occ_facet(facet = c("country", "basisOfRecord")) # paging with many facets occ_facet( facet = c("country", "basisOfRecord", "hasCoordinate"), country.facetLimit = 3, basisOfRecord.facetLimit = 6 ) # paging ## limit occ_facet(facet = "country", country.facetLimit = 3) ## offset occ_facet(facet = "country", country.facetLimit = 3, country.facetOffset = 3) # Pass on curl options occ_facet(facet = "country", country.facetLimit = 3, curlopts = list(verbose = TRUE)) ## End(Not run)
## Not run: occ_facet(facet = "country") # facetMincount - minimum number of records to be included # in the faceting results occ_facet(facet = "country", facetMincount = 30000000L) occ_facet(facet = c("country", "basisOfRecord")) # paging with many facets occ_facet( facet = c("country", "basisOfRecord", "hasCoordinate"), country.facetLimit = 3, basisOfRecord.facetLimit = 6 ) # paging ## limit occ_facet(facet = "country", country.facetLimit = 3) ## offset occ_facet(facet = "country", country.facetLimit = 3, country.facetOffset = 3) # Pass on curl options occ_facet(facet = "country", country.facetLimit = 3, curlopts = list(verbose = TRUE)) ## End(Not run)
Get data for GBIF occurrences by occurrence key
occ_get( key, fields = "minimal", curlopts = list(), return = NULL, verbatim = NULL ) occ_get_verbatim(key, fields = "minimal", curlopts = list())
occ_get( key, fields = "minimal", curlopts = list(), return = NULL, verbatim = NULL ) occ_get_verbatim(key, fields = "minimal", curlopts = list())
key |
(numeric/integer) one or more occurrence keys. required |
fields |
(character) Default ("minimal") will return just taxon name, key, latitude, and longitude. 'all' returns all fields. Or specify each field you want returned by name, e.g. fields = c('name', 'decimalLatitude','altitude'). |
curlopts |
list of named curl options passed on to
|
return |
Defunct. All components are returned now; index to the one(s) you want |
verbatim |
Defunct. verbatim records can now be retrieved using
|
For occ_get
a list of lists. For occ_get_verbatim
a data.frame
https://www.gbif.org/developer/occurrence#occurrence
## Not run: occ_get(key=855998194) # many occurrences occ_get(key=c(101010, 240713150, 855998194)) # Verbatim data occ_get_verbatim(key=855998194) occ_get_verbatim(key=855998194, fields='all') occ_get_verbatim(key=855998194, fields=c('scientificName', 'lastCrawled', 'county')) occ_get_verbatim(key=c(855998194, 620594291)) occ_get_verbatim(key=c(855998194, 620594291), fields='all') occ_get_verbatim(key=c(855998194, 620594291), fields=c('scientificName', 'decimalLatitude', 'basisOfRecord')) # curl options, pass in a named list occ_get(key=855998194, curlopts = list(verbose=TRUE)) ## End(Not run)
## Not run: occ_get(key=855998194) # many occurrences occ_get(key=c(101010, 240713150, 855998194)) # Verbatim data occ_get_verbatim(key=855998194) occ_get_verbatim(key=855998194, fields='all') occ_get_verbatim(key=855998194, fields=c('scientificName', 'lastCrawled', 'county')) occ_get_verbatim(key=c(855998194, 620594291)) occ_get_verbatim(key=c(855998194, 620594291), fields='all') occ_get_verbatim(key=c(855998194, 620594291), fields=c('scientificName', 'decimalLatitude', 'basisOfRecord')) # curl options, pass in a named list occ_get(key=855998194, curlopts = list(verbose=TRUE)) ## End(Not run)
Parse and examine further GBIF occurrence issues on a dataset.
occ_issues(.data, ..., mutate = NULL)
occ_issues(.data, ..., mutate = NULL)
.data |
Output from a call to |
... |
Named parameters to only get back (e.g. cdround), or to remove (e.g. -cdround). |
mutate |
(character) One of:
For split and split_expand, values in cells become y ("yes") or n ("no") |
See also the vignette Cleaning data using GBIF issues
Note that you can also query based on issues, e.g.,
occ_search(taxonKey=1, issue='DEPTH_UNLIKELY')
. However, I imagine
it's more likely that you want to search for occurrences based on a
taxonomic name, or geographic area, not based on issues, so it makes sense
to pull data down, then clean as needed using this function.
This function only affects the data
element in the gbif
class that is
returned from a call to occ_search()
. Maybe in a future version
we will remove the associated records from the hierarchy
and media
elements as they are removed from the data
element.
You'll notice that we sort columns to make it easier to glimpse the important parts of your data, namely taxonomic name, taxon key, latitude and longitude, and the issues. The columns are unchanged otherwise.
https://gbif.github.io/gbif-api/apidocs/org/gbif/api/vocabulary/OccurrenceIssue.html
## Not run: # what do issues mean, can print whole table head(gbif_issues()) # or just occurrence related issues gbif_issues()[which(gbif_issues()$type %in% c("occurrence")),] # or search for matches iss <- c('cdround','cudc','gass84','txmathi') gbif_issues()[ gbif_issues()$code %in% iss, ] # compare out data to after occ_issues use (out <- occ_search(limit=100)) out %>% occ_issues(cdround) # occ_data (out <- occ_data(limit=100)) out %>% occ_issues(cdround) # Parsing output by issue (res <- occ_data( geometry='POLYGON((30.1 10.1,40 40,20 40,10 20,30.1 10.1))', limit = 600)) ## or parse issues in various ways ### include only rows with cdround issue gg <- res %>% occ_issues(cdround) NROW(res$data) NROW(gg$data) head(res$data)[,c(1:5)] head(gg$data)[,c(1:5)] ### remove data rows with certain issue classes res %>% occ_issues(-cdround, -cudc) ### split issues into separate columns res %>% occ_issues(mutate = "split") res %>% occ_issues(-cudc, -mdatunl, mutate = "split") res %>% occ_issues(gass84, mutate = "split") ### expand issues to more descriptive names res %>% occ_issues(mutate = "expand") ### split and expand res %>% occ_issues(mutate = "split_expand") ### split, expand, and remove an issue class res %>% occ_issues(-cdround, mutate = "split_expand") ## Or you can use occ_issues without %>% occ_issues(res, -cdround, mutate = "split_expand") # from GBIF downloaded data via occ_download_* functions res <- occ_download_get(key="0000066-140928181241064", overwrite=TRUE) x <- occ_download_import(res) occ_issues(x, -txmathi) occ_issues(x, txmathi) occ_issues(x, gass84) occ_issues(x, zerocd) occ_issues(x, gass84, txmathi) occ_issues(x, mutate = "split") occ_issues(x, -gass84, mutate = "split") occ_issues(x, mutate = "expand") occ_issues(x, mutate = "split_expand") # occ_search/occ_data with many inputs - give slightly different output # format than normal 2482598, 2498387 xyz <- occ_data(taxonKey = c(9362842, 2492483, 2435099), limit = 300) xyz length(xyz) # length 3 
names(xyz) # matches taxonKey values passed in occ_issues(xyz, -gass84) occ_issues(xyz, -cdround) occ_issues(xyz, -cdround, -gass84) ## End(Not run)
## Not run: # what do issues mean, can print whole table head(gbif_issues()) # or just occurrence related issues gbif_issues()[which(gbif_issues()$type %in% c("occurrence")),] # or search for matches iss <- c('cdround','cudc','gass84','txmathi') gbif_issues()[ gbif_issues()$code %in% iss, ] # compare out data to after occ_issues use (out <- occ_search(limit=100)) out %>% occ_issues(cdround) # occ_data (out <- occ_data(limit=100)) out %>% occ_issues(cdround) # Parsing output by issue (res <- occ_data( geometry='POLYGON((30.1 10.1,40 40,20 40,10 20,30.1 10.1))', limit = 600)) ## or parse issues in various ways ### include only rows with cdround issue gg <- res %>% occ_issues(cdround) NROW(res$data) NROW(gg$data) head(res$data)[,c(1:5)] head(gg$data)[,c(1:5)] ### remove data rows with certain issue classes res %>% occ_issues(-cdround, -cudc) ### split issues into separate columns res %>% occ_issues(mutate = "split") res %>% occ_issues(-cudc, -mdatunl, mutate = "split") res %>% occ_issues(gass84, mutate = "split") ### expand issues to more descriptive names res %>% occ_issues(mutate = "expand") ### split and expand res %>% occ_issues(mutate = "split_expand") ### split, expand, and remove an issue class res %>% occ_issues(-cdround, mutate = "split_expand") ## Or you can use occ_issues without %>% occ_issues(res, -cdround, mutate = "split_expand") # from GBIF downloaded data via occ_download_* functions res <- occ_download_get(key="0000066-140928181241064", overwrite=TRUE) x <- occ_download_import(res) occ_issues(x, -txmathi) occ_issues(x, txmathi) occ_issues(x, gass84) occ_issues(x, zerocd) occ_issues(x, gass84, txmathi) occ_issues(x, mutate = "split") occ_issues(x, -gass84, mutate = "split") occ_issues(x, mutate = "expand") occ_issues(x, mutate = "split_expand") # occ_search/occ_data with many inputs - give slightly different output # format than normal 2482598, 2498387 xyz <- occ_data(taxonKey = c(9362842, 2492483, 2435099), limit = 300) xyz length(xyz) # length 3 
names(xyz) # matches taxonKey values passed in occ_issues(xyz, -gass84) occ_issues(xyz, -cdround) occ_issues(xyz, -cdround, -gass84) ## End(Not run)
Search for catalog numbers, collection codes, collector names, and institution codes.
occ_metadata( type = "catalogNumber", q = NULL, limit = 5, pretty = TRUE, curlopts = list() )
occ_metadata( type = "catalogNumber", q = NULL, limit = 5, pretty = TRUE, curlopts = list() )
type |
Type of data, one of catalogNumber, collectionCode, recordedBy, or institutionCode. Unique partial strings work too, like 'cat' for catalogNumber |
q |
Search term |
limit |
Number of results, default=5 |
pretty |
Pretty as true (Default) uses cat to print data, FALSE gives character strings. |
curlopts |
list of named curl options passed on to
|
https://www.gbif.org/developer/occurrence#search
## Not run: # catalog number occ_metadata(type = "catalogNumber", q=122) # collection code occ_metadata(type = "collectionCode", q=12) # institution code occ_metadata(type = "institutionCode", q='GB') # recorded by occ_metadata(type = "recordedBy", q='scott') # data as character strings occ_metadata(type = "catalogNumber", q=122, pretty=FALSE) # Change number of results returned occ_metadata(type = "catalogNumber", q=122, limit=10) # Partial unique type strings work too occ_metadata(type = "cat", q=122) # Pass on curl options occ_metadata(type = "cat", q=122, curlopts = list(verbose = TRUE)) ## End(Not run)
## Not run: # catalog number occ_metadata(type = "catalogNumber", q=122) # collection code occ_metadata(type = "collectionCode", q=12) # institution code occ_metadata(type = "institutionCode", q='GB') # recorded by occ_metadata(type = "recordedBy", q='scott') # data as character strings occ_metadata(type = "catalogNumber", q=122, pretty=FALSE) # Change number of results returned occ_metadata(type = "catalogNumber", q=122, limit=10) # Partial unique type strings work too occ_metadata(type = "cat", q=122) # Pass on curl options occ_metadata(type = "cat", q=122, curlopts = list(verbose = TRUE)) ## End(Not run)
Search for GBIF occurrences
occ_search( taxonKey = NULL, scientificName = NULL, country = NULL, publishingCountry = NULL, hasCoordinate = NULL, typeStatus = NULL, recordNumber = NULL, lastInterpreted = NULL, continent = NULL, geometry = NULL, geom_big = "asis", geom_size = 40, geom_n = 10, recordedBy = NULL, recordedByID = NULL, identifiedByID = NULL, basisOfRecord = NULL, datasetKey = NULL, eventDate = NULL, catalogNumber = NULL, year = NULL, month = NULL, decimalLatitude = NULL, decimalLongitude = NULL, elevation = NULL, depth = NULL, institutionCode = NULL, collectionCode = NULL, hasGeospatialIssue = NULL, issue = NULL, search = NULL, mediaType = NULL, subgenusKey = NULL, repatriated = NULL, phylumKey = NULL, kingdomKey = NULL, classKey = NULL, orderKey = NULL, familyKey = NULL, genusKey = NULL, speciesKey = NULL, establishmentMeans = NULL, degreeOfEstablishment = NULL, protocol = NULL, license = NULL, organismId = NULL, publishingOrg = NULL, stateProvince = NULL, waterBody = NULL, locality = NULL, occurrenceStatus = "PRESENT", gadmGid = NULL, coordinateUncertaintyInMeters = NULL, verbatimScientificName = NULL, eventId = NULL, identifiedBy = NULL, networkKey = NULL, verbatimTaxonId = NULL, occurrenceId = NULL, organismQuantity = NULL, organismQuantityType = NULL, relativeOrganismQuantity = NULL, iucnRedListCategory = NULL, lifeStage = NULL, isInCluster = NULL, distanceFromCentroidInMeters = NULL, geoDistance = NULL, sex = NULL, dwcaExtension = NULL, gbifId = NULL, gbifRegion = NULL, projectId = NULL, programme = NULL, preparations = NULL, datasetId = NULL, datasetName = NULL, publishedByGbifRegion = NULL, island = NULL, islandGroup = NULL, taxonId = NULL, taxonConceptId = NULL, taxonomicStatus = NULL, acceptedTaxonKey = NULL, collectionKey = NULL, institutionKey = NULL, otherCatalogNumbers = NULL, georeferencedBy = NULL, installationKey = NULL, hostingOrganizationKey = NULL, crawlId = NULL, modified = NULL, higherGeography = NULL, fieldNumber = NULL, parentEventId = NULL, samplingProtocol 
= NULL, sampleSizeUnit = NULL, pathway = NULL, gadmLevel0Gid = NULL, gadmLevel1Gid = NULL, gadmLevel2Gid = NULL, gadmLevel3Gid = NULL, earliestEonOrLowestEonothem = NULL, latestEonOrHighestEonothem = NULL, earliestEraOrLowestErathem = NULL, latestEraOrHighestErathem = NULL, earliestPeriodOrLowestSystem = NULL, latestPeriodOrHighestSystem = NULL, earliestEpochOrLowestSeries = NULL, latestEpochOrHighestSeries = NULL, earliestAgeOrLowestStage = NULL, latestAgeOrHighestStage = NULL, lowestBiostratigraphicZone = NULL, highestBiostratigraphicZone = NULL, group = NULL, formation = NULL, member = NULL, bed = NULL, associatedSequences = NULL, isSequenced = NULL, startDayOfYear = NULL, endDayOfYear = NULL, limit = 500, start = 0, fields = "all", return = NULL, facet = NULL, facetMincount = NULL, facetMultiselect = NULL, skip_validate = TRUE, curlopts = list(http_version = 2), ... )
occ_search( taxonKey = NULL, scientificName = NULL, country = NULL, publishingCountry = NULL, hasCoordinate = NULL, typeStatus = NULL, recordNumber = NULL, lastInterpreted = NULL, continent = NULL, geometry = NULL, geom_big = "asis", geom_size = 40, geom_n = 10, recordedBy = NULL, recordedByID = NULL, identifiedByID = NULL, basisOfRecord = NULL, datasetKey = NULL, eventDate = NULL, catalogNumber = NULL, year = NULL, month = NULL, decimalLatitude = NULL, decimalLongitude = NULL, elevation = NULL, depth = NULL, institutionCode = NULL, collectionCode = NULL, hasGeospatialIssue = NULL, issue = NULL, search = NULL, mediaType = NULL, subgenusKey = NULL, repatriated = NULL, phylumKey = NULL, kingdomKey = NULL, classKey = NULL, orderKey = NULL, familyKey = NULL, genusKey = NULL, speciesKey = NULL, establishmentMeans = NULL, degreeOfEstablishment = NULL, protocol = NULL, license = NULL, organismId = NULL, publishingOrg = NULL, stateProvince = NULL, waterBody = NULL, locality = NULL, occurrenceStatus = "PRESENT", gadmGid = NULL, coordinateUncertaintyInMeters = NULL, verbatimScientificName = NULL, eventId = NULL, identifiedBy = NULL, networkKey = NULL, verbatimTaxonId = NULL, occurrenceId = NULL, organismQuantity = NULL, organismQuantityType = NULL, relativeOrganismQuantity = NULL, iucnRedListCategory = NULL, lifeStage = NULL, isInCluster = NULL, distanceFromCentroidInMeters = NULL, geoDistance = NULL, sex = NULL, dwcaExtension = NULL, gbifId = NULL, gbifRegion = NULL, projectId = NULL, programme = NULL, preparations = NULL, datasetId = NULL, datasetName = NULL, publishedByGbifRegion = NULL, island = NULL, islandGroup = NULL, taxonId = NULL, taxonConceptId = NULL, taxonomicStatus = NULL, acceptedTaxonKey = NULL, collectionKey = NULL, institutionKey = NULL, otherCatalogNumbers = NULL, georeferencedBy = NULL, installationKey = NULL, hostingOrganizationKey = NULL, crawlId = NULL, modified = NULL, higherGeography = NULL, fieldNumber = NULL, parentEventId = NULL, samplingProtocol 
= NULL, sampleSizeUnit = NULL, pathway = NULL, gadmLevel0Gid = NULL, gadmLevel1Gid = NULL, gadmLevel2Gid = NULL, gadmLevel3Gid = NULL, earliestEonOrLowestEonothem = NULL, latestEonOrHighestEonothem = NULL, earliestEraOrLowestErathem = NULL, latestEraOrHighestErathem = NULL, earliestPeriodOrLowestSystem = NULL, latestPeriodOrHighestSystem = NULL, earliestEpochOrLowestSeries = NULL, latestEpochOrHighestSeries = NULL, earliestAgeOrLowestStage = NULL, latestAgeOrHighestStage = NULL, lowestBiostratigraphicZone = NULL, highestBiostratigraphicZone = NULL, group = NULL, formation = NULL, member = NULL, bed = NULL, associatedSequences = NULL, isSequenced = NULL, startDayOfYear = NULL, endDayOfYear = NULL, limit = 500, start = 0, fields = "all", return = NULL, facet = NULL, facetMincount = NULL, facetMultiselect = NULL, skip_validate = TRUE, curlopts = list(http_version = 2), ... )
taxonKey |
(numeric) A taxon key from the GBIF backbone. All included
and synonym taxa are included in the search, so a search for aves with
taxonKey=212 will match all birds, no matter which species. You can pass
many keys to |
scientificName |
A scientific name from the GBIF backbone. All included and synonym taxa are included in the search. |
country |
(character) The 2-letter country code (ISO-3166-1)
in which the occurrence was recorded. |
publishingCountry |
The 2-letter country code (as per ISO-3166-1) of
the owning organization's country. See
|
hasCoordinate |
(logical) Return only occurrence records with lat/long
data ( |
typeStatus |
Type status of the specimen. One of many options. |
recordNumber |
Number recorded by collector of the data, different from GBIF record number. |
lastInterpreted |
Date the record was last modified in GBIF, in ISO 8601 format: yyyy, yyyy-MM, yyyy-MM-dd, or MM-dd. Supports range queries, 'smaller,larger' (e.g., '1990,1991', whereas '1991,1990' wouldn't work). |
continent |
The source supplied continent.
Continent is not inferred but only populated if provided by the dataset publisher. Applying this filter may exclude many relevant records. |
geometry |
(character) Searches for occurrences inside a polygon in Well Known Text (WKT) format. A WKT shape written as either
For example, "POLYGON((37.08 46.86,38.06 46.86,38.06 47.28,37.08 47.28,37.08 46.86))". See also the section WKT below. |
geom_big |
(character) One of "bbox" or "asis" (default). |
geom_size |
(integer) An integer indicating size of the cell. Default: 40. |
geom_n |
(integer) An integer indicating number of cells in each dimension. Default: 10. |
recordedBy |
(character) The person who recorded the occurrence. |
recordedByID |
(character) Identifier (e.g. ORCID) for the person who recorded the occurrence |
identifiedByID |
(character) Identifier (e.g. ORCID) for the person who provided the taxonomic identification of the occurrence. |
basisOfRecord |
(character) The specific nature of the data record. See here.
|
datasetKey |
(character) The occurrence dataset uuid key. That can be found in the dataset page url. For example, "7e380070-f762-11e1-a439-00145eb45e9a" is the key for Natural History Museum (London) Collection Specimens. |
eventDate |
(character) Occurrence date in ISO 8601 format: yyyy, yyyy-MM, yyyy-MM-dd, or MM-dd. Supports range queries, 'smaller,larger' ('1990,1991', whereas '1991,1990' wouldn't work). |
catalogNumber |
(character) An identifier of any form assigned by the source within a physical collection or digital dataset for the record which may not be unique, but should be fairly unique in combination with the institution and collection code. |
year |
The 4 digit year. A year of 98 will be interpreted as AD 98. Supports range queries, 'smaller,larger' (e.g., '1990,1991', whereas '1991,1990' wouldn't work). |
month |
The month of the year, starting with 1 for January. Supports range queries, 'smaller,larger' (e.g., '1,2', whereas '2,1' wouldn't work). |
decimalLatitude |
Latitude in decimals between -90 and 90 based on WGS84. Supports range queries, 'smaller,larger' (e.g., '25,30', whereas '30,25' wouldn't work). |
decimalLongitude |
Longitude in decimals between -180 and 180 based on WGS84. Supports range queries (e.g., '-0.4,-0.2', whereas '-0.2,-0.4' wouldn't work). |
elevation |
Elevation in meters above sea level. Supports range queries, 'smaller,larger' (e.g., '5,30', whereas '30,5' wouldn't work). |
depth |
Depth in meters relative to elevation. For example 10 meters below a lake surface with given elevation. Supports range queries, 'smaller,larger' (e.g., '5,30', whereas '30,5' wouldn't work). |
institutionCode |
An identifier of any form assigned by the source to identify the institution the record belongs to. |
collectionCode |
(character) An identifier of any form assigned by the source to identify the physical collection or digital dataset uniquely within the context of an institution. |
hasGeospatialIssue |
(logical) Includes/excludes occurrence records
which contain spatial issues (as determined in our record interpretation),
i.e. |
issue |
(character) One or more of many possible issues with each occurrence record. Issues passed to this parameter filter results by the issue. One of many options. See here for definitions. |
search |
(character) Query terms. The value for this parameter can be a simple word or a phrase. For example, search="puma" |
mediaType |
(character) Media type of "MovingImage", "Sound", or "StillImage". |
subgenusKey |
(numeric) Subgenus classification key. |
repatriated |
(character) Searches for records whose publishing country is different to the country where the record was recorded in. |
phylumKey |
(numeric) Phylum classification key. |
kingdomKey |
(numeric) Kingdom classification key. |
classKey |
(numeric) Class classification key. |
orderKey |
(numeric) Order classification key. |
familyKey |
(numeric) Family classification key. |
genusKey |
(numeric) Genus classification key. |
speciesKey |
(numeric) Species classification key. |
establishmentMeans |
(character) provides information about whether an organism or organisms have been introduced to a given place and time through the direct or indirect activity of modern humans.
|
degreeOfEstablishment |
(character) Provides information about degree to which an Organism survives, reproduces, and expands its range at the given place and time. One of many options. |
protocol |
(character) Protocol or mechanism used to provide the occurrence record. One of many options. |
license |
(character) The type of license applied to the dataset or record.
|
organismId |
(numeric) An identifier for the Organism instance (as opposed to a particular digital record of the Organism). May be a globally unique identifier or an identifier specific to the data set. |
publishingOrg |
(character) The publishing organization key (a UUID). |
stateProvince |
(character) The name of the next smaller administrative region than country (state, province, canton, department, region, etc.) in which the Location occurs. |
waterBody |
(character) The name of the water body in which the locations occur |
locality |
(character) The specific description of the place. |
occurrenceStatus |
(character) Default is "PRESENT". Specify whether search should return "PRESENT" or "ABSENT" data. |
gadmGid |
(character) The gadm id of the area occurrences are desired from. https://gadm.org/. |
coordinateUncertaintyInMeters |
A number or range between 0-1,000,000 which specifies the desired coordinate uncertainty. A coordinateUncertaintyInMeters=1000 will be interpreted as all records with exactly 1000m. Supports range queries, 'smaller,larger' (e.g., '1000,10000', whereas '10000,1000' wouldn't work). |
verbatimScientificName |
(character) Scientific name as provided by the source. |
eventId |
(character) identifier(s) for a sampling event. |
identifiedBy |
(character) names of people, groups, or organizations who assigned the Taxon to the subject. |
networkKey |
(character) The occurrence network key (a uuid). |
verbatimTaxonId |
(character) The taxon identifier provided to GBIF by the data publisher. |
occurrenceId |
(character) occurrence id from source. |
organismQuantity |
A number or range which specifies the desired organism quantity. An organismQuantity=5 will be interpreted as all records with exactly 5. Supports range queries, 'smaller,larger' (e.g., '5,20', whereas '20,5' wouldn't work). |
organismQuantityType |
(character) The type of quantification system used for the quantity of organisms. For example, "individuals" or "biomass". |
relativeOrganismQuantity |
(numeric) The relative measurement of the quantity of the organism (a number between 0-1). A relativeOrganismQuantity=0.1 will be interpreted as all records with exactly 0.1. Supports range queries, 'smaller,larger' (e.g., '0.1,0.5', whereas '0.5,0.1' wouldn't work). |
iucnRedListCategory |
(character) The IUCN threat status category.
|
lifeStage |
(character) the life stage of the occurrence. One of many options. |
isInCluster |
(logical) identify potentially related records on GBIF. |
distanceFromCentroidInMeters |
A number or range. A value of "2000,*" means at least 2km from known centroids. A value of "0" would mean occurrences exactly on known centroids. A value of "0,2000" would mean within 2km of centroids. Max value is 5000. |
geoDistance |
(character) Filters to match occurrence records with coordinate values within a specified distance of a coordinate. Distance may be specified in kilometres (km) or metres (m). Example : "90,100,5km" |
sex |
(character) The sex of the biological individual(s) represented in the occurrence. |
dwcaExtension |
(character) A known Darwin Core Archive extension RowType. Limits the search to occurrences which have this extension, although they will not necessarily have any useful data recorded using the extension. |
gbifId |
(numeric) The unique GBIF key for a single occurrence. |
gbifRegion |
(character) GBIF region based on country code. |
projectId |
(character) The identifier for a project, which is often assigned by a funded programme. |
programme |
(character) A group of activities, often associated with a specific funding stream, such as the GBIF BID programme. |
preparations |
(character) Preparation or preservation method for a specimen. |
datasetId |
(character) The ID of the dataset. Parameter may be repeated. Example : https://doi.org/10.1594/PANGAEA.315492 |
datasetName |
(character) The exact name of the dataset. Not the same as dataset title. |
publishedByGbifRegion |
(character) GBIF region based on the owning organization's country. |
island |
(character) The name of the island on or near which the location occurs. |
islandGroup |
(character) The name of the island group in which the location occurs. |
taxonId |
(character) The taxon identifier provided to GBIF by the data publisher. Example : urn:lsid:dyntaxa.se:Taxon:103026 |
taxonConceptId |
(character) An identifier for the taxonomic concept to which the record refers - not for the nomenclatural details of a taxon. Example : 8fa58e08-08de-4ac1-b69c-1235340b7001 |
taxonomicStatus |
(character) A taxonomic status. Example : SYNONYM |
acceptedTaxonKey |
(numeric) A taxon key from the GBIF backbone. Only synonym taxa are included in the search, so a search for Aves with acceptedTaxonKey=212 will match occurrences identified as birds, but not any known family, genus or species of bird. |
collectionKey |
(character) A key (UUID) for a collection registered in the Global Registry of Scientific Collections. Example : dceb8d52-094c-4c2c-8960-75e0097c6861 |
institutionKey |
(character) A key (UUID) for an institution registered in the Global Registry of Scientific Collections. |
otherCatalogNumbers |
(character) Previous or alternate fully qualified catalog numbers. |
georeferencedBy |
(character) Name of a person, group, or organization who determined the georeference (spatial representation) for the location. Example : Brad Millen |
installationKey |
(character) The occurrence installation key (a UUID). Example : 17a83780-3060-4851-9d6f-029d5fcb81c9 |
hostingOrganizationKey |
(character) The key (UUID) of the publishing organization whose installation (server) hosts the original dataset. Example : fbca90e3-8aed-48b1-84e3-369afbd000ce |
crawlId |
(numeric) Crawl attempt that harvested this record. |
modified |
(character) The most recent date-time on which the occurrence was changed, according to the publisher. Can be a range. Example : 2023-02-20 |
higherGeography |
(character) Geographic name less specific than the information captured in the locality term. |
fieldNumber |
(character) An identifier given to the event in the field. Often serves as a link between field notes and the event. |
parentEventId |
(character) An identifier for the information associated with a sampling event. |
samplingProtocol |
(character) The name of, reference to, or description of the method or protocol used during a sampling event. Example : malaise trap |
sampleSizeUnit |
(character) The unit of measurement of the size (time duration, length, area, or volume) of a sample in a sampling event. Example : hectares |
pathway |
(character) The process by which an organism came to be in a given place at a given time, as defined in the GBIF Pathway vocabulary. Example : Agriculture |
gadmLevel0Gid |
(character) A GADM geographic identifier at the zero level, for example AGO. |
gadmLevel1Gid |
(character) A GADM geographic identifier at the first level, for example AGO.1_1. |
gadmLevel2Gid |
(character) A GADM geographic identifier at the second level, for example AFG.1.1_1. |
gadmLevel3Gid |
(character) A GADM geographic identifier at the third level, for example AFG.1.1.1_1. |
earliestEonOrLowestEonothem |
(character) geochronologic era term. |
latestEonOrHighestEonothem |
(character) geochronologic era term. |
earliestEraOrLowestErathem |
(character) geochronologic era term. |
latestEraOrHighestErathem |
(character) geochronologic era term. |
earliestPeriodOrLowestSystem |
(character) geochronologic era term. |
latestPeriodOrHighestSystem |
(character) geochronologic era term. |
earliestEpochOrLowestSeries |
(character) geochronologic era term. |
latestEpochOrHighestSeries |
(character) geochronologic era term. |
earliestAgeOrLowestStage |
(character) geochronologic era term. |
latestAgeOrHighestStage |
(character) geochronologic era term. |
lowestBiostratigraphicZone |
(character) geochronologic era term. |
highestBiostratigraphicZone |
(character) geochronologic era term. |
group |
(character) The full name of the lithostratigraphic group from which the material entity was collected. |
formation |
(character) The full name of the lithostratigraphic formation from which the material entity was collected. |
member |
(character) The full name of the lithostratigraphic member from which the material entity was collected. |
bed |
(character) The full name of the lithostratigraphic bed from which the material entity was collected. |
associatedSequences |
(character) Identifier (publication, global unique identifier, URI) of genetic sequence information associated with the material entity. Example : http://www.ncbi.nlm.nih.gov/nuccore/U34853.1 |
isSequenced |
(logical) Indicates whether |
startDayOfYear |
(numeric) The earliest integer day of the year on which the event occurred. |
endDayOfYear |
(numeric) The latest integer day of the year on which the event occurred. |
limit |
Number of records to return. Default: 500. Note that the per
request maximum is 300, but since we set it at 500 for the function, we
do two requests to get you the 500 records (if there are that many).
Note that there is a hard maximum of 100,000, which is calculated as the
|
start |
Record number to start at. Use in combination with limit to
page through results. Note that we do the paging internally for you, but
you can manually set the |
fields |
(character) Default ('all') returns all fields. 'minimal' returns just taxon name, key, datasetKey, latitude, and longitude. Or specify each field you want returned by name, e.g. fields = c('name','latitude','elevation'). |
return |
Defunct. All components (meta, hierarchy, data, media,
facets) are returned now; index to the one(s) you want. See |
facet |
(character) a character vector of length 1 or greater. Required. |
facetMincount |
(numeric) minimum number of records to be included in the faceting results |
facetMultiselect |
(logical) Set to TRUE to still return counts for values that are not currently filtered. Default: FALSE. Faceting: All fields can be faceted on except for "lastInterpreted", "eventDate", and "geometry". You can do facet searches alongside searching occurrence data, and return both, or only return facets, or only occurrence data, etc. |
skip_validate |
(logical) whether to skip |
curlopts |
list of named curl options passed on to
|
... |
additional facet parameters |
An object of class gbif
, which is a S3 class list, with
slots for metadata (meta
), the occurrence data itself (data
),
the taxonomic hierarchy data (hier
), and media metadata
(media
).
In addition, the object has attributes listing the user supplied arguments
and whether it was a 'single' or 'many' search; that is, if you supply two
values of the datasetKey
parameter, two searches are done, and it's a
'many'. meta
is a list of length four with offset, limit,
endOfRecords and count fields. data
is a tibble (aka data.frame). hier
is a list of data.frames of the unique set of taxa found, where each
data.frame is its taxonomic classification. media
is a list of media
objects, where each element holds a set of metadata about the media object.
There are some parameters you can pass multiple values to in a vector,
each value of which produces a different request (multiple different
requests = c("a","b")). Some parameters allow multiple values to be passed
in the same request (multiple same request = "a;b") in a semicolon separated
string (e.g., 'a;b'); if given we'll do a single request with that parameter
repeated for each value given (e.g., foo=a&foo=b
if the parameter
is foo
).
See article Multiple Values.
Hierarchies are returned with each occurrence object. There is no
option to return them from the API. However, within the occ_search
function you can select whether to return just hierarchies, just data, all
of data and hierarchies and metadata, or just metadata. If all hierarchies
are the same we just return one for you.
You can pass parameters not defined in this function into the call to
the GBIF API to control things about the call itself using curlopts
.
See an example below that passes in the verbose
function to get
details on the http call.
Examples of valid WKT objects:
'POLYGON((-19.5 34.1, 27.8 34.1, 35.9 68.1, -25.3 68.1, -19.5 34.1))'
'MULTIPOLYGON(((-123 38,-116 38,-116 43,-123 43,-123 38)),((-97 41,-93 41,-93 45,-97 45,-97 41)))'
'POINT(-120 40)'
'LINESTRING(3 4,10 50,20 25)'
Note that GBIF expects counter-clockwise winding order for WKT. You can
supply clockwise WKT, but GBIF treats it as an exclusion, so you get all
data not inside the WKT area. occ_download()
behaves differently
in that you should simply get no data back at all with clockwise WKT.
Options for handling long WKT strings:
Note that long WKT strings are specially handled when using occ_search
or
occ_data
. Here are the three options for long WKT strings (> 1500 characters),
set one of these three via the parameter geom_big
:
asis - the default setting. This means we don't do anything internally. That is, we just pass on your WKT string just as we've done before in this package.
axe - this option is deprecated since rgbif v3.8.0. Might return error, since the GBIF's polygon interpretation has changed.
This method uses sf::st_make_grid
and sf::st_intersection
, which has
two parameters cellsize
and n
. You can tweak those parameters here by
tweaking geom_size
and geom_n
. geom_size
seems to be more useful in
toggling the number of WKT strings you get back.
See wkt_parse
to manually make a WKT bounding box from a larger WKT
string, or break a larger WKT string into many smaller ones.
bbox - this option checks whether your WKT string is longer than 1500 characters,
and if it is we create a bounding box from the WKT, do the GBIF search with that
bounding box, then prune the resulting data to only those occurrences in your original
WKT string. There is a big caveat however. Because we create a bounding box from the WKT,
and the limit
parameter determines some subset of records to get, then when we
prune the resulting data to the WKT, the number of records you get could be less than
what you set with your limit
parameter. However, you could set the limit to be
high enough so that you get all records back found in that bounding box, then you'll
get all the records available within the WKT.
There is a slight difference in the way records are counted here vs.
results from occ_count
. For equivalent outcomes, in this
function use hasCoordinate=TRUE
, and hasGeospatialIssue=FALSE
to have the same outcome using occ_count
with
isGeoreferenced=TRUE
Maximum number of records you can get with this function is 100,000. See https://www.gbif.org/developer/occurrence
https://www.gbif.org/developer/occurrence#search
## Not run: # Search by species name, using \code{\link{name_backbone}} first to get key (key <- name_suggest(q='Helianthus annuus', rank='species')$data$key[1]) occ_search(taxonKey=key, limit=2) # Return 20 results, this is the default by the way occ_search(taxonKey=key, limit=20) # Get just metadata occ_search(taxonKey=key, limit=0)$meta # Instead of getting a taxon key first, you can search for a name directly ## However, note that using this approach (with \code{scientificName="..."}) ## you are getting synonyms too. The results for using \code{scientifcName} and ## \code{taxonKey} parameters are the same in this case, but I wouldn't be surprised if for some ## names they return different results occ_search(scientificName = 'Ursus americanus') key <- name_backbone(name = 'Ursus americanus', rank='species')$usageKey occ_search(taxonKey = key) # Search by dataset key occ_search(datasetKey='7b5d6a48-f762-11e1-a439-00145eb45e9a', limit=20)$data # Search by catalog number occ_search(catalogNumber="49366", limit=20) ## separate requests: use a vector of strings occ_search(catalogNumber=c("49366","Bird.27847588"), limit=10) ## one request, many instances of same parameter: use semi-colon sep. string occ_search(catalogNumber="49366;Bird.27847588", limit=10) # Get all data, not just lat/long and name occ_search(taxonKey=key, fields='all', limit=20) # Or get specific fields. Note that this isn't done on GBIF's side of things. This # is done in R, but before you get the return object, so other fields are garbage # collected occ_search(taxonKey=key, fields=c('name','basisOfRecord','protocol'), limit=20) # Use paging parameters (limit and start) to page. Note the different results # for the two queries below. 
occ_search(datasetKey='7b5d6a48-f762-11e1-a439-00145eb45e9a',start=10,limit=5)$data occ_search(datasetKey='7b5d6a48-f762-11e1-a439-00145eb45e9a',start=20,limit=5)$data # Many dataset keys ## separate requests: use a vector of strings occ_search(datasetKey=c("50c9509d-22c7-4a22-a47d-8c48425ef4a7", "7b5d6a48-f762-11e1-a439-00145eb45e9a"), limit=20) ## one request, many instances of same parameter: use semi-colon sep. string v="50c9509d-22c7-4a22-a47d-8c48425ef4a7;7b5d6a48-f762-11e1-a439-00145eb45e9a" occ_search(datasetKey = v, limit=20) # Occurrence data: lat/long data, and associated metadata with occurrences ## The `data` slot has a data.frame of all data together ## for easy manipulation occ_search(taxonKey=key, limit=20)$data # Taxonomic hierarchy data ## In the `hier` slot occ_search(taxonKey=key, limit=10)$hier # Search by recorder occ_search(recordedBy="smith", limit=20) # Many collector names occ_search(recordedBy=c("smith","BJ Stacey"), limit=20) # recordedByID occ_search(recordedByID="https://orcid.org/0000-0003-1691-239X", limit=20) # identifiedByID occ_search(identifiedByID="https://orcid.org/0000-0003-4710-2648", limit=20) # Pass in curl options for extra fun occ_search(taxonKey=2433407, limit=20, curlopts=list(verbose=TRUE))$hier occ_search(taxonKey=2433407, limit=20, curlopts = list( noprogress = FALSE, progressfunction = function(down, up) { cat(sprintf("up: %d | down %d\n", up, down)) return(TRUE) } ) )$hier # occ_search(taxonKey=2433407, limit=20, # curlopts = list(timeout_ms = 1)) # Search for many species splist <- c('Cyanocitta stelleri', 'Junco hyemalis', 'Aix sponsa') keys <- sapply(splist, function(x) name_suggest(x)$data$key[1], USE.NAMES=FALSE) ## separate requests: use a vector of strings occ_search(taxonKey = keys, limit=5) ## one request, many instances of same parameter: use semi-colon sep. 
string occ_search(taxonKey = paste0(keys, collapse = ";"), limit=5) # Search using a synonym name # Note that you'll see a message printing out that the accepted name will be used occ_search(scientificName = 'Pulsatilla patens', fields = c('name','scientificName'), limit=5) # Search on latitidue and longitude occ_search(decimalLatitude=48, decimalLongitude=10) # Search on a bounding box ## in well known text format ### polygon occ_search(geometry='POLYGON((30.1 10.1,40 40,20 40,10 20,30.1 10.1))', limit=20) ### multipolygon wkt <- 'MULTIPOLYGON(((-123 38,-116 38,-116 43,-123 43,-123 38)), ((-97 41,-93 41,-93 45,-97 45,-97 41)))' occ_search(geometry = gsub("\n\\s+", "", wkt), limit = 20) ## taxonKey + WKT key <- name_suggest(q='Aesculus hippocastanum')$data$key[1] occ_search(taxonKey=key, geometry='POLYGON((30.1 10.1,40 40,20 40,10 20,30.1 10.1))', limit=20) ## or using bounding box, converted to WKT internally occ_search(geometry=c(-125.0,38.4,-121.8,40.9), limit=20) # Search on a long WKT string - too long for a GBIF search API request ## We internally convert your WKT string to a bounding box ## then do the query ## then clip the results down to just those in the original polygon ## - Alternatively, you can set the parameter `geom_big="bbox"` ## - An additional alternative is to use the GBIF download API, see ?downloads wkt <- "POLYGON((-9.178796777343678 53.22769021556159, -12.167078027343678 51.56540789297837, -12.958093652343678 49.78333685689162,-11.024499902343678 49.21251756301334, -12.079187402343678 46.68179685941719,-15.067468652343678 45.83103608186854, -15.770593652343678 43.58271629699817,-15.067468652343678 41.57676278827219, -11.815515527343678 40.44938999172728,-12.958093652343678 37.72112962230871, -11.639734277343678 36.52987439429357,-8.299890527343678 34.96062625095747, -8.739343652343678 32.62357394385735,-5.223718652343678 30.90497915232165, 1.1044063476563224 31.80562077746643,1.1044063476563224 30.754036557416256, 6.905187597656322 
32.02942785462211,5.147375097656322 32.99292810780193, 9.629796972656322 34.164474406524725,10.860265722656322 32.91918014319603, 14.551671972656322 33.72700959356651,13.409093847656322 34.888564192275204, 16.748937597656322 35.104560368110114,19.561437597656322 34.81643887792552, 18.594640722656322 36.38849705969625,22.989171972656322 37.162874858929854, 19.825109472656322 39.50651757842751,13.760656347656322 38.89353140585116, 14.112218847656322 42.36091601976124,10.596593847656322 41.11488736647705, 9.366125097656322 43.70991402658437,5.059484472656322 42.62015372417812, 2.3348750976563224 45.21526500321446,-0.7412967773436776 46.80225692528942, 6.114171972656322 47.102229890207894,8.047765722656322 45.52399303437107, 12.881750097656322 48.22681126957933,9.190343847656322 48.693079457106684, 8.750890722656322 50.68283120621287,5.059484472656322 50.40356146487845, 4.268468847656322 52.377558897655156,1.4559688476563224 53.28027243658647, 0.8407344726563224 51.62000971578333,0.5770625976563224 49.32721423860726, -2.5869999023436776 49.49875947592088,-2.4991092773436776 51.18135535408638, -2.0596561523436776 52.53822562473851,-4.696374902343678 51.67454591918756, -5.311609277343678 50.009802108095776,-6.629968652343678 48.75106196817059, -7.684656152343678 50.12263634382465,-6.190515527343678 51.83776110910459, -5.047937402343678 54.267098895684235,-6.893640527343678 53.69860705549198, -8.915124902343678 54.77719740243195,-12.079187402343678 54.52294465763567, -13.573328027343678 53.437631551347174, -11.288171777343678 53.48995552517918, -9.178796777343678 53.22769021556159))" wkt <- gsub("\n", " ", wkt) #### Default option with large WKT string fails # res <- occ_search(geometry = wkt) #### if WKT too long, with 'geom_big=bbox': makes into bounding box res <- occ_search(geometry = wkt, geom_big = "bbox")$data # Search on country occ_search(country='US', fields=c('name','country'), limit=20) occ_search(country='FR', fields=c('name','country'), limit=20) 
occ_search(country='DE', fields=c('name','country'), limit=20) ### separate requests: use a vector of strings occ_search(country=c('US','DE'), limit=20) ### one request, many instances of same parameter: use semi-colon sep. string occ_search(country = 'US;DE', limit=20) # Get only occurrences with lat/long data occ_search(taxonKey=key, hasCoordinate=TRUE, limit=20) # Get only occurrences that were recorded as living specimens occ_search(taxonKey=key, basisOfRecord="LIVING_SPECIMEN", hasCoordinate=TRUE, limit=20) ## multiple values in a vector = a separate request for each value occ_search(taxonKey=key, basisOfRecord=c("LIVING_SPECIMEN", "HUMAN_OBSERVATION"), limit=20) ## mutiple values in a single string, ";" separated = one request including all values occ_search(taxonKey=key, basisOfRecord="LIVING_SPECIMEN;HUMAN_OBSERVATION", limit=20) # Get occurrences for a particular eventDate occ_search(taxonKey=key, eventDate="2013", limit=20) occ_search(taxonKey=key, year="2013", limit=20) occ_search(taxonKey=key, month="6", limit=20) # Get occurrences based on depth key <- name_backbone(name='Salmo salar', kingdom='animals')$speciesKey occ_search(taxonKey=key, depth="5", limit=20) # Get occurrences based on elevation key <- name_backbone(name='Puma concolor', kingdom='animals')$speciesKey occ_search(taxonKey=key, elevation=50, hasCoordinate=TRUE, limit=20) # Get occurrences based on institutionCode occ_search(institutionCode="TLMF", limit=20) ### separate requests: use a vector of strings occ_search(institutionCode=c("TLMF","ArtDatabanken"), limit=20) ### one request, many instances of same parameter: use semi-colon sep. 
string occ_search(institutionCode = "TLMF;ArtDatabanken", limit=20) # Get occurrences based on collectionCode occ_search(collectionCode="Floristic Databases MV - Higher Plants", limit=20) occ_search(collectionCode=c("Floristic Databases MV - Higher Plants","Artport")) # Get only those occurrences with spatial issues occ_search(taxonKey=key, hasGeospatialIssue=TRUE, limit=20) # Search using a query string occ_search(search = "kingfisher", limit=20) # search on repatriated - doesn't work right now # occ_search(repatriated = "") # search on phylumKey occ_search(phylumKey = 7707728, limit = 5) # search on kingdomKey occ_search(kingdomKey = 1, limit = 5) # search on classKey occ_search(classKey = 216, limit = 5) # search on orderKey occ_search(orderKey = 7192402, limit = 5) # search on familyKey occ_search(familyKey = 3925, limit = 5) # search on genusKey occ_search(genusKey = 1935496, limit = 5) # search on establishmentMeans occ_search(establishmentMeans = "INVASIVE", limit = 5) occ_search(establishmentMeans = "NATIVE", limit = 5) occ_search(establishmentMeans = "UNCERTAIN", limit = 5) # search on protocol occ_search(protocol = "DIGIR", limit = 5) # search on license occ_search(license = "CC_BY_4_0", limit = 5) # search on organismId occ_search(organismId = "100", limit = 5) # search on publishingOrg occ_search(publishingOrg = "28eb1a3f-1c15-4a95-931a-4af90ecb574d", limit = 5) # search on stateProvince occ_search(stateProvince = "California", limit = 5) # search on waterBody occ_search(waterBody = "AMAZONAS BASIN, RIO JURUA", limit = 5) # search on locality res <- occ_search(locality = c("Trondheim", "Hovekilen"), limit = 5) res$Trondheim$data res$Hovekilen$data # Range queries ## See Detail for parameters that support range queries occ_search(depth='50,100') # this is a range depth, with lower/upper limits in character string occ_search(depth=c(50,100)) # this is not a range search, but does two searches for each depth ## Range search with year 
occ_search(year='1999,2000', limit=20) ## Range search with latitude occ_search(decimalLatitude='29.59,29.6') ## Range search with distanceFromCentroidInMeters occ_search(distanceFromCentroidInMeters = "2000,*") # at least 2km from centroids occ_search(distanceFromCentroidInMeters = "0,2000") # close to centroids within 2km occ_search(distanceFromCentroidInMeters = 0) # exactly on centroids # Search by specimen type status ## Look for possible values of the typeStatus parameter looking at the typestatus dataset occ_search(typeStatus = 'allotype', fields = c('name','typeStatus')) # Search by specimen record number ## This is the record number of the person/group that submitted the data, not GBIF's numbers ## You can see that many different groups have record number 1, so not super helpful occ_search(recordNumber = 1, fields = c('name','recordNumber','recordedBy')) # Search by last time interpreted: Date the record was last modified in GBIF ## The lastInterpreted parameter accepts ISO 8601 format dates, including ## yyyy, yyyy-MM, yyyy-MM-dd, or MM-dd. 
Range queries are accepted for lastInterpreted occ_search(lastInterpreted = '2014-04-02', fields = c('name','lastInterpreted')) # Search by continent ## One of africa, antarctica, asia, europe, north_america, oceania, or south_america occ_search(continent = 'south_america')$meta occ_search(continent = 'africa')$meta occ_search(continent = 'oceania')$meta occ_search(continent = 'antarctica')$meta # Search for occurrences with images occ_search(mediaType = 'StillImage')$media occ_search(mediaType = 'MovingImage')$media occ_search(mediaType = 'Sound')$media # Query based on issues - see Details for options ## one issue occ_search(taxonKey=1, issue='DEPTH_UNLIKELY', fields = c('name','key','decimalLatitude','decimalLongitude','depth')) ## two issues occ_search(taxonKey=1, issue=c('DEPTH_UNLIKELY','COORDINATE_ROUNDED')) # Show all records in the Arizona State Lichen Collection that cant be matched to the GBIF # backbone properly: occ_search(datasetKey='84c0e1a0-f762-11e1-a439-00145eb45e9a', issue=c('TAXON_MATCH_NONE','TAXON_MATCH_HIGHERRANK')) # Parsing output by issue (res <- occ_search(geometry='POLYGON((30.1 10.1,40 40,20 40,10 20,30.1 10.1))', limit = 50)) ## what do issues mean, can print whole table, or search for matches head(gbif_issues()) gbif_issues()[ gbif_issues()$code %in% c('cdround','cudc','gass84','txmathi'), ] ## or parse issues in various ways ### remove data rows with certain issue classes library('magrittr') res %>% occ_issues(gass84) ### split issues into separate columns res %>% occ_issues(mutate = "split") ### expand issues to more descriptive names res %>% occ_issues(mutate = "expand") ### split and expand res %>% occ_issues(mutate = "split_expand") ### split, expand, and remove an issue class res %>% occ_issues(-cudc, mutate = "split_expand") # If you try multiple values for two different parameters you are wacked on the hand # occ_search(taxonKey=c(2482598,2492010), recordedBy=c("smith","BJ Stacey")) # Get a lot of data, here 1500 records for 
Helianthus annuus # out <- occ_search(taxonKey=key, limit=1500) # nrow(out$data) # If you pass in an invalid polygon you get hopefully informative errors ### the WKT string is fine, but GBIF says bad polygon wkt <- 'POLYGON((-178.59375 64.83258989321493,-165.9375 59.24622380205539, -147.3046875 59.065977905449806,-130.78125 51.04484764446178,-125.859375 36.70806354647625, -112.1484375 23.367471303759686,-105.1171875 16.093320185359257,-86.8359375 9.23767076398516, -82.96875 2.9485268155066175,-82.6171875 -14.812060061226388,-74.8828125 -18.849111862023985, -77.34375 -47.661687803329166,-84.375 -49.975955187343295,174.7265625 -50.649460483096114, 179.296875 -42.19189902447192,-176.8359375 -35.634976650677295,176.8359375 -31.835565983656227, 163.4765625 -6.528187613695323,152.578125 1.894796132058301,135.703125 4.702353722559447, 127.96875 15.077427674847987,127.96875 23.689804541429606,139.921875 32.06861069132688, 149.4140625 42.65416193033991,159.2578125 48.3160811030533,168.3984375 57.019804336633165, 178.2421875 59.95776046458139,-179.6484375 61.16708631440347,-178.59375 64.83258989321493))' # occ_search(geometry = gsub("\n", '', wkt)) ### unable to parse due to last number pair needing two numbers, not one # wkt <- 'POLYGON((-178.5 64.8,-165.9 59.2,-147.3 59.0,-130.7 51.0,-125.8))' # occ_search(geometry = wkt) ### unable to parse due to unclosed string # wkt <- 'POLYGON((-178.5 64.8,-165.9 59.2,-147.3 59.0,-130.7 51.0))' # occ_search(geometry = wkt) ### another of the same # wkt <- 'POLYGON((-178.5 64.8,-165.9 59.2,-147.3 59.0,-130.7 51.0,-125.8 36.7))' # occ_search(geometry = wkt) ### returns no results # wkt <- 'LINESTRING(3 4,10 50,20 25)' # occ_search(geometry = wkt) ### Apparently a point is allowed, but errors # wkt <- 'POINT(45 -122)' # occ_search(geometry = wkt) ## Faceting x <- occ_search(facet = "country", limit = 0) x$facets x <- occ_search(facet = "establishmentMeans", limit = 10) x$facets x$data x <- occ_search(facet = c("country", 
"basisOfRecord"), limit = 10) x$data x$facets x$facets$country x$facets$basisOfRecord x$facets$basisOfRecord$count x <- occ_search(facet = "country", facetMincount = 30000000L, limit = 10) x$facets x$data # paging per each faceted variable (x <- occ_search( facet = c("country", "basisOfRecord", "hasCoordinate"), country.facetLimit = 3, basisOfRecord.facetLimit = 6, limit = 0 )) x$facets # You can set limit=0 to get number of results found occ_search(datasetKey = '7b5d6a48-f762-11e1-a439-00145eb45e9a', limit = 0)$meta occ_search(scientificName = 'Ursus americanus', limit = 0)$meta occ_search(scientificName = 'Ursus americanus', limit = 0)$meta ## End(Not run)
## Not run: # Search by species name, using \code{\link{name_backbone}} first to get key (key <- name_suggest(q='Helianthus annuus', rank='species')$data$key[1]) occ_search(taxonKey=key, limit=2) # Return 20 results, this is the default by the way occ_search(taxonKey=key, limit=20) # Get just metadata occ_search(taxonKey=key, limit=0)$meta # Instead of getting a taxon key first, you can search for a name directly ## However, note that using this approach (with \code{scientificName="..."}) ## you are getting synonyms too. The results for using \code{scientifcName} and ## \code{taxonKey} parameters are the same in this case, but I wouldn't be surprised if for some ## names they return different results occ_search(scientificName = 'Ursus americanus') key <- name_backbone(name = 'Ursus americanus', rank='species')$usageKey occ_search(taxonKey = key) # Search by dataset key occ_search(datasetKey='7b5d6a48-f762-11e1-a439-00145eb45e9a', limit=20)$data # Search by catalog number occ_search(catalogNumber="49366", limit=20) ## separate requests: use a vector of strings occ_search(catalogNumber=c("49366","Bird.27847588"), limit=10) ## one request, many instances of same parameter: use semi-colon sep. string occ_search(catalogNumber="49366;Bird.27847588", limit=10) # Get all data, not just lat/long and name occ_search(taxonKey=key, fields='all', limit=20) # Or get specific fields. Note that this isn't done on GBIF's side of things. This # is done in R, but before you get the return object, so other fields are garbage # collected occ_search(taxonKey=key, fields=c('name','basisOfRecord','protocol'), limit=20) # Use paging parameters (limit and start) to page. Note the different results # for the two queries below. 
occ_search(datasetKey='7b5d6a48-f762-11e1-a439-00145eb45e9a',start=10,limit=5)$data occ_search(datasetKey='7b5d6a48-f762-11e1-a439-00145eb45e9a',start=20,limit=5)$data # Many dataset keys ## separate requests: use a vector of strings occ_search(datasetKey=c("50c9509d-22c7-4a22-a47d-8c48425ef4a7", "7b5d6a48-f762-11e1-a439-00145eb45e9a"), limit=20) ## one request, many instances of same parameter: use semi-colon sep. string v="50c9509d-22c7-4a22-a47d-8c48425ef4a7;7b5d6a48-f762-11e1-a439-00145eb45e9a" occ_search(datasetKey = v, limit=20) # Occurrence data: lat/long data, and associated metadata with occurrences ## The `data` slot has a data.frame of all data together ## for easy manipulation occ_search(taxonKey=key, limit=20)$data # Taxonomic hierarchy data ## In the `hier` slot occ_search(taxonKey=key, limit=10)$hier # Search by recorder occ_search(recordedBy="smith", limit=20) # Many collector names occ_search(recordedBy=c("smith","BJ Stacey"), limit=20) # recordedByID occ_search(recordedByID="https://orcid.org/0000-0003-1691-239X", limit=20) # identifiedByID occ_search(identifiedByID="https://orcid.org/0000-0003-4710-2648", limit=20) # Pass in curl options for extra fun occ_search(taxonKey=2433407, limit=20, curlopts=list(verbose=TRUE))$hier occ_search(taxonKey=2433407, limit=20, curlopts = list( noprogress = FALSE, progressfunction = function(down, up) { cat(sprintf("up: %d | down %d\n", up, down)) return(TRUE) } ) )$hier # occ_search(taxonKey=2433407, limit=20, # curlopts = list(timeout_ms = 1)) # Search for many species splist <- c('Cyanocitta stelleri', 'Junco hyemalis', 'Aix sponsa') keys <- sapply(splist, function(x) name_suggest(x)$data$key[1], USE.NAMES=FALSE) ## separate requests: use a vector of strings occ_search(taxonKey = keys, limit=5) ## one request, many instances of same parameter: use semi-colon sep. 
string occ_search(taxonKey = paste0(keys, collapse = ";"), limit=5) # Search using a synonym name # Note that you'll see a message printing out that the accepted name will be used occ_search(scientificName = 'Pulsatilla patens', fields = c('name','scientificName'), limit=5) # Search on latitidue and longitude occ_search(decimalLatitude=48, decimalLongitude=10) # Search on a bounding box ## in well known text format ### polygon occ_search(geometry='POLYGON((30.1 10.1,40 40,20 40,10 20,30.1 10.1))', limit=20) ### multipolygon wkt <- 'MULTIPOLYGON(((-123 38,-116 38,-116 43,-123 43,-123 38)), ((-97 41,-93 41,-93 45,-97 45,-97 41)))' occ_search(geometry = gsub("\n\\s+", "", wkt), limit = 20) ## taxonKey + WKT key <- name_suggest(q='Aesculus hippocastanum')$data$key[1] occ_search(taxonKey=key, geometry='POLYGON((30.1 10.1,40 40,20 40,10 20,30.1 10.1))', limit=20) ## or using bounding box, converted to WKT internally occ_search(geometry=c(-125.0,38.4,-121.8,40.9), limit=20) # Search on a long WKT string - too long for a GBIF search API request ## We internally convert your WKT string to a bounding box ## then do the query ## then clip the results down to just those in the original polygon ## - Alternatively, you can set the parameter `geom_big="bbox"` ## - An additional alternative is to use the GBIF download API, see ?downloads wkt <- "POLYGON((-9.178796777343678 53.22769021556159, -12.167078027343678 51.56540789297837, -12.958093652343678 49.78333685689162,-11.024499902343678 49.21251756301334, -12.079187402343678 46.68179685941719,-15.067468652343678 45.83103608186854, -15.770593652343678 43.58271629699817,-15.067468652343678 41.57676278827219, -11.815515527343678 40.44938999172728,-12.958093652343678 37.72112962230871, -11.639734277343678 36.52987439429357,-8.299890527343678 34.96062625095747, -8.739343652343678 32.62357394385735,-5.223718652343678 30.90497915232165, 1.1044063476563224 31.80562077746643,1.1044063476563224 30.754036557416256, 6.905187597656322 
32.02942785462211,5.147375097656322 32.99292810780193, 9.629796972656322 34.164474406524725,10.860265722656322 32.91918014319603, 14.551671972656322 33.72700959356651,13.409093847656322 34.888564192275204, 16.748937597656322 35.104560368110114,19.561437597656322 34.81643887792552, 18.594640722656322 36.38849705969625,22.989171972656322 37.162874858929854, 19.825109472656322 39.50651757842751,13.760656347656322 38.89353140585116, 14.112218847656322 42.36091601976124,10.596593847656322 41.11488736647705, 9.366125097656322 43.70991402658437,5.059484472656322 42.62015372417812, 2.3348750976563224 45.21526500321446,-0.7412967773436776 46.80225692528942, 6.114171972656322 47.102229890207894,8.047765722656322 45.52399303437107, 12.881750097656322 48.22681126957933,9.190343847656322 48.693079457106684, 8.750890722656322 50.68283120621287,5.059484472656322 50.40356146487845, 4.268468847656322 52.377558897655156,1.4559688476563224 53.28027243658647, 0.8407344726563224 51.62000971578333,0.5770625976563224 49.32721423860726, -2.5869999023436776 49.49875947592088,-2.4991092773436776 51.18135535408638, -2.0596561523436776 52.53822562473851,-4.696374902343678 51.67454591918756, -5.311609277343678 50.009802108095776,-6.629968652343678 48.75106196817059, -7.684656152343678 50.12263634382465,-6.190515527343678 51.83776110910459, -5.047937402343678 54.267098895684235,-6.893640527343678 53.69860705549198, -8.915124902343678 54.77719740243195,-12.079187402343678 54.52294465763567, -13.573328027343678 53.437631551347174, -11.288171777343678 53.48995552517918, -9.178796777343678 53.22769021556159))" wkt <- gsub("\n", " ", wkt) #### Default option with large WKT string fails # res <- occ_search(geometry = wkt) #### if WKT too long, with 'geom_big=bbox': makes into bounding box res <- occ_search(geometry = wkt, geom_big = "bbox")$data # Search on country occ_search(country='US', fields=c('name','country'), limit=20) occ_search(country='FR', fields=c('name','country'), limit=20) 
occ_search(country='DE', fields=c('name','country'), limit=20) ### separate requests: use a vector of strings occ_search(country=c('US','DE'), limit=20) ### one request, many instances of same parameter: use semi-colon sep. string occ_search(country = 'US;DE', limit=20) # Get only occurrences with lat/long data occ_search(taxonKey=key, hasCoordinate=TRUE, limit=20) # Get only occurrences that were recorded as living specimens occ_search(taxonKey=key, basisOfRecord="LIVING_SPECIMEN", hasCoordinate=TRUE, limit=20) ## multiple values in a vector = a separate request for each value occ_search(taxonKey=key, basisOfRecord=c("LIVING_SPECIMEN", "HUMAN_OBSERVATION"), limit=20) ## mutiple values in a single string, ";" separated = one request including all values occ_search(taxonKey=key, basisOfRecord="LIVING_SPECIMEN;HUMAN_OBSERVATION", limit=20) # Get occurrences for a particular eventDate occ_search(taxonKey=key, eventDate="2013", limit=20) occ_search(taxonKey=key, year="2013", limit=20) occ_search(taxonKey=key, month="6", limit=20) # Get occurrences based on depth key <- name_backbone(name='Salmo salar', kingdom='animals')$speciesKey occ_search(taxonKey=key, depth="5", limit=20) # Get occurrences based on elevation key <- name_backbone(name='Puma concolor', kingdom='animals')$speciesKey occ_search(taxonKey=key, elevation=50, hasCoordinate=TRUE, limit=20) # Get occurrences based on institutionCode occ_search(institutionCode="TLMF", limit=20) ### separate requests: use a vector of strings occ_search(institutionCode=c("TLMF","ArtDatabanken"), limit=20) ### one request, many instances of same parameter: use semi-colon sep. 
string occ_search(institutionCode = "TLMF;ArtDatabanken", limit=20) # Get occurrences based on collectionCode occ_search(collectionCode="Floristic Databases MV - Higher Plants", limit=20) occ_search(collectionCode=c("Floristic Databases MV - Higher Plants","Artport")) # Get only those occurrences with spatial issues occ_search(taxonKey=key, hasGeospatialIssue=TRUE, limit=20) # Search using a query string occ_search(search = "kingfisher", limit=20) # search on repatriated - doesn't work right now # occ_search(repatriated = "") # search on phylumKey occ_search(phylumKey = 7707728, limit = 5) # search on kingdomKey occ_search(kingdomKey = 1, limit = 5) # search on classKey occ_search(classKey = 216, limit = 5) # search on orderKey occ_search(orderKey = 7192402, limit = 5) # search on familyKey occ_search(familyKey = 3925, limit = 5) # search on genusKey occ_search(genusKey = 1935496, limit = 5) # search on establishmentMeans occ_search(establishmentMeans = "INVASIVE", limit = 5) occ_search(establishmentMeans = "NATIVE", limit = 5) occ_search(establishmentMeans = "UNCERTAIN", limit = 5) # search on protocol occ_search(protocol = "DIGIR", limit = 5) # search on license occ_search(license = "CC_BY_4_0", limit = 5) # search on organismId occ_search(organismId = "100", limit = 5) # search on publishingOrg occ_search(publishingOrg = "28eb1a3f-1c15-4a95-931a-4af90ecb574d", limit = 5) # search on stateProvince occ_search(stateProvince = "California", limit = 5) # search on waterBody occ_search(waterBody = "AMAZONAS BASIN, RIO JURUA", limit = 5) # search on locality res <- occ_search(locality = c("Trondheim", "Hovekilen"), limit = 5) res$Trondheim$data res$Hovekilen$data # Range queries ## See Detail for parameters that support range queries occ_search(depth='50,100') # this is a range depth, with lower/upper limits in character string occ_search(depth=c(50,100)) # this is not a range search, but does two searches for each depth ## Range search with year 
occ_search(year='1999,2000', limit=20) ## Range search with latitude occ_search(decimalLatitude='29.59,29.6') ## Range search with distanceFromCentroidInMeters occ_search(distanceFromCentroidInMeters = "2000,*") # at least 2km from centroids occ_search(distanceFromCentroidInMeters = "0,2000") # close to centroids within 2km occ_search(distanceFromCentroidInMeters = 0) # exactly on centroids # Search by specimen type status ## Look for possible values of the typeStatus parameter looking at the typestatus dataset occ_search(typeStatus = 'allotype', fields = c('name','typeStatus')) # Search by specimen record number ## This is the record number of the person/group that submitted the data, not GBIF's numbers ## You can see that many different groups have record number 1, so not super helpful occ_search(recordNumber = 1, fields = c('name','recordNumber','recordedBy')) # Search by last time interpreted: Date the record was last modified in GBIF ## The lastInterpreted parameter accepts ISO 8601 format dates, including ## yyyy, yyyy-MM, yyyy-MM-dd, or MM-dd. 
Range queries are accepted for lastInterpreted occ_search(lastInterpreted = '2014-04-02', fields = c('name','lastInterpreted')) # Search by continent ## One of africa, antarctica, asia, europe, north_america, oceania, or south_america occ_search(continent = 'south_america')$meta occ_search(continent = 'africa')$meta occ_search(continent = 'oceania')$meta occ_search(continent = 'antarctica')$meta # Search for occurrences with images occ_search(mediaType = 'StillImage')$media occ_search(mediaType = 'MovingImage')$media occ_search(mediaType = 'Sound')$media # Query based on issues - see Details for options ## one issue occ_search(taxonKey=1, issue='DEPTH_UNLIKELY', fields = c('name','key','decimalLatitude','decimalLongitude','depth')) ## two issues occ_search(taxonKey=1, issue=c('DEPTH_UNLIKELY','COORDINATE_ROUNDED')) # Show all records in the Arizona State Lichen Collection that cant be matched to the GBIF # backbone properly: occ_search(datasetKey='84c0e1a0-f762-11e1-a439-00145eb45e9a', issue=c('TAXON_MATCH_NONE','TAXON_MATCH_HIGHERRANK')) # Parsing output by issue (res <- occ_search(geometry='POLYGON((30.1 10.1,40 40,20 40,10 20,30.1 10.1))', limit = 50)) ## what do issues mean, can print whole table, or search for matches head(gbif_issues()) gbif_issues()[ gbif_issues()$code %in% c('cdround','cudc','gass84','txmathi'), ] ## or parse issues in various ways ### remove data rows with certain issue classes library('magrittr') res %>% occ_issues(gass84) ### split issues into separate columns res %>% occ_issues(mutate = "split") ### expand issues to more descriptive names res %>% occ_issues(mutate = "expand") ### split and expand res %>% occ_issues(mutate = "split_expand") ### split, expand, and remove an issue class res %>% occ_issues(-cudc, mutate = "split_expand") # If you try multiple values for two different parameters you are wacked on the hand # occ_search(taxonKey=c(2482598,2492010), recordedBy=c("smith","BJ Stacey")) # Get a lot of data, here 1500 records for 
Helianthus annuus # out <- occ_search(taxonKey=key, limit=1500) # nrow(out$data) # If you pass in an invalid polygon you get hopefully informative errors ### the WKT string is fine, but GBIF says bad polygon wkt <- 'POLYGON((-178.59375 64.83258989321493,-165.9375 59.24622380205539, -147.3046875 59.065977905449806,-130.78125 51.04484764446178,-125.859375 36.70806354647625, -112.1484375 23.367471303759686,-105.1171875 16.093320185359257,-86.8359375 9.23767076398516, -82.96875 2.9485268155066175,-82.6171875 -14.812060061226388,-74.8828125 -18.849111862023985, -77.34375 -47.661687803329166,-84.375 -49.975955187343295,174.7265625 -50.649460483096114, 179.296875 -42.19189902447192,-176.8359375 -35.634976650677295,176.8359375 -31.835565983656227, 163.4765625 -6.528187613695323,152.578125 1.894796132058301,135.703125 4.702353722559447, 127.96875 15.077427674847987,127.96875 23.689804541429606,139.921875 32.06861069132688, 149.4140625 42.65416193033991,159.2578125 48.3160811030533,168.3984375 57.019804336633165, 178.2421875 59.95776046458139,-179.6484375 61.16708631440347,-178.59375 64.83258989321493))' # occ_search(geometry = gsub("\n", '', wkt)) ### unable to parse due to last number pair needing two numbers, not one # wkt <- 'POLYGON((-178.5 64.8,-165.9 59.2,-147.3 59.0,-130.7 51.0,-125.8))' # occ_search(geometry = wkt) ### unable to parse due to unclosed string # wkt <- 'POLYGON((-178.5 64.8,-165.9 59.2,-147.3 59.0,-130.7 51.0))' # occ_search(geometry = wkt) ### another of the same # wkt <- 'POLYGON((-178.5 64.8,-165.9 59.2,-147.3 59.0,-130.7 51.0,-125.8 36.7))' # occ_search(geometry = wkt) ### returns no results # wkt <- 'LINESTRING(3 4,10 50,20 25)' # occ_search(geometry = wkt) ### Apparently a point is allowed, but errors # wkt <- 'POINT(45 -122)' # occ_search(geometry = wkt) ## Faceting x <- occ_search(facet = "country", limit = 0) x$facets x <- occ_search(facet = "establishmentMeans", limit = 10) x$facets x$data x <- occ_search(facet = c("country", 
"basisOfRecord"), limit = 10) x$data x$facets x$facets$country x$facets$basisOfRecord x$facets$basisOfRecord$count x <- occ_search(facet = "country", facetMincount = 30000000L, limit = 10) x$facets x$data # paging per each faceted variable (x <- occ_search( facet = c("country", "basisOfRecord", "hasCoordinate"), country.facetLimit = 3, basisOfRecord.facetLimit = 6, limit = 0 )) x$facets # You can set limit=0 to get number of results found occ_search(datasetKey = '7b5d6a48-f762-11e1-a439-00145eb45e9a', limit = 0)$meta occ_search(scientificName = 'Ursus americanus', limit = 0)$meta occ_search(scientificName = 'Ursus americanus', limit = 0)$meta ## End(Not run)
Organizations metadata.
organizations( data = "all", country = NULL, uuid = NULL, query = NULL, limit = 100, start = NULL, curlopts = list() )
organizations( data = "all", country = NULL, uuid = NULL, query = NULL, limit = 100, start = NULL, curlopts = list() )
data |
(character) The type of data to get. One or more of:
'organization', 'contact', 'endpoint', 'identifier', 'tag', 'machineTag',
'comment', 'hostedDataset', 'ownedDataset', 'deleted', 'pending',
'nonPublishing', or the special 'all'. Default: 'all' |
country |
(character) Filters by country. |
uuid |
(character) UUID of the data node provider. This must be specified if data is anything other than 'all', 'deleted', 'pending', or 'nonPublishing'. |
query |
(character) Query nodes. Only used when data = 'all'. |
limit |
Number of records to return. Default: 100. Maximum: 1000. |
start |
Record number to start at. Default: 0. Use in combination
with limit to page through results. |
curlopts |
list of named curl options passed on to
crul::HttpClient. See curl::curl_options() for available curl options. |
A list of length two, consisting of a data.frame meta (returned when
uuid is NULL) and data, which can be either a list or a data.frame
depending on the requested type of data.
https://www.gbif.org/developer/registry#organizations
## Not run: organizations(limit=5) organizations(query="france", limit=5) organizations(country = "SPAIN") organizations(uuid="4b4b2111-ee51-45f5-bf5e-f535f4a1c9dc") organizations(data='contact', uuid="4b4b2111-ee51-45f5-bf5e-f535f4a1c9dc") organizations(data='pending') organizations(data=c('contact','endpoint'), uuid="4b4b2111-ee51-45f5-bf5e-f535f4a1c9dc") # Pass on curl options organizations(query="spain", curlopts = list(verbose=TRUE)) ## End(Not run)
## Not run: organizations(limit=5) organizations(query="france", limit=5) organizations(country = "SPAIN") organizations(uuid="4b4b2111-ee51-45f5-bf5e-f535f4a1c9dc") organizations(data='contact', uuid="4b4b2111-ee51-45f5-bf5e-f535f4a1c9dc") organizations(data='pending') organizations(data=c('contact','endpoint'), uuid="4b4b2111-ee51-45f5-bf5e-f535f4a1c9dc") # Pass on curl options organizations(query="spain", curlopts = list(verbose=TRUE)) ## End(Not run)
Parse taxon names using the GBIF name parser.
parsenames(scientificname, curlopts = list())
parsenames(scientificname, curlopts = list())
scientificname |
A character vector of scientific names. |
curlopts |
list of named curl options passed on to
crul::HttpClient. See curl::curl_options() for available curl options. |
A data.frame containing fields extracted from parsed
taxon names. Fields returned are the union of fields extracted from
all species names in scientificname.
John Baumgartner ([email protected])
https://www.gbif.org/developer/species#parser
## Not run: parsenames(scientificname='x Agropogon littoralis') parsenames(c('Arrhenatherum elatius var. elatius', 'Secale cereale subsp. cereale', 'Secale cereale ssp. cereale', 'Vanessa atalanta (Linnaeus, 1758)')) parsenames("Ajuga pyramidata") parsenames("Ajuga pyramidata x reptans") # Pass on curl options # res <- parsenames(c('Arrhenatherum elatius var. elatius', # 'Secale cereale subsp. cereale', 'Secale cereale ssp. cereale', # 'Vanessa atalanta (Linnaeus, 1758)'), curlopts=list(verbose=TRUE)) ## End(Not run)
## Not run: parsenames(scientificname='x Agropogon littoralis') parsenames(c('Arrhenatherum elatius var. elatius', 'Secale cereale subsp. cereale', 'Secale cereale ssp. cereale', 'Vanessa atalanta (Linnaeus, 1758)')) parsenames("Ajuga pyramidata") parsenames("Ajuga pyramidata x reptans") # Pass on curl options # res <- parsenames(c('Arrhenatherum elatius var. elatius', # 'Secale cereale subsp. cereale', 'Secale cereale ssp. cereale', # 'Vanessa atalanta (Linnaeus, 1758)'), curlopts=list(verbose=TRUE)) ## End(Not run)
Look up 2-character ISO country codes
rgb_country_codes(country_name, fuzzy = FALSE, ...)
rgb_country_codes(country_name, fuzzy = FALSE, ...)
country_name |
Name of country to look up |
fuzzy |
If TRUE, uses agrep to do fuzzy search on names. |
... |
Further arguments passed on to agrep or grep |
## Not run: rgb_country_codes(country_name="United") ## End(Not run)
## Not run: rgb_country_codes(country_name="United") ## End(Not run)
density_spplist()
: service no longer provided
densitylist()
: service no longer provided
gbifdata()
: service no longer provided
gbifmap_dens()
: service no longer provided
gbifmap_list()
: service no longer provided
occurrencedensity()
: service no longer provided
providers()
: service no longer provided
resources()
: service no longer provided
taxoncount()
: service no longer provided
taxonget()
: service no longer provided
taxonsearch()
: service no longer provided
stylegeojson()
: moving this functionality to spocc package, will be
removed soon
togeojson()
: moving this functionality to spocc package, will be
removed soon
gist()
: moving this functionality to spocc package, will be
removed soon
occ_spellcheck()
: GBIF has removed the spellCheck parameter from their API
The above functions have been removed. See https://github.com/ropensci/rgbif and poke around the code if you want to find the old functions in previous versions of the package.
Get the possible values to be used for (taxonomic) rank arguments in GBIF API methods.
taxrank()
taxrank()
## Not run: taxrank() ## End(Not run)
## Not run: taxrank() ## End(Not run)
Parse WKT into smaller bits
wkt_parse(wkt, geom_big = "bbox", geom_size = 40, geom_n = 10)
wkt_parse(wkt, geom_big = "bbox", geom_size = 40, geom_n = 10)
wkt |
(character) A WKT string. Required. |
geom_big |
(character) Only "bbox" works since rgbif 3.8.0. |
geom_size |
(integer) An integer indicating size of the cell. Default: 40. |
geom_n |
(integer) An integer indicating number of cells in each dimension. Default: 10. |
wkt <- "POLYGON((13.26349675655365 52.53991761181831,18.36115300655365 54.11445544219924, 21.87677800655365 53.80418956368524,24.68927800655365 54.217364774722455,28.20490300655365 54.320018299365124,30.49005925655365 52.85948216284084,34.70880925655365 52.753220564427814, 35.93927800655365 50.46131871049754,39.63068425655365 49.55761261299145,40.86115300655365 46.381388009130845,34.00568425655365 45.279102926537,33.30255925655365 48.636868465271846, 30.13849675655365 49.78513301801265,28.38068425655365 47.2236377039631,29.78693425655365 44.6572866068524,27.67755925655365 42.62220075124676,23.10724675655365 43.77542058000212, 24.51349675655365 47.10412345120368,26.79865300655365 49.55761261299145,23.98615300655365 52.00209943876426,23.63459050655365 49.44345313705238,19.41584050655365 47.580567827212114, 19.59162175655365 44.90682206053508,20.11896550655365 42.36297154876359,22.93146550655365 40.651849782081555,25.56818425655365 39.98171166226459,29.61115300655365 40.78507856230178, 32.95099675655365 40.38459278067577,32.95099675655365 37.37491910393631,26.27130925655365 33.65619609886799,22.05255925655365 36.814081996401605,18.71271550655365 36.1072176729021, 18.53693425655365 39.16878677351903,15.37287175655365 38.346355762190846,15.19709050655365 41.578843777436326,12.56037175655365 41.050735748143424,12.56037175655365 44.02872991212046, 15.19709050655365 45.52594200494078,16.42755925655365 48.05271546733352,17.48224675655365 48.86865641518059,10.62677800655365 47.817178329053135,9.57209050655365 44.154980365192, 8.16584050655365 40.51835445724746,6.05646550655365 36.53210972067291,0.9588092565536499 31.583640057148145,-5.54509699344635 35.68001485298146,-6.77556574344635 40.51835445724746, -9.41228449344635 38.346355762190846,-12.40056574344635 35.10683619158607,-15.74040949344635 38.07010978950028,-14.68572199344635 41.31532459432774,-11.69744074344635 43.64836179231387, -8.88494074344635 42.88035509418534,-4.31462824344635 43.52103366008421,-8.35759699344635 
47.2236377039631,-8.18181574344635 50.12441989397795,-5.01775324344635 49.55761261299145, -2.73259699344635 46.25998980446569,-1.67790949344635 44.154980365192,-1.32634699344635 39.30493590580802,2.18927800655365 41.44721797271696,4.47443425655365 43.26556960420879, 2.18927800655365 46.7439668697322,1.83771550655365 50.3492841273576,6.93537175655365 49.671505849335254,5.00177800655365 52.32557322466785,7.81427800655365 51.67627099802223, 7.81427800655365 54.5245591562317,10.97834050655365 51.89375191441792,10.97834050655365 55.43241335888528,13.26349675655365 52.53991761181831))" wkt <- gsub("\n", " ", wkt) if (requireNamespace("sf", quietly=TRUE)) { # to a bounding box in wkt format wkt_parse(wkt, geom_big = "bbox") }
wkt <- "POLYGON((13.26349675655365 52.53991761181831,18.36115300655365 54.11445544219924, 21.87677800655365 53.80418956368524,24.68927800655365 54.217364774722455,28.20490300655365 54.320018299365124,30.49005925655365 52.85948216284084,34.70880925655365 52.753220564427814, 35.93927800655365 50.46131871049754,39.63068425655365 49.55761261299145,40.86115300655365 46.381388009130845,34.00568425655365 45.279102926537,33.30255925655365 48.636868465271846, 30.13849675655365 49.78513301801265,28.38068425655365 47.2236377039631,29.78693425655365 44.6572866068524,27.67755925655365 42.62220075124676,23.10724675655365 43.77542058000212, 24.51349675655365 47.10412345120368,26.79865300655365 49.55761261299145,23.98615300655365 52.00209943876426,23.63459050655365 49.44345313705238,19.41584050655365 47.580567827212114, 19.59162175655365 44.90682206053508,20.11896550655365 42.36297154876359,22.93146550655365 40.651849782081555,25.56818425655365 39.98171166226459,29.61115300655365 40.78507856230178, 32.95099675655365 40.38459278067577,32.95099675655365 37.37491910393631,26.27130925655365 33.65619609886799,22.05255925655365 36.814081996401605,18.71271550655365 36.1072176729021, 18.53693425655365 39.16878677351903,15.37287175655365 38.346355762190846,15.19709050655365 41.578843777436326,12.56037175655365 41.050735748143424,12.56037175655365 44.02872991212046, 15.19709050655365 45.52594200494078,16.42755925655365 48.05271546733352,17.48224675655365 48.86865641518059,10.62677800655365 47.817178329053135,9.57209050655365 44.154980365192, 8.16584050655365 40.51835445724746,6.05646550655365 36.53210972067291,0.9588092565536499 31.583640057148145,-5.54509699344635 35.68001485298146,-6.77556574344635 40.51835445724746, -9.41228449344635 38.346355762190846,-12.40056574344635 35.10683619158607,-15.74040949344635 38.07010978950028,-14.68572199344635 41.31532459432774,-11.69744074344635 43.64836179231387, -8.88494074344635 42.88035509418534,-4.31462824344635 43.52103366008421,-8.35759699344635 
47.2236377039631,-8.18181574344635 50.12441989397795,-5.01775324344635 49.55761261299145, -2.73259699344635 46.25998980446569,-1.67790949344635 44.154980365192,-1.32634699344635 39.30493590580802,2.18927800655365 41.44721797271696,4.47443425655365 43.26556960420879, 2.18927800655365 46.7439668697322,1.83771550655365 50.3492841273576,6.93537175655365 49.671505849335254,5.00177800655365 52.32557322466785,7.81427800655365 51.67627099802223, 7.81427800655365 54.5245591562317,10.97834050655365 51.89375191441792,10.97834050655365 55.43241335888528,13.26349675655365 52.53991761181831))" wkt <- gsub("\n", " ", wkt) if (requireNamespace("sf", quietly=TRUE)) { # to a bounding box in wkt format wkt_parse(wkt, geom_big = "bbox") }