The goal of azores.rorquals is to provide an R compendium accompanying Peres dos Santos et al. (2025).
# install.packages("pak")
pak::pak("patterninstitute/azores.rorquals")
A data set of sightings of rorquals in the Azores, between 2012 and 2018:
library(azores.rorquals)
#> Loading required package: sf
#> Linking to GEOS 3.13.0, GDAL 3.10.0, PROJ 9.5.0; sf_use_s2() is TRUE
sightings_sf
#> Simple feature collection with 54946 features and 8 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -839828 ymin: -631183.3 xmax: 840628.2 ymax: 586935
#> Projected CRS: +proj=laea +lat_0=38.5 +lon_0=-28 +datum=WGS84 +units=m +no_defs
#> # A tibble: 54,946 × 9
#> source species datetime date time sc tc presence
#> * <chr> <chr> <dttm> <date> <hms> <int> <int> <chr>
#> 1 gbif Delphinus d… 2007-04-10 00:00:00 2007-04-10 NA s… 1 1 1933001…
#> 2 gbif Delphinus d… 2007-04-10 00:00:00 2007-04-10 NA s… 1 1 1933001…
#> 3 gbif Delphinus d… 2007-04-10 00:00:00 2007-04-10 NA s… 2 1 1933002…
#> 4 gbif Delphinus d… 2007-04-10 00:00:00 2007-04-10 NA s… 3 1 1933003…
#> 5 gbif Delphinus d… 2007-04-10 00:00:00 2007-04-10 NA s… 4 1 1933004…
#> 6 gbif Stenella co… 2007-04-10 00:00:00 2007-04-10 NA s… 1 1 1048400…
#> 7 gbif Delphinus d… 2007-04-11 00:00:00 2007-04-11 NA s… 1 1 1934001…
#> 8 gbif Delphinus d… 2007-04-11 00:00:00 2007-04-11 NA s… 2 1 1934002…
#> 9 gbif Delphinus d… 2007-04-11 00:00:00 2007-04-11 NA s… 3 1 1934003…
#> 10 gbif Delphinus d… 2007-04-11 00:00:00 2007-04-11 NA s… 4 1 1934004…
#> # ℹ 54,936 more rows
#> # ℹ 1 more variable: geometry <POINT [m]>
The presences_sf dataset is a filtered version of sightings_sf with
redundant observations removed, ensuring that each unique animal is
counted only once. The presence column provides a unique identifier for
each individual.
The presences_sf2 dataset contains the same observations as
presences_sf but includes additional climate and topographical
variables.
presences_sf
#> Simple feature collection with 44037 features and 9 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -839797.6 ymin: -631183.3 xmax: 840628.2 ymax: 586935
#> Projected CRS: +proj=laea +lat_0=38.5 +lon_0=-28 +datum=WGS84 +units=m +no_defs
#> # A tibble: 44,037 × 10
#> source species datetime date time sc tc presence
#> * <chr> <chr> <dttm> <date> <hms> <int> <int> <chr>
#> 1 gbif Stenella co… 2007-04-10 00:00:00 2007-04-10 NA s… 1 1 1048400…
#> 2 gbif Delphinus d… 2007-04-10 00:00:00 2007-04-10 NA s… 1 1 1933001…
#> 3 gbif Delphinus d… 2007-04-10 00:00:00 2007-04-10 NA s… 2 1 1933002…
#> 4 gbif Delphinus d… 2007-04-10 00:00:00 2007-04-10 NA s… 3 1 1933003…
#> 5 gbif Delphinus d… 2007-04-10 00:00:00 2007-04-10 NA s… 4 1 1933004…
#> 6 gbif Delphinus d… 2007-04-11 00:00:00 2007-04-11 NA s… 1 1 1934001…
#> 7 gbif Delphinus d… 2007-04-11 00:00:00 2007-04-11 NA s… 2 1 1934002…
#> 8 gbif Delphinus d… 2007-04-11 00:00:00 2007-04-11 NA s… 3 1 1934003…
#> 9 gbif Delphinus d… 2007-04-11 00:00:00 2007-04-11 NA s… 4 1 1934004…
#> 10 gbif Delphinus d… 2007-04-11 00:00:00 2007-04-11 NA s… 5 1 1934005…
#> # ℹ 44,027 more rows
#> # ℹ 2 more variables: mixed_sp_grp <chr>, geometry <POINT [m]>
presences_sf2
#> Simple feature collection with 44037 features and 17 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -839797.6 ymin: -631183.3 xmax: 840628.2 ymax: 586935
#> Projected CRS: +proj=laea +lat_0=38.5 +lon_0=-28 +datum=WGS84 +units=m +no_defs
#> First 10 features:
#> source species datetime date time sc tc presence
#> 1 gbif Stenella coeruleoalba 2007-04-10 2007-04-10 NA secs 1 1 10484001001
#> 2 gbif Delphinus delphis 2007-04-10 2007-04-10 NA secs 1 1 1933001001
#> 3 gbif Delphinus delphis 2007-04-10 2007-04-10 NA secs 2 1 1933002001
#> 4 gbif Delphinus delphis 2007-04-10 2007-04-10 NA secs 3 1 1933003001
#> 5 gbif Delphinus delphis 2007-04-10 2007-04-10 NA secs 4 1 1933004001
#> 6 gbif Delphinus delphis 2007-04-11 2007-04-11 NA secs 1 1 1934001001
#> 7 gbif Delphinus delphis 2007-04-11 2007-04-11 NA secs 2 1 1934002001
#> 8 gbif Delphinus delphis 2007-04-11 2007-04-11 NA secs 3 1 1934003001
#> 9 gbif Delphinus delphis 2007-04-11 2007-04-11 NA secs 4 1 1934004001
#> 10 gbif Delphinus delphis 2007-04-11 2007-04-11 NA secs 5 1 1934005001
#> mixed_sp_grp mld sst chl depth slope dst_all_sm
#> 1 0001001001 16.32740 16.57000 0.5599283 831.0224 0.3162881 30619.25
#> 2 0001002001 14.95407 16.51998 2.1153419 616.3859 7.8704503 30526.34
#> 3 0001003001 15.25926 16.55999 0.4679706 786.0898 0.8380182 28066.52
#> 4 0001004001 16.32740 16.60000 0.5599283 868.7615 2.1067018 29314.37
#> 5 0001005001 14.64889 16.60998 0.4254855 919.2671 3.2004743 27873.97
#> 6 0002001001 13.88592 16.77999 0.4627705 777.6098 3.5939226 29992.30
#> 7 0002002001 13.88592 16.82000 0.4815105 828.7536 1.0009347 28829.75
#> 8 0002003001 12.66518 16.73998 0.4886960 356.0018 18.0552602 34012.53
#> 9 0002004001 15.56444 16.77210 0.4784797 NA NA 37033.25
#> 10 0002005001 12.51259 16.82998 0.4634760 1208.8931 3.9953495 29045.14
#> dst_lrg_sm dst_sml_sm geometry
#> 1 45896.94 30619.25 POINT (-47310.49 -10036.94)
#> 2 38475.54 30526.34 POINT (-52938.36 -2508.946)
#> 3 40816.06 28066.52 POINT (-51663.29 -7790.11)
#> 4 47720.79 29314.37 POINT (-43688.28 -13202.49)
#> 5 49989.31 27873.97 POINT (-40352.78 -15994.97)
#> 6 42554.59 29992.30 POINT (-49768.66 -7357.982)
#> 7 44194.34 28829.75 POINT (-48861.64 -11378.21)
#> 8 45377.20 34012.53 POINT (-46437.58 -5083.639)
#> 9 53504.84 37033.25 POINT (-38022.26 -6811.32)
#> 10 53593.13 29045.14 POINT (-36244.88 -16051.4)
The rorquals_occurrence dataset is derived from presences_sf2
through the following steps:
- Observations with missing (NA) values for any climate or topographical variable are removed.
- Observations of the target species—Balaenoptera physalus (Fin whale), Balaenoptera borealis (Sei whale), Balaenoptera musculus (Blue whale), and Megaptera novaeangliae (Humpback whale)—are classified as “presence” in the class column, while all other species are labeled as “absence”.
- Observations that could simultaneously represent both presences of target species and absences of others are excluded.
- Observations located on land are removed. This issue may arise in a small subset of records sourced from GBIF, or due to rasterization effects in climate and topographical data, when merging locations that are very close to the shore (< 500 m).
rorquals_occurrence
#> Simple feature collection with 43337 features and 19 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -697580.6 ymin: -554293.5 xmax: 684416.8 ymax: 533719.9
#> Projected CRS: +proj=laea +lat_0=38.5 +lon_0=-28 +datum=WGS84 +units=m +no_defs
#> # A tibble: 43,337 × 20
#> source species class datetime date time sc tc
#> * <chr> <chr> <chr> <dttm> <date> <drt> <int> <int>
#> 1 gbif Stenella coeru… abse… 2007-04-10 00:00:00 2007-04-10 NA s… 1 1
#> 2 gbif Delphinus delp… abse… 2007-04-10 00:00:00 2007-04-10 NA s… 1 1
#> 3 gbif Delphinus delp… abse… 2007-04-10 00:00:00 2007-04-10 NA s… 2 1
#> 4 gbif Delphinus delp… abse… 2007-04-10 00:00:00 2007-04-10 NA s… 3 1
#> 5 gbif Delphinus delp… abse… 2007-04-10 00:00:00 2007-04-10 NA s… 4 1
#> 6 gbif Delphinus delp… abse… 2007-04-11 00:00:00 2007-04-11 NA s… 1 1
#> 7 gbif Delphinus delp… abse… 2007-04-11 00:00:00 2007-04-11 NA s… 2 1
#> 8 gbif Delphinus delp… abse… 2007-04-11 00:00:00 2007-04-11 NA s… 3 1
#> 9 gbif Delphinus delp… abse… 2007-04-11 00:00:00 2007-04-11 NA s… 5 1
#> 10 gbif Delphinus delp… abse… 2007-04-11 00:00:00 2007-04-11 NA s… 6 1
#> # ℹ 43,327 more rows
#> # ℹ 12 more variables: presence <chr>, mixed_sp_grp <chr>, mld <dbl>,
#> # sst <dbl>, chl <dbl>, depth <dbl>, slope <dbl>, dst_all_sm <dbl>,
#> # dst_lrg_sm <dbl>, dst_sml_sm <dbl>, geometry <POINT [m]>, is_in_eez <lgl>
# Point colours keyed by cetacean family name, consumed by
# scale_color_manual() below. The 8-digit hex codes (#RRGGBBAA) carry an
# alpha channel, so those two families are drawn semi-transparent.
colours <- c(
  Balaenopteridae = "#0F00FF",
  Physeteridae = "#FFEE00AF",
  Ziphiidae = "#FFA600",
  Delphinidae = "#E8E8E8AF",
  Kogiidae = "#00E000",
  Balaenidae = "#F404E2",
  Phocoenidae = "#04B3F4"
)

# Map of occurrences, coloured by family and split into one panel per season.
# NOTE(review): ggplot(), geom_sf(), aes(), etc. are called unqualified, so
# ggplot2 is assumed to be attached already -- confirm against the README
# setup chunk.
rorquals_occurrence |>
  # Keep only the rows flagged by the logical is_in_eez column.
  dplyr::filter(is_in_eez) |>
  dplyr::mutate(season = as_season(date)) |>
  # Presumably cetaceans() supplies the `family` column used below; verify.
  dplyr::left_join(cetaceans(), by = "species") |>
  # Drop rows left without a family value.
  tidyr::drop_na(family) |>
  ggplot() +
  geom_sf(data = CAOP.RAA.2024::districts(), fill = "black") +
  # Outline only (fill = NA) for the eez(distance = 200) boundary.
  geom_sf(
    data = CAOP.RAA.2024::eez(distance = 200),
    fill = NA,
    linewidth = 0.5,
    col = "darkgray"
  ) +
  geom_sf(mapping = aes(col = family), size = 0.1) +
  # Drawn after the points so the eez(distance = 12) outline stays on top.
  geom_sf(
    data = CAOP.RAA.2024::eez(distance = 12),
    fill = NA,
    linewidth = 0.5,
    col = "#FF7B7B"
  ) +
  scale_color_manual(values = colours, name = "Cetacean Family") +
  # Points are plotted at size 0.1; enlarge the legend keys to stay readable.
  guides(colour = guide_legend(override.aes = list(size = 3))) +
  facet_wrap(vars(season)) +
  theme_minimal() +
  theme(
    legend.title = element_text(hjust = 0.5),
    strip.text = element_text(size = 14)
  )