library(brandr)
library(beepr)
library(cli)
library(dplyr)
library(fs)
library(geodata)
library(ggplot2)
library(ggspatial)
library(groomr) # github.com/danielvartan/groomr
library(here)
library(httr2)
library(magrittr)
library(orbis) # github.com/danielvartan/orbis
library(osfr)
library(purrr)
library(readr)
library(rvest)
library(stringr)
library(terra)
library(tidyr)
library(tidyterra)
library(readr)
library(rutils) # github.com/danielvartan/rutils
library(zip)
A Reproducible Pipeline for Processing WorldClim 2.1 Historical Monthly Weather Data in Brazil
Overview
This report provides a fully reproducible pipeline for processing WorldClim 2.1 Historical Monthly Weather Data for Brazil using the R programming language.
Data Availability
The processed data are available in the tif format via a dedicated repository on the Open Science Framework (OSF), accessible here. You can also access these files directly from R using the osfr package.
Methods
Source of Data
The data used in this report come from the following sources:
- WorldClim 2.1: A downscaled version of high-resolution interpolated gridded climate datasets (Fick & Hijmans, 2017).
- CRU-TS-4.09: A high-resolution gridded dataset of climate variables, developed by the Climatic Research Unit at the University of East Anglia, covering the period from 1901 to 2024 (Harris et al., 2020).
Data Munging
The data munging followed the data science workflow outlined by Wickham et al. (2023), as illustrated in Figure 1. All processing was carried out using the Quarto publishing system (Allaire et al., n.d.), the R programming language (R Core Team, n.d.), and several R packages.
Spatial data processing was performed using the terra R package (Hijmans, n.d.). For data manipulation and workflow, packages from the tidyverse and rOpenSci ecosystems—adhering to the tidy tools manifesto (Wickham, 2023)—were prioritized. All steps were designed to ensure transparency and reproducibility of results.
Source: Reproduced from Wickham et al. (2023).
Code Style
The Tidyverse code style guide and design principles were followed to ensure consistency and enhance readability.
Reproducibility
The pipeline is fully reproducible and can be run again at any time. See the README file in the code repository to learn how to run it.
Set the Environment
Set the Initial Variables
Options
Code
# Show cli progress bars without a start-up delay.
options(cli.progress_show_after = 0)
WorldClim
# Data series to process; used to build the source URL and the folder names.
series <- "historical-monthly-weather-data"
# Resolution label; used to select the download URLs and to name the folders
# and the output files.
resolution <- "10m"
Directories
Code
# Folder for the raw (downloaded) data, located with `here()`.
raw_data_dir <- here("data-raw")
Code
# Raw WorldClim data get their own subfolder.
raw_data_wc_dir <- path(raw_data_dir, "worldclim")
Code
# One subfolder per data series.
raw_data_wc_series_dir <- path(raw_data_wc_dir, series)
Code
# One subfolder per resolution; dots in the label are swapped for hyphens.
res_label <- str_replace_all(resolution, "\\.", "\\-")
raw_data_wc_series_res_dir <- path(raw_data_wc_series_dir, res_label)
Code
# Folder that receives the processed rasters.
data_dir <- here("data")
Code
dirs <- c(
  raw_data_dir,
  raw_data_wc_dir,
  raw_data_wc_series_dir,
  raw_data_wc_series_res_dir,
  data_dir
)

# Create every folder that is not on disk yet.
for (dir_i in dirs) {
  if (dir_exists(dir_i)) next
  dir_create(dir_i, recurse = TRUE)
}
Shape
# Country code passed to `gadm()` and used in the output file names.
country <- "bra"
# Country boundary requested at level 0, with `raw_data_dir` as the download
# path.
shape <- gadm(
  country = country,
  level = 0,
  path = raw_data_dir
)
Download the Data
Scrape the WorldClim Website
Code
# Fetch and parse the page at the URL returned by `get_wc_url()`.
html <- read_html(get_wc_url(series, resolution))
Code
# Collect the target of every link on the page and keep those that contain
# "geodata".
urls <-
  html |>
  html_elements("a") |>
  html_attr("href") |>
  str_subset("geodata")
Code
# Keep only the files whose name carries the resolution label right after an
# underscore. The label is matched literally with `fixed()`, so a dot in it
# (as in "2.5m") is not treated as a regex wildcard.
urls <- urls[str_detect(basename(urls), fixed(paste0("_", resolution)))]
Create the Metadata
Code
# Query the size of every file and store the result as `fs_bytes`.
sizes <-
  urls |>
  map_dbl(
    .f = get_file_size,
    .progress = TRUE
  ) |>
  as_fs_bytes()
#> ■ 0% | ETA: ?
#> ■■ 4% | ETA: 27s
#> ■■■■■■■■■■■■■ 42% | ETA: 6s
#> ■■■■■■■■■■■■■■■■■■■■■■■■■ 79% | ETA: 2s
# One row per file, ordered by size, with a running total of the sizes.
metadata <-
  tibble(
    url = urls,
    file = basename(urls),
    size = sizes
  ) |>
  arrange(size) |>
  mutate(
    # A missing size marks a failed request. Count it as 0 bytes so that it
    # does not turn the running total into `NA`. The replacement is done on
    # the plain numeric values and the result is converted back to `fs_bytes`.
    size_cum_sum =
      size |>
      as.numeric() |>
      replace(is.na(size), 0) |>
      cumsum() |>
      as_fs_bytes()
  )

metadata |> select(-url)
Check for Errors
Code
{
# Report the value of `count_na(metadata$size)` as the number of URL requests
# that resulted in an error.
cli_alert_info(
paste0(
"{.strong {cli::col_red(count_na(metadata$size))}} ",
"url requests resulted in error."
)
)
# When at least one size is missing, list the names of the affected files.
if (count_na(metadata$size) > 0) {
cli_alert_info("Their file names are:")
cli_li(metadata$file[is.na(metadata$size)])
}
}Add LICENSE and README Files
Code
# Folder that will receive the downloaded files.
dir <- path(raw_data_wc_series_res_dir, "zip")

if (!dir_exists(dir)) {
  dir_create(dir, recurse = TRUE)
}
Code
# Write the license text at both the WorldClim root and the resolution folder.
dirs <- c(raw_data_wc_dir, raw_data_wc_series_res_dir)

for (i in dirs) {
  write_lines(wc_license(), path(i, "LICENSE.md"))
}
Code
# README without arguments at the WorldClim root; README built from the series
# and the resolution in the resolution folder.
write_lines(wc_readme(), path(raw_data_wc_dir, "README.md"))
write_lines(
  wc_readme(series, resolution),
  path(raw_data_wc_series_res_dir, "README.md")
)
Download the Files
Code
# Download every URL listed in the metadata into `dir`.
rutils::download_file(
  metadata$url,
  dir = dir,
  broken_links = TRUE
)

# Play a sound when the downloads are done.
beep(8)
Unzip the Files
Code
# Folder holding the downloaded zip archives.
zip_dir <- path(raw_data_wc_series_res_dir, "zip")
Code
# Folder that receives the extracted files.
tif_dir <- path(raw_data_wc_series_res_dir, "tif")

if (!dir_exists(tif_dir)) {
  dir_create(tif_dir, recurse = TRUE)
}
Code
# Copy any README/LICENSE file found in the zip folder to the tif folder.
if (dir_exists(zip_dir)) {
  accompanying_files <- c("README.md", "LICENSE.md")

  for (file_name in accompanying_files) {
    source_file <- path(zip_dir, file_name)

    if (!file_exists(source_file)) next

    file_copy(source_file, path(tif_dir, file_name), overwrite = TRUE)
  }
}
Code
# Extract every zip archive found in `zip_dir` into `tif_dir`, showing a
# progress bar with one step per archive.
if (dir_exists(zip_dir)) {
# Files in `zip_dir` whose path ends in "zip".
zip_files <-
zip_dir |>
dir_ls(type = "file", regexp = "zip$")
cli_progress_bar(
name = "Unzipping data",
total = length(zip_files),
clear = FALSE
)
# The progress bar is updated from this same frame, so the plain `for` loop
# is kept on purpose.
for (i in zip_files) {
i |>
zip::unzip(
overwrite = TRUE,
exdir = tif_dir
)
cli_progress_update()
}
cli_progress_done()
# Play a sound once all archives are extracted.
beep(1)
}Code
# Delete the zip folder once extraction is over.
if (dir_exists(zip_dir)) zip_dir |> dir_delete()Read the Data
Gather the tif Files
# Recursively list every file under the tif folder whose path ends in "tif".
tif_files <- dir_ls(tif_dir, type = "file", regexp = "tif$", recurse = TRUE)
Read the Data
# Build one raster object per variable, selecting the files by the variable
# tag in their path.
data_tmin <- rast(str_subset(tif_files, "_tmin_"))
data_tmax <- rast(str_subset(tif_files, "_tmax_"))
data_prec <- rast(str_subset(tif_files, "_prec_"))
Tidy the Data
# Tidy one raster:
# 1. name each layer after the year-month ("YYYY-MM") found in its current
#    layer name;
# 2. set the variable name of every layer to `var_name`;
# 3. crop and mask the raster to the country shape.
tidy_raster <- function(x, var_name) {
  year_month <- str_extract(names(x), "\\d{4}-\\d{2}")

  names(x) <- year_month
  varnames(x) <- rep(var_name, nlyr(x))

  crop(
    x,
    shape,
    snap = "near",
    mask = TRUE,
    touches = TRUE,
    extend = TRUE
  )
}

data_tmin <- tidy_raster(data_tmin, "tmin")
data_tmax <- tidy_raster(data_tmax, "tmax")
data_prec <- tidy_raster(data_prec, "prec")
Save the Data
# Write each raster to `data_dir` as "<resolution>_<variable>_<country>.tif",
# replacing any existing file.
rasters <- list(tmin = data_tmin, tmax = data_tmax, prec = data_prec)

for (var_name in names(rasters)) {
  out_file <- path(
    data_dir,
    paste0(resolution, "_", var_name, "_", country, ".tif")
  )

  terra::writeRaster(rasters[[var_name]], out_file, overwrite = TRUE)
}
Visualize the Data
# Pick the "2024-07" precipitation layer for the example map.
plot_data <- select(data_prec, all_of("2024-07"))

# Scale bar placed at the bottom right.
scale_bar <- annotation_scale(
  location = "br",
  style = "tick",
  width_hint = 0.25,
  height = unit(0.15, "cm")
)

# North arrow at the bottom right, padded upwards by `pad_y`.
north_arrow <- annotation_north_arrow(
  location = "br",
  height = unit(1, "cm"),
  width = unit(1, "cm"),
  pad_x = unit(0.1, "cm"),
  pad_y = unit(0.55, "cm"),
  style = north_arrow_fancy_orienteering
)

plot_labels <- labs(
  title = "Precipitation in Brazil (mm)",
  subtitle = "July 2024",
  fill = NULL,
  caption = "Source: WorldClim 2.1"
)

ggplot() +
  geom_spatraster(data = plot_data) +
  scale_fill_brand_c(color_type = "div") +
  scale_bar +
  north_arrow +
  plot_labels
#> Scale on map varies by more than 10%, scale bar may be inaccurate
How to Cite
When using this data, you must also cite the original data sources.
To cite this work, please use the following format:
Vartanian, D., & Carvalho, A. M. (2025). A reproducible pipeline for processing WorldClim 2.1 Historical Monthly Weather Data in Brazil [Computer software]. Sustentarea Research and Extension Group of the University of São Paulo. https://sustentarea.github.io/brazil-historical-climate
A BibTeX entry for LaTeX users is
@misc{vartanian2025,
title = {A reproducible pipeline for processing WorldClim 2.1 Historical Monthly Weather Data in Brazil},
author = {{Daniel Vartanian} and {Aline Martins de Carvalho}},
year = {2025},
address = {São Paulo},
institution = {Sustentarea Research and Extension Group of the University of São Paulo},
langid = {en},
url = {https://sustentarea.github.io/brazil-historical-climate}
}
License
The original data sources may have their own license terms and conditions.
The code in this report is licensed under the GNU General Public License Version 3, while the report is available under the Creative Commons CC0 License.
Copyright (C) 2025 Daniel Vartanian
The code in this report is free software: you can redistribute it and/or
modify it under the terms of the GNU General Public License as published by the
Free Software Foundation, either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <https://www.gnu.org/licenses/>.
Acknowledgments
This work is part of the Sustentarea Research and Extension Group project: Global syndemic: The impact of anthropogenic climate change on the health and nutrition of children under five years old attended by Brazil’s public health system (SUS).
This work was supported by the Department of Science and Technology of the Secretariat of Science, Technology, and Innovation and of the Health Economic-Industrial Complex (SECTICS) of the Ministry of Health of Brazil, and the National Council for Scientific and Technological Development (CNPq) (grant no. 444588/2023-0).

