Bristol R Users Meetup, 2025-05-21
Using the inherently unexciting topic of ✨postcodes✨….
To talk about some useful R packages
{duckdb}
{sf}
{geos}
We will not be producing statistics below country level because the use of interpolation for smaller areas is additionally challenging, with the quality of the resulting values liable to be lower. We therefore recommend that comparisons between local areas in different parts of the UK should be based on the respective 2021 and 2022 census data. Users should, however, be aware of any factors that may complicate direct comparability between those dates.
Shout out to Alasdair Rae’s Greggs spider map… https://alasdairrae.github.io/steakbakespider/
shop_name | address_house_number_or_name | address_street_name | address_city | address_post_code | address_longitude | address_latitude |
---|---|---|---|---|---|---|
Bargoed, U5 The Plateau | U5 | The Plateau | Bargoed | CF81 8QT | -3.228921 | 51.68822 |
Rotherham, 2 Effingham St | 2/6 Effingham Street | Rotherham | S65 1AJ | -1.355389 | 53.43163 | |
Nottingham, U32 Victoria CTR | U32 Victoria Centre | Lower Parliament Street | Nottingham | NG1 3QE | -1.147138 | 52.95630 |
Birmingham, 85 New St | 85 New Street | Birmingham | B2 4BA | -1.901780 | 52.47934 | |
Daventry, U20 Bowen Sq | U20 | Bowen Square | Daventry | NN11 4DR | -1.161484 | 52.25734 |
Birmingham, U38 Great Western Arc | 37/38 Great Western Arcade | Birmingham | B2 5HU | -1.897459 | 52.48195 |
greggs_table <- greggs_data |>
select(-c("address_longitude", "address_latitude"))
gt(greggs_table |> head())
shop_name | address_house_number_or_name | address_street_name | address_city | address_post_code |
---|---|---|---|---|
Bargoed, U5 The Plateau | U5 | The Plateau | Bargoed | CF81 8QT |
Rotherham, 2 Effingham St | 2/6 Effingham Street | Rotherham | S65 1AJ | |
Nottingham, U32 Victoria CTR | U32 Victoria Centre | Lower Parliament Street | Nottingham | NG1 3QE |
Birmingham, 85 New St | 85 New Street | Birmingham | B2 4BA | |
Daventry, U20 Bowen Sq | U20 | Bowen Square | Daventry | NN11 4DR |
Birmingham, U38 Great Western Arc | 37/38 Great Western Arcade | Birmingham | B2 5HU |
{readr}
& {dplyr}
{duckdb}
& {dbplyr}
tic()
# Create duckdb connection
con <- dbConnect(duckdb())
# Register Greggs data to duckdb, so we can use it in the duckdb pipeline
duckdb_register(con, "greggs_table", greggs_table)
# Carry out the join within duckdb
greggs_coords <- tbl(con, "greggs_table") |>
mutate(pcds = str_remove_all(address_post_code, " ")) |>
left_join(
tbl(con, "data/NSPL_FEB_2025_UK.csv") |>
select(pcds, long, lat) |>
mutate(pcds = str_remove_all(pcds, " ")),
by = "pcds"
) |>
collect()
toc()
2.72 sec elapsed
Quite a bit faster in this example - with larger datasets come larger time savings.
greggs_coords_sf <- greggs_coords |>
st_as_sf(coords = c("long","lat"), # specify X and Y columns
na.fail = FALSE,
crs = 4326) |>
st_transform(27700) |>
arrange(shop_name)
distance_from_API_location <- st_distance(
greggs_sf |> st_transform(27700) |> arrange(shop_name),
greggs_coords_sf,
by_element = TRUE
)
# add distances to both tables for easier filtering when checking geocodes look reasonable:
greggs_coords_sf$m_from_API_location <- round(
as.numeric(distance_from_API_location), 2
)
top5_discrepancies <- greggs_coords_sf |>
select(shop_name, address_street_name, address_post_code, m_from_API_location) |>
arrange(desc(m_from_API_location)) |>
slice_head(n = 5)
shop_name | address_street_name | address_post_code | m_from_API_location |
---|---|---|---|
EG On The Move Gibbets Cross | Watling Street | LE17 6AR | 22503.24 |
ASDA EXPRESS Kimmel Park East | 55 Expressway | LL18 5XE | 14219.95 |
BLAKEMORE Phoenix Park | 205 Nottingham Road | NG16 1AE | 4100.68 |
ASDA EXPRESS Kildean | Kildean Business Park | FK9 4UA | 3696.36 |
APPLEGREEN M1 Northbound | Tullyacross | BT27 5SG | 2603.26 |
{sf}
{sf}
2tic()
centroids <- sf::read_sf(
centroid_path,
# Exclude terminated postcodes (whether they have a grid ref or not),
# those with no grid ref, and 'Large user' postcodes
query = "SELECT PCDS, LONG, LAT, SHAPE FROM NSPL_LATEST_UK
WHERE OSGRDIND NOT IN (8, 9)
AND USERTYPE = 0
AND DOTERM IS NULL"
) |>
clean_names() |>
mutate(pcds_district = word(pcds, 1) |> str_squish())
sf_voronoi <- centroids |>
# Create voronoi polygons for each postcode via sf
st_geometry() |> # to get sfc from sf
st_union() |> # to get a sfc of MULTIPOINT type
sf::st_voronoi(envelope = st_geometry(bounds)) |>
st_collection_extract(type = "POLYGON") |> # a list of polygons
st_sf() |>
st_join(centroids) # add attribute cols back in
# rename geometry column
# (otherwise it'll be named after the functions that produced it.)
st_geometry(sf_voronoi) <- "geometry"
toc()
{sf}
…{geos}
instead{geos}
provides access to the GEOS C library through R.
geos_bounds <- bounds |> as_geos_geometry()
message("Voronoi union via {geos}:")
tic()
geos_voronoi <- centroids |>
as_geos_geometry() |>
geos_make_collection() |>
geos_voronoi_polygons(
env = geos_bounds
) |>
st_as_sf() |>
st_collection_extract(type = "POLYGON") |>
# get attributes back
st_join(centroids) |>
as_tibble() |>
group_by(pcds_district) |>
summarise(
geometry = geometry |>
geos_make_collection() |>
geos_unary_union()
) |>
# convert from tibble back to sf object
st_as_sf() |>
# trim to UK bounds or some borders extend out to sea
st_intersection(bounds)
toc()
# ~240 seconds
Compared with Google Maps:
bristol_pop_oa21 <- read_sf(
"data/Output_Areas_2021_EW_BGC_V2.gpkg",
query = "SELECT OA21CD, SHAPE FROM OA_2021_EW_BGC_V2"
) |>
st_filter(geos_voronoi_bristol, .predicate = st_intersects) |>
inner_join(bristol_pop_table, by = "OA21CD")
overlay <- tm_shape(bristol_pop_oa21) + tm_fill(col = "population",
style = "fisher") +
tm_shape(geos_voronoi |>
filter(str_detect(pcds_district, regex("^BS\\d$")))
) +
tm_borders() +tm_text("pcds_district")
sf::st_interpolate_aw()
Longair, Mark (2021) “Open Data GB Postcode Unit Boundaries”. August 8, 2021. https://longair.net/blog/2021/08/23/open-data-gb-postcode-unit-boundaries/
Pebesma, E., (2018). Simple Features for R: Standardized Support for Spatial Vector Data. The R Journal 10 (1), 439-446. https://doi.org/10.32614/RJ-2018-009
Dunnington D, Pebesma E (2023). geos: Open Source Geometry Engine (‘GEOS’) R API. R package version 0.2.4. https://github.com/paleolimbot/geos/
McDermott, Grant (2022). “Fast geospatial tasks with data.table, geos & co.”. January 14, 2022. https://grantmcdermott.com/posts/fast-geospatial-datatable-geos/
Parry, Josiah. (2023). “R-Spatial Beyond Sf.” September 8, 2023. https://geocompx.org/post/2023/beyond-sf/
https://r-spatial.github.io/sf/
https://www.ons.gov.uk/methodology/geography/geographicalproducts/postcodeproducts
A guide to ONS Georaphy Products
ONS - Limitations of using postcodes as a geographic reference