# Read the BAJA_WEST sheet of the raw atlas workbook and standardise its
# site keys.
Baja_west <- read_excel(
  path = here("data", "raw", "atlas_locations_raw.xlsx"),
  sheet = "BAJA_WEST"
) %>%
  mutate(
    # Reformat Clave so it accurately reflects the four-decimal ID.
    # sprintf() is used rather than format(): format() pads every element of
    # a vector to a common width, so keys with different numbers of integer
    # digits would pick up leading spaces and stop matching exactly.
    Clave = sprintf("%.4f", round(as.numeric(stri_trim_both(Clave)), 4))
  ) %>%
  # Drop the Num column
  select(-Num)

# Site Key Cleaning
# Step 1) Clean each group of sites
# Baja West cleaning
# Baja East cleaning
# Read the BAJA_EAST sheet of the raw atlas workbook and standardise its
# site keys.
Baja_east <- read_excel(
  path = here("data", "raw", "atlas_locations_raw.xlsx"),
  sheet = "BAJA_EAST"
) %>%
  mutate(
    # Reformat Clave so it accurately reflects the four-decimal ID.
    # sprintf() is used rather than format(): format() pads every element of
    # a vector to a common width, so keys with different numbers of integer
    # digits would pick up leading spaces and stop matching exactly.
    Clave = sprintf("%.4f", round(as.numeric(stri_trim_both(Clave)), 4))
  ) %>%
  # Drop the Num and Mapa columns
  select(-Num, -Mapa)

# Baja Sur West cleaning
# Read the BAJASUR_WEST sheet of the raw atlas workbook and standardise its
# site keys.
Bajasur_west <- read_excel(
  path = here("data", "raw", "atlas_locations_raw.xlsx"),
  sheet = "BAJASUR_WEST"
) %>%
  mutate(
    # Reformat Clave so it accurately reflects the four-decimal ID.
    # sprintf() is used rather than format(): format() pads every element of
    # a vector to a common width, so keys with different numbers of integer
    # digits would pick up leading spaces and stop matching exactly.
    Clave = sprintf("%.4f", round(as.numeric(stri_trim_both(Clave)), 4))
  ) %>%
  # Drop the Num and Mapa columns
  select(-Num, -Mapa)

# Baja Sur East cleaning
# Read the BAJASUR_EAST sheet of the raw atlas workbook and standardise its
# site keys.
Bajasur_east <- read_excel(
  path = here("data", "raw", "atlas_locations_raw.xlsx"),
  sheet = "BAJASUR_EAST"
) %>%
  mutate(
    # Reformat Clave so it accurately reflects the four-decimal ID.
    # sprintf() is used rather than format(): format() pads every element of
    # a vector to a common width, so keys with different numbers of integer
    # digits would pick up leading spaces and stop matching exactly.
    Clave = sprintf("%.4f", round(as.numeric(stri_trim_both(Clave)), 4))
  ) %>%
  # Drop the Num and Mapa columns
  select(-Num, -Mapa)

# Sonora cleaning
# Read the SONORA sheet of the raw atlas workbook, rename its upper-case
# headers and standardise its site keys.
Sonora <- read_excel(
  path = here("data", "raw", "atlas_locations_raw.xlsx"),
  sheet = "SONORA"
) %>%
  # Drop the NUM. and MAPA columns
  select(-NUM., -MAPA) %>%
  # Rename so the columns can be row-bound with the other regions
  # (presumably those sheets already use these names -- verify against the
  # workbook)
  rename(
    Localidad = LOCALIDAD,
    Clave = CLAVE,
    Captura = CAPTURA,
    Desembarque = DESEMBAR.
  ) %>%
  mutate(
    # Reformat Clave so it accurately reflects the four-decimal ID.
    # sprintf() is used rather than format(): format() pads every element of
    # a vector to a common width, so keys with different numbers of integer
    # digits would pick up leading spaces and stop matching exactly.
    Clave = sprintf("%.4f", round(as.numeric(stri_trim_both(Clave)), 4))
  )

# Step 2) Combining each group into one comprehensive data.frame
# Stack the five cleaned regional tables into a single site-key table.
# rbind() on data frames requires every table to have the same columns.
sites_key <- rbind(
  Baja_east,
  Baja_west,
  Bajasur_east,
  Bajasur_west,
  Sonora
)

# Step 3) Add geospatial reference to the cleaned sites
# Load the landing-sites geopackage and store Clave as whitespace-trimmed
# text so it can be matched against the character keys in the site list.
gpkg <- st_read(dsn = here("data", "processed", "landingsites.gpkg")) %>%
  mutate(Clave = trimws(as.character(Clave)))

# Console output printed by st_read() when this chunk was run:
#> Reading layer `landingsites' from data source
#>   `/Users/jcvd/GitHub/mex_fishing_locations/data/processed/landingsites.gpkg'
#>   using driver `GPKG'
#> Simple feature collection with 1714 features and 2 fields
#> Geometry type: POINT
#> Dimension:     XY
#> Bounding box:  xmin: -118.403 ymin: 22.87 xmax: -109.042 ymax: 32.6508
#> Geodetic CRS:  WGS 84
# Attach the geopackage rows to every site key and turn the result into an
# sf object.
sites_key_geo <- sites_key %>%
  # Keep every site; keys without a geopackage match get no geometry
  left_join(gpkg, by = "Clave") %>%
  # Both tables carry a Localidad column; label each by its origin
  rename(
    Localidad_key = Localidad.x,
    Localidad_geo = Localidad.y
  ) %>%
  st_as_sf(crs = "EPSG:4326")

# Step 4) Minor cleanup after join
# A few problematic sites exist where a single key is mapped to different locations
# Check which keys in the site list match multiple rows in the geopackage
# Count how many geopackage rows share each key
key_count <- count(gpkg, Clave, name = "count")

# Attach those counts to the site list and keep only the sites whose key
# matches more than one geopackage row
site_check <- sites_key %>%
  left_join(key_count, by = "Clave") %>%
  filter(count > 1)

# Check which keys in the geopackage match multiple rows in the site list
# Count how many site-list rows share each key
site_count <- count(sites_key, Clave, name = "count")

# Attach the geopackage rows to those counts and keep only the keys that
# appear more than once in the site list
key_check <- site_count %>%
  left_join(gpkg, by = "Clave") %>%
  filter(count > 1)

# Fix duplicates
# Using locality name, manually selecting one location per key for problem keys
# For each problem key, keep only the row whose geopackage locality name is
# the one chosen by hand; rows for all other keys pass through untouched.
sites_key_geo <- sites_key_geo %>%
  filter(
    # MUELLE (1.0081)
    !(Clave == "1.0081" & Localidad_geo != "MUELLE"),
    # BOCANA EL ROSARIO (1.0236)
    !(Clave == "1.0236" & Localidad_geo != "ROSARIO BOCANA EL"),
    # PUERTO CORTEZ (2.0306)
    !(Clave == "2.0306" & Localidad_geo != "CORTEZ PUERTO"),
    # BOCA DE PIEDRA (5.0321)
    !(Clave == "5.0321" & Localidad_geo != "PIEDRA BOCA DE")
  ) %>%
  # Remove empty geometries
  filter(!st_is_empty(.))

# Step 4) Visualize data before exporting
# Interactive map of the cleaned, georeferenced sites for a visual check
mapview::mapview(sites_key_geo)

# Step 5) Saving final clean geopackage
# Destination of the cleaned, georeferenced site-key layer
clean_path <- here("data", "processed", "sites_key.gpkg")

# Write the sf object to the geopackage at that path
write_sf(sites_key_geo, dsn = clean_path)