library(tidyverse)
library(here)
library(sf)
library(sp)
library(tmap)
library(osmdata)
library(ggmap)
library(readxl)
library(classInt)
library(cowplot)
library(maps)
library(ggspatial)
library(shiny)
library(RColorBrewer)
library(leaflet)
library(leaflet.esri)
library(janitor)
Step one: load data and inspect it!
trees_raw <- read_csv(here::here("static/projects/lab08data/Parks_Tree_Inventory.csv")) %>%
glimpse()
## Parsed with column specification:
## cols(
## .default = col_character(),
## X = col_double(),
## Y = col_double(),
## OBJECTID = col_double(),
## DBH = col_double(),
## TreeHeight = col_double(),
## CrownWidthNS = col_double(),
## CrownWidthEW = col_double(),
## CrownBaseHeight = col_double(),
## UserID = col_double(),
## Structural_Value = col_double(),
## Carbon_Storage_lb = col_double(),
## Carbon_Storage_value = col_double(),
## Carbon_Sequestration_lb = col_double(),
## Carbon_Sequestration_value = col_double(),
## Stormwater_ft = col_double(),
## Stormwater_value = col_double(),
## Pollution_Removal_value = col_double(),
## Pollution_Removal_oz = col_double(),
## Total_Annual_Benefits = col_double()
## )
## See spec(...) for full column specifications.
#load the shp data with the sf package
pdx_bounds <- st_read(here::here("static/projects/lab08data/Neighborhoods__Regions_-shp/"))
river_bounds <- st_read(here::here("static/projects/lab08data/Willamette_Columbia_River_Ordinary_High_Water-shp/"))
trees_shape <- st_read(here::here("static/projects/lab08data/Parks_Tree_Inventory-shp/"))
farm_mark <- st_read(here::here("static/projects/lab08data/Farmers_Markets-shp/"))
grocerys <- st_read(here::here("static/projects/lab08data/Grocery_Stores-shp/"))
st_crs(trees_shape)
st_crs(pdx_bounds)
st_crs(river_bounds)
st_crs(farm_mark)
st_crs(grocerys)
So we know that all three use the crs 3857, which is also WGS 84 or Pseudo Mercator, an the units of the map are in meters
trees_shape$geometry
## Geometry set for 25534 features
## geometry type: POINT
## dimension: XY
## bbox: xmin: -13667450 ymin: 5692149 xmax: -13635010 ymax: 5724486
## CRS: 3857
## First 5 geometries:
## POINT (-13658194 5712487)
## POINT (-13658213 5712485)
## POINT (-13658254 5712491)
## POINT (-13658227 5712487)
## POINT (-13658234 5712488)
Here are our relative locations! xmin: -13667450 ymin: 5692149 xmax: -13635010 ymax: 5724486
Let’s start mapping!
Challenge 1:
Make a version of this map centered on your neighborhood, zoomed such that each of the bordering neighborhoods is visible (if “each” is too many to be useful, go with “most” or “some”).
pdx_bounds %>%
ggplot() +
geom_sf() +
coord_sf(
xlim=c(-13655500,-13645500),
ylim=c(5698900, 5704250)) +
geom_sf_label(aes(label=MAPLABEL)) #MAPLABEL is a column within pdx_bounds
Challenge 2:
That blue is pretty deep and intense; change it to something a bit more river-like. While you’re at it, the grey background for the neighborhoods is also a bit blah. Try changing it to something else.
pdx_bounds %>%
ggplot() +
geom_sf(fill = "darkred", colour = "grey80") +
geom_sf(data=river_bounds, fill="#026F86", size=0.0, alpha = 0.85)+
coord_sf(xlim=c(-13678000,-13630000), ylim=c(5689000, 5727000)) +
theme_dark()
Challenge 3:
Pick a different projection, and show how it looks with at least two different center points. Here is a guide to a little bit more information about the syntax for the proj4 specification.
world1 <- st_as_sf(map('world', plot = FALSE, fill = TRUE))
world2 <- st_transform(
world1, "+proj=laea +lat_0=54.48 +lon_0=-5.93 +ellps=EPSG:102663 +no_defs"
)
ggplot() +
geom_sf(data = world2) +
labs(title = "Hawaii State Plane Zone 3 projection (EPSG:102663)",
subtitle = "centered on Belfast, Northern Ireland (lon: -5.93, lat: 54.48)")
world3 <- st_transform(
world1, "+proj=laea +lat_0=-25.69 +lon_0=30.04 +ellps=EPSG:102663 +no_defs"
)
ggplot() +
geom_sf(data = world3) +
labs(title = "Hawaii State Plane Zone 3 projection (EPSG:102663)",
subtitle = "centered on Belfast, South Africa (lon: 30.04, lat: -25.69)")
Challenge 4:
Create a map showing the location of grocery stores and farmer’s markets, with a different point shape used to indicate which is which.
# legend_fix_ugh <- rergsfd g5ermhj 4liueksadjf.olesfjpoew;lj. fhrsdfapoe;wl.s/kfc pe, UGH I HATE THESE
pdx_bounds %>%
ggplot() +
geom_sf(aes(fill = "M"), colour = "grey50", show.legend = FALSE) +
geom_sf(data = river_bounds, aes(fill = "R"), size = 0.0, alpha = 0.85, show.legend = FALSE) +
geom_sf(data = grocerys, aes(fill = "G"), colour = "#111111",
shape = 21, stroke = 0.25, size = 2, show.legend = "point") +
geom_sf(data = farm_mark, aes(fill = "F"), colour = "#111111",
shape = 24, stroke = 0.25, size = 2, show.legend = "point") +
coord_sf(xlim = c(-13678000,-13630000),
ylim = c(5689000, 5727000)) +
labs(title = "Challenge 4 map",
fill = "Map: ") +
scale_fill_manual(breaks = c("G", "F"), #NOTE FOR FUTURE: FOR SOME REASON breaks CONTROLS THE LEGEND
values = c("M" = "#eeeeee", "R" = "#026F86", "G" = "gold1", "F" = "darkred"),
guide = guide_legend(
label = TRUE,
override.aes = list(show.legend = c("point", "point"),
shape = c(21, 24),
fill = c("gold1", "darkred")),
),
labels = c("Grocery stores", "Farmers' markets")) +
theme_minimal()
Challenge 5:
Follow this process (cropping the data set to a bounding box, etc.) for a park in your neighborhood, and create a comparable map.
pdx_bounds %>%
ggplot() +
geom_sf() +
geom_sf(data = trees_shape) +
coord_sf(
xlim=c(-13650000,-13645000),
ylim=c(5701000, 5704750)) +
theme_minimal()
ch5_box <- data.frame(lon = c(-122.605, -122.585), lat = c(45.506, 45.518))
sf_project(st_crs(4326), st_crs(3857), ch5_box)
## [,1] [,2]
## [1,] -13648326 5701536
## [2,] -13646100 5703442
ch5_trees <- st_crop(trees_shape, xmin = -13648326, xmax = -13646100, ymin = 5701536, ymax = 5703442)
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
paste0("Total number of trees in original dataset: ", length(trees_shape$geometry))
## [1] "Total number of trees in original dataset: 25534"
paste0("Number of trees in our bounding box: ", length(ch5_trees$geometry))
## [1] "Number of trees in our bounding box: 1131"
pdx_bounds %>%
ggplot() +
geom_sf() +
geom_sf(data = ch5_trees) +
coord_sf(
xlim=c(-13648326,-13646100),
ylim=c(5701536, 5703442)) +
labs(title = "Trees in Mt. Tabor Park") +
theme_minimal()
Challenge 6:
Using either ggmap
or ggspatial
, take your local park’s tree map (from challenge #5) and place it on a OpenStreetMap raster tile. Use at least one aesthetic mapping (color, point shape, etc.) to visualize at least one type of data about the trees (other than genus).
Feel free to experiment with different map tile styles!
st_bbox(ch5_trees)
## xmin ymin xmax ymax
## -13647959 5701727 -13646633 5703148
ch6_map <- get_stamenmap(c(left = -122.605, bottom = 45.506, right = -122.585, top = 45.518),
zoom = 14,
maptype = "terrain-background")
## Source : http://tile.stamen.com/terrain-background/14/2612/5860.png
## Source : http://tile.stamen.com/terrain-background/14/2613/5860.png
## Source : http://tile.stamen.com/terrain-background/14/2612/5861.png
## Source : http://tile.stamen.com/terrain-background/14/2613/5861.png
ggmap(ch6_map)
ch6_proj <- st_transform(ch5_trees, st_crs(4326))
ggmap(ch6_map) +
geom_sf(data = ch6_proj, inherit.aes = FALSE,
size = 0.5,
aes(colour = Pollutio_1)) +
scale_colour_distiller(palette = "BuPu", direction = -1) +
labs(title = "Environmental potential of trees in Mt. Tabor",
colour = "Pollution Removal (oz)")
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
interactive sidbar
leaflet(ch6_proj) %>%
addEsriBasemapLayer(esriBasemapLayers$Imagery) %>%
setView(-122.593, 45.512, zoom = 15) %>%
addCircleMarkers(label = ~Pollutio_1, radius = 5)
Challenge 7:
Look at the original spreadsheet and pick another column of interest (besides total population); revise the data ingest process, and map that column instead.
pdx_hh <- read_excel(here::here("static/projects/lab08data/Census_2010_Data_Cleanedup.xlsx"),
sheet = "Census_2010_Neighborhoods",
.name_repair = "universal",
trim_ws = TRUE) %>%
clean_names()
## New names:
## * `` -> ...1
## * `Total Population` -> Total.Population
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
pdx_hh <- pdx_hh %>%
select(pdx_nhd = x1, nonfam = x69, total_hh = housing_units) %>%
mutate(pdx_nhd=as.factor(pdx_nhd))
# we need to do a tiny bit of cleanup; the census spreadsheet has some slightly different names
pdx_hh <- pdx_hh %>% mutate(pdx_nhd=recode(pdx_nhd,
"ARGAY" = "ARGAY TERRACE",
"BROOKLYN" = "BROOKLYN ACTION CORPS",
"BUCKMAN" = "BUCKMAN COMMUNITY ASSOCIATION",
"CENTENNIAL" = "CENTENNIAL COMMUNITY ASSOCIATION",
"CULLY" = "CULLY ASSOCIATION OF NEIGHBORS",
"CENTENNIAL" = "CENTENNIAL COMMUNITY ASSOCIATION",
"DOWNTOWN" = "PORTLAND DOWNTOWN",
"GOOSE HOLLOW" = "GOOSE HOLLOW FOOTHILLS LEAGUE",
"HAYDEN ISLAND" = "HAYDEN ISLAND NEIGHBORHOOD NETWORK",
"HOSFORD-ABERNETHY" = "HOSFORD-ABERNETHY NEIGHBORHOOD DISTRICT ASSN.",
"IRVINGTON" = "IRVINGTON COMMUNITY ASSOCIATION",
"LLOYD DISTRICT" = "LLOYD DISTRICT COMMUNITY ASSOCIATION",
"NORTHWEST DISTRICT" = "NORTHWEST DISTRICT ASSOCIATION",
"OLD TOWN-CHINATOWN" = "OLD TOWN COMMUNITY ASSOCIATION",
"PARKROSE HEIGHTS" = "PARKROSE HEIGHTS ASSOCIATION OF NEIGHBORS",
"PEARL" = "PEARL DISTRICT",
"SABIN" = "SABIN COMMUNITY ASSOCIATION",
"SELLWOOD-MORELAND" = "SELLWOOD-MORELAND IMPROVEMENT LEAGUE",
"SOUTHWEST HILLS" = "SOUTHWEST HILLS RESIDENTIAL LEAGUE",
"SUMNER" = "SUMNER ASSOCIATION OF NEIGHBORS",
"SUNDERLAND" = "SUNDERLAND ASSOCIATION OF NEIGHBORS",
"WILKES" = "WILKES COMMUNITY GROUP"
))
pdx_hh <- pdx_hh %>%
filter(!is.na(pdx_nhd) & !is.na(nonfam) & !is.na(total_hh) & nonfam != "P0190017") %>%
mutate(nonfam = as.numeric(nonfam),
total_hh = as.numeric(total_hh)) %>%
mutate(nonfam_hh = (nonfam / total_hh))
pdx_hh
## # A tibble: 95 x 4
## pdx_nhd nonfam total_hh nonfam_hh
## <fct> <dbl> <dbl> <dbl>
## 1 ALAMEDA 177 2058 0.0860
## 2 ARBOR LODGE 478 2774 0.172
## 3 ARDENWALD-JOHNSON CREEK 204 2118 0.0963
## 4 ARGAY TERRACE 197 2498 0.0789
## 5 ARLINGTON HEIGHTS 20 307 0.0651
## 6 ARNOLD CREEK 67 1221 0.0549
## 7 ASHCREEK 252 2535 0.0994
## 8 BEAUMONT-WILSHIRE 257 2319 0.111
## 9 BOISE 440 1537 0.286
## 10 BRENTWOOD-DARLINGTON 695 5107 0.136
## # … with 85 more rows
# and now left join with our boundary data set- note that there will be some regions with no population data
bounds_hh <- left_join(pdx_bounds, pdx_hh, by=c("NAME"="pdx_nhd")) %>%
mutate(nonfam_area = (nonfam / (Shape_Area / 1E6)))
glimpse(bounds_hh)
## Rows: 98
## Columns: 15
## $ OBJECTID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
## $ NAME <fct> CATHEDRAL PARK, UNIVERSITY PARK, PIEDMONT, WOODLAWN, CULL…
## $ COMMPLAN <fct> NA, NA, ALBINA, ALBINA, NA, ALBINA, ALBINA, ALBINA, NA, N…
## $ SHARED <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ COALIT <fct> NPNS, NPNS, NPNS, NECN, CNN, NPNS, NPNS, NECN, EPCO, CNN,…
## $ HORZ_VERT <fct> HORZ, HORZ, VERT, HORZ, HORZ, HORZ, HORZ, HORZ, HORZ, HOR…
## $ MAPLABEL <fct> Cathedral Park, University Park, Piedmont, Woodlawn, Cull…
## $ ID <int> 31, 88, 70, 93, 23, 2, 66, 19, 67, 84, 44, 48, 89, 63, 74…
## $ Shape_Leng <dbl> 11434.255, 11950.860, 10849.327, 8078.361, 18179.392, 946…
## $ Shape_Area <dbl> 5424297.8, 6981456.8, 6079530.0, 3870553.6, 16580624.7, 4…
## $ nonfam <dbl> 264, 401, 503, 446, 723, 478, 559, 784, 230, 129, 458, 61…
## $ total_hh <dbl> 1703, 1834, 2983, 2084, 5230, 2774, 2756, 4001, 2314, 898…
## $ nonfam_hh <dbl> 0.15502055, 0.21864776, 0.16862219, 0.21401152, 0.1382409…
## $ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((-13664216 5..., MULTIPOLYGON…
## $ nonfam_area <dbl> 48.66989, 57.43787, 82.73666, 115.22899, 43.60511, 104.20…
bounds_hh %>%
ggplot() +
geom_sf(aes(fill = nonfam_hh)) +
scale_fill_distiller(palette = "OrRd", direction = 1, na.value = "grey35") +
labs(title = "Proportion of non-family households",
subtitle = "Nonfamily households per total households",
fill = "Nonfamily households") +
geom_sf(data = river_bounds, fill="#026F86", size=0.0) +
theme_minimal()
Note: I calculated the proportion of nonfamily households divided by total households. Since this would remove the variable of total # people per neighborhood, I don’t think I am just re-making a population density map.
Challenge 8:
Perform a similar analysis for the census data column you are working with. Is six the right number of bins? Which method works best for your specific data?
bounds_hh %>%
mutate(NAME = fct_reorder(NAME, desc(nonfam_hh))) %>%
ggplot(aes(x = NAME, y = nonfam_hh)) +
geom_bar(stat = "identity", na.rm = TRUE) +
theme_minimal()
## Warning: Removed 4 rows containing missing values (position_stack).
bounds_hh %>%
ggplot(aes(x = nonfam_hh)) +
geom_histogram() +
theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 4 rows containing non-finite values (stat_bin).
# following https://blog.datawrapper.de/how-to-choose-a-color-palette-for-choropleth-maps/
# and https://rpubs.com/danielkirsch/styling-choropleth-maps
min.nf <- min(bounds_hh$nonfam_hh, na.rm=TRUE)
max.nf <- max(bounds_hh$nonfam_hh, na.rm=TRUE)
diff.nf <- max.nf - min.nf
std.dev.nf <- sd(bounds_hh$nonfam_hh, na.rm = TRUE)
# some possible color break points - NOTE: 8 LOOKS MORE EVEN THAN 6
equal.interval <- seq(min.nf, max.nf, by = diff.nf / 8)
quantile.interval <- quantile(bounds_hh$nonfam_hh, probs=seq(0, 1, by = 1/8), na.rm = TRUE)
std.interval <- c(seq(min.nf, max.nf, by=std.dev.nf), max.nf)
jenks.interval <- classIntervals(bounds_hh$nonfam_hh, n=8, style='jenks')$brks
## Warning in classIntervals(bounds_hh$nonfam_hh, n = 8, style = "jenks"): var has
## missing values, omitted in finding classes
bounds_hh$nf.equal = cut(bounds_hh$nonfam_h, breaks = equal.interval, include.lowest = TRUE)
bounds_hh$nf.quantile = cut(bounds_hh$nonfam_h, breaks = quantile.interval, include.lowest = TRUE)
bounds_hh$nf.std = cut(bounds_hh$nonfam_h, breaks = std.interval, include.lowest = TRUE)
bounds_hh$nf.jenks = cut(bounds_hh$nonfam_h, breaks = jenks.interval, include.lowest = TRUE)
nfBarChart <- function(break_col) {
bounds_hh %>%
filter(!is.na(nonfam_hh)) %>%
ggplot(mapping = aes(x=fct_reorder(NAME, -nonfam_hh), y=nonfam_hh)) +
geom_col(aes(fill=.data[[break_col]])) +
theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) +
scale_y_continuous(name = "nonfam per total hh") +
scale_x_discrete(name=NULL) +
scale_fill_discrete(guide=FALSE) +
ggtitle(break_col)
}
plot_grid(
nfBarChart("nf.equal"),
nfBarChart("nf.quantile"),
nfBarChart("nf.std"),
nfBarChart("nf.jenks"),
nrow=2, ncol=2
)
nfMaps <- function(map_breaks) {
bounds_hh %>%
ggplot() +
# geom_sf(aes(fill = nf.jenks)) +
geom_sf(aes(fill = .data[[map_breaks]])) +
scale_fill_brewer(palette = "OrRd", direction = 1, na.value = "grey35") +
labs(title = "Proportion of non-family households",
# subtitle = "Nonfamily households per total households",
fill = map_breaks) +
geom_sf(data = river_bounds, fill="#026F86", size=0.0) +
theme_minimal() +
theme(axis.text = element_text(size = 6),
title = element_text(size = 6),
legend.title = element_text(size = 8),
legend.text = element_text(size = 4),
legend.key.size = unit(6, "points"),
legend.box.margin = margin(6, 6, 6, 6, "points")
)
}
plot_grid(
nfMaps("nf.equal"),
nfMaps("nf.quantile"),
nfMaps("nf.std"),
nfMaps("nf.jenks"),
nrow = 2, ncol = 2
)
I think that nf.jenks
has given the most nuanced distinction in the breaks:
bounds_hh %>%
ggplot() +
geom_sf(aes(fill = nf.jenks), colour = "grey20") +
scale_fill_brewer(palette = "OrRd", direction = 1, na.value = "grey35") +
labs(title = "Proportion of non-family households",
subtitle = "Nonfamily households per total households",
fill = "nf.jenks") +
geom_sf(data = river_bounds, fill="#026F86", size=0.0) +
theme_minimal()
As someone who has taken long walks to get out of my Richmond apartment during COVID times, I would definitely agree that Laurelhurst seems like a neighborhood with only families living in houses. This is much more apparent here than in the continuous fill graph from challenge 7, which had really distinction at the lower ends.