This vignette produces the graphs included in the initial MBR manuscript.
Figure 1: Raw monthly birth rates (General Fertility Rates; GFRs) for Oklahoma County, 1990-1999, plotted in a linear plot; the “bombing effect” is located ten months after the Oklahoma City bombing.
Smoothed monthly birth rates (General Fertility Rates; GFRs) for Oklahoma County, 1990-1999, plotted in a linear plot. The top plot shows the connected raw data with a February smoother; the middle plot shows smoothing with a 12-month moving average, blue/green line, superimposed on a February smoother, red line); the bottom plot shows the smoothers and confidence bands, which are H-spreads defined using the distribution of GFRs for the given month and 11 previous months.
First, some R packages are loaded, and some variables and functions are defined.
change_month <- base::as.Date("1996-02-15") #as.Date("1995-04-19") + lubridate::weeks(39) = "1996-01-17"
set.seed(444) # So bootstrap won't trigger a git diff
vp_layout <- function(x, y) {
grid::viewport(layout.pos.row = x, layout.pos.col = y)
}
full_spread <- function(scores) {
base::range(scores) # A new function isn't necessary. It's defined in order to be consistent.
}
h_spread <- function(scores) {
stats::quantile(x = scores, probs = c(.25, .75))
}
se_spread <- function(scores) {
base::mean(scores) + base::c(-1, 1) * stats::sd(scores) / base::sqrt(base::sum(!base::is.na(scores)))
}
boot_spread <- function(scores, conf = .68) {
plugin <- function(d, i) {
base::mean(d[i])
}
distribution <- boot::boot(data = scores, plugin, R = 99) # 999 for the publication
ci <- boot::boot.ci(distribution, type = c("bca"), conf = conf)
ci$bca[4:5] # The fourth & fifth elements correspond to the lower & upper bound.
}
dark_theme <- ggplot2::theme(
axis.title = ggplot2::element_text(color = "gray30", size = 9),
axis.text.x = ggplot2::element_text(color = "gray30", hjust = 0),
axis.text.y = ggplot2::element_text(color = "gray30"),
axis.ticks = ggplot2::element_blank(),
# panel.grid.minor.y = element_line(color = "gray95", linewidth = .1),
# panel.grid.major = element_line(color = "gray90", linewidth = .1),
panel.spacing = grid::unit(c(0, 0, 0, 0), "cm"),
plot.margin = grid::unit(c(0, 0, 0, 0), "cm")
)
# qplot(mtcars$hp) + dark_theme
light_theme <-
dark_theme +
ggplot2::theme(
axis.title = ggplot2::element_text(color = "gray80", size = 9),
axis.text.x = ggplot2::element_text(color = "gray80", hjust = 0),
axis.text.y = ggplot2::element_text(color = "gray80"),
panel.grid.minor.y = ggplot2::element_line(color = "gray99", linewidth = .1),
panel.grid.major = ggplot2::element_line(color = "gray95", linewidth = .1)
)
date_sequence <-
base::seq.Date(
from = base::as.Date("1990-01-01"),
to = base::as.Date("1999-01-01"),
by = "years"
)
x_scale <-
ggplot2::scale_x_date(
breaks = date_sequence,
labels = scales::date_format("%Y")
)
# This keeps things proportional down the three frames.
x_scale_blank <-
ggplot2::scale_x_date(
breaks = date_sequence,
labels = NULL
)
Here is the basic linear rolling graph. It doesn’t require much specification, and will work with a wide range of appropriate datasets. This first (unpublished) graph displays all components.
# Uncomment the next two lines to use the version built into the package. By default, it uses the
# CSV to promote reproducible research, since the CSV format is more open and accessible to more software.
ds_linear_all <-
county_month_birth_rate_2005_version |>
tibble::as_tibble()
ds_linear_okc <-
ds_linear_all |>
dplyr::filter(county_name == "oklahoma") |>
augment_year_data_with_month_resolution(date_name = "date")
portfolio_cartesian <-
annotate_data(
ds_linear_okc,
dv_name = "birth_rate",
center_function = stats::median,
spread_function = h_spread
)
cartesian_rolling(
ds_linear = portfolio_cartesian$ds_linear,
x_name = "date",
y_name = "birth_rate",
stage_id_name = "stage_id",
change_points = change_month,
change_point_labels = "Bombing Effect"
)
The version for the manuscript was tweaked to take advantage of certain features of the dataset. This is what it looks like when all three stylized panels are combined.
top_panel <-
Wats::cartesian_rolling(
ds_linear = portfolio_cartesian$ds_linear,
x_name = "date",
y_name = "birth_rate",
stage_id_name = "stage_id",
change_points = change_month,
y_title = "General Fertility Rate",
change_point_labels = "Bombing Effect",
draw_rolling_band = FALSE,
draw_rolling_line = FALSE
)
middle_panel <-
Wats::cartesian_rolling(
ds_linear = portfolio_cartesian$ds_linear,
x_name = "date",
y_name = "birth_rate",
stage_id_name = "stage_id",
change_points = change_month,
y_title = "General Fertility Rate",
change_point_labels = "",
draw_rolling_band = FALSE,
draw_jagged_line = FALSE
)
bottom_panel <-
Wats::cartesian_rolling(
ds_linear = portfolio_cartesian$ds_linear,
x_name = "date",
y_name = "birth_rate",
stage_id_name = "stage_id",
change_points = change_month,
y_title = "General Fertility Rate",
change_point_labels = "",
# draw_rolling_band = FALSE,
draw_jagged_line = FALSE
)
top_panel <- top_panel + x_scale + dark_theme
middle_panel <- middle_panel + x_scale + dark_theme
bottom_panel <- bottom_panel + x_scale_blank + dark_theme
grid::grid.newpage()
grid::pushViewport(grid::viewport(layout = grid::grid.layout(3,1)))
print(top_panel , vp = vp_layout(1, 1))
print(middle_panel, vp = vp_layout(2, 1))
print(bottom_panel, vp = vp_layout(3, 1))
grid::popViewport()
Cartesian plot of the GFR time series data in Oklahoma County, with H-spread Bands superimposed.
cartesian_periodic <-
Wats::cartesian_periodic(
portfolio_cartesian$ds_linear,
portfolio_cartesian$ds_periodic,
x_name = "date",
y_name = "birth_rate",
stage_id_name = "stage_id",
change_points = change_month,
change_point_labels = "Bombing Effect",
y_title = "General Fertility Rate",
draw_periodic_band = TRUE #The only difference from the simple linear graph above
)
print(cartesian_periodic)
Wrap Around Time Series (WATS Plot) of the Oklahoma City GFR data, 1990-1999.
portfolio_polar <-
polarize_cartesian(
ds_linear = portfolio_cartesian$ds_linear,
ds_stage_cycle = portfolio_cartesian$ds_stage_cycle,
y_name = "birth_rate",
stage_id_name = "stage_id",
plotted_point_count_per_cycle = 7200
)
grid::grid.newpage()
polar_periodic(
ds_linear = portfolio_polar$ds_observed_polar,
ds_stage_cycle = portfolio_polar$ds_stage_cycle_polar,
y_name = "radius",
stage_id_name = "stage_id",
draw_periodic_band = FALSE,
draw_stage_labels = TRUE,
draw_radius_labels = TRUE,
cardinal_labels = c("Jan1", "Apr1", "July1", "Oct1")
)
Wrap Around Time Series (WATS Plot) of the Oklahoma City GFR data, 1990-1999.
portfolio_polar <-
Wats::polarize_cartesian(
ds_linear = portfolio_cartesian$ds_linear,
ds_stage_cycle = portfolio_cartesian$ds_stage_cycle,
y_name = "birth_rate",
stage_id_name = "stage_id",
plotted_point_count_per_cycle = 7200
)
grid::grid.newpage()
grid::pushViewport(grid::viewport(
layout = grid::grid.layout(
nrow = 2,
ncol = 2,
respect = TRUE,
widths = grid::unit(c(1, 1), c("null", "null")),
heights = grid::unit(c(1, .5), c("null", "null"))
),
gp = grid::gpar(cex = 1, fill = NA)
))
## Create top left panel
grid::pushViewport(grid::viewport(layout.pos.col = 1, layout.pos.row = 1))
top_left_panel <-
Wats::polar_periodic(
ds_linear = portfolio_polar$ds_observed_polar,
ds_stage_cycle_polar = portfolio_polar$ds_stage_cycle_polar,
y_name = "radius",
stage_id_name = "stage_id", #graph_ceiling = 7,
cardinal_labels = c("Jan1", "Apr1", "July1", "Oct1")
)
grid::upViewport()
## Create top right panel
grid::pushViewport(grid::viewport(layout.pos.col = 2, layout.pos.row = 1))
top_right_panel <-
Wats::polar_periodic(
ds_linear = portfolio_polar$ds_observed_polar,
ds_stage_cycle_polar = portfolio_polar$ds_stage_cycle_polar,
y_name = "radius",
stage_id_name = "stage_id", #graph_ceiling = 7,
draw_observed_line = FALSE,
cardinal_labels = c("Jan1", "Apr1", "July1", "Oct1"),
origin_label = NULL
)
grid::upViewport()
## Create bottom panel
grid::pushViewport(grid::viewport(layout.pos.col = 1:2, layout.pos.row = 2, gp = grid::gpar(cex = 1)))
print(cartesian_periodic, vp = vp_layout(x = 1:2, y = 2)) # Print across both columns of the bottom row.
grid::upViewport()
This figure compares Oklahoma County against the (other) largest urban counties.
# ds_linear_all <- Wats::augment_year_data_with_month_resolution(ds_linear = county_month_birth_rate_2005_version, date_name="date")
# Identify the average size of the fecund population
ds_linear_all |>
dplyr::group_by(county_name) |>
dplyr::summarize(
Mean = base::mean(fecund_population)
) |>
dplyr::ungroup()
# A tibble: 12 × 2
county_name Mean
<chr> <dbl>
1 canadian 18332.
2 cleveland 48865.
3 comanche 26268.
4 creek 13402.
5 logan 7065.
6 mcclain 5434.
7 oklahoma 146882.
8 osage 8529.
9 pottawatomie 13604.
10 rogers 13383.
11 tulsa 123783.
12 wagoner 11580.
graph_row_comparison <- function(
row_label = "",
.county_name = "oklahoma",
spread_function = h_spread,
change_month = as.Date("1996-02-15")
) {
ds_linear <-
ds_linear_all |>
dplyr::filter(county_name == .county_name) |>
Wats::augment_year_data_with_month_resolution(date_name = "date")
portfolio_cartesian <-
ds_linear |>
Wats::annotate_data(
dv_name = "birth_rate",
center_function = stats::median,
spread_function = spread_function
)
portfolio_polar <-
portfolio_cartesian$ds_linear |>
Wats::polarize_cartesian(
ds_stage_cycle = portfolio_cartesian$ds_stage_cycle,
y_name = "birth_rate",
stage_id_name = "stage_id",
plotted_point_count_per_cycle = 7200
)
cartesian_periodic <-
portfolio_cartesian$ds_linear |>
Wats::cartesian_periodic(
portfolio_cartesian$ds_periodic,
x_name = "date",
y_name = "birth_rate",
stage_id_name = "stage_id",
change_points = change_month,
change_point_labels = ""
)
grid::pushViewport(grid::viewport(
layout =
grid::grid.layout(
nrow = 1,
ncol = 3,
respect = FALSE,
widths = grid::unit(c(1.5, 1, 3), c("line", "null", "null"))
),
gp = grid::gpar(cex = 1, fill = NA)
))
grid::pushViewport(grid::viewport(layout.pos.col = 1))
grid::grid.rect(gp = grid::gpar(fill = "gray90", col = NA))
grid::grid.text(row_label, rot = 90)
grid::popViewport()
grid::pushViewport(grid::viewport(layout.pos.col = 2))
Wats::polar_periodic(
ds_linear = portfolio_polar$ds_observed_polar,
ds_stage_cycle_polar = portfolio_polar$ds_stage_cycle_polar,
draw_observed_line = FALSE,
y_name = "radius",
stage_id_name = "stage_id",
origin_label = NULL,
plot_margins = c(0, 0, 0, 0)
)
grid::popViewport()
grid::pushViewport(grid::viewport(layout.pos.col = 3))
print(cartesian_periodic + x_scale + light_theme, vp = vp_layout(x = 1, y = 1))
grid::popViewport()
grid::popViewport() #Finish the row
}
county_names <- c("Comanche", "Cleveland", "Oklahoma", "Tulsa", "Rogers")
counties <- tolower(county_names)
grid::grid.newpage()
grid::pushViewport(grid::viewport(
layout = grid::grid.layout(nrow = length(counties), ncol = 1),
gp = grid::gpar(cex = 1, fill = NA)
))
for (i in base::seq_along(counties)) {
grid::pushViewport(grid::viewport(layout.pos.row = i))
graph_row_comparison(.county_name = counties[i], row_label = county_names[i])
grid::popViewport()
}
grid::popViewport()
Here are all 12 counties that Ronnie collected birth records for. This extended graph is not in the manuscript.
counties <- base::sort(base::unique(ds_linear_all$county_name))
county_names <- c("Canadian", "Cleveland", "Comanche", "Creek", "Logan", "McClain", "Oklahoma", "Osage", "Pottawatomie", "Rogers", "Tulsa", "Wagoner")
grid::grid.newpage()
grid::pushViewport(grid::viewport(
layout = grid::grid.layout(nrow = base::length(counties), ncol = 1),
gp = grid::gpar(cex = 1, fill = NA)
))
for (i in base::seq_along(counties)) {
grid::pushViewport(grid::viewport(layout.pos.row = i))
graph_row_comparison(.county_name = counties[i], row_label = county_names[i])
grid::popViewport()
}
grid::popViewport()
This figure demonstrates that WATS accommodates many types of error bands.
spreads <- c("h_spread", "full_spread", "se_spread", "boot_spread")
spread_names <- c("H-Spread", "Range", "+/-1 SE", "Bootstrap")
grid::grid.newpage()
grid::pushViewport(grid::viewport(
layout = grid::grid.layout(nrow = base::length(spreads), ncol = 1),
gp = grid::gpar(cex = 1, fill = NA)
))
for (i in base::seq_along(spreads)) {
grid::pushViewport(grid::viewport(layout.pos.row = i))
graph_row_comparison(spread_function = base::get(spreads[i]), row_label = spread_names[i])
grid::upViewport()
}
grid::upViewport()
The current vignette was build on a system using the following software.
Report created by root at Mon 11 Nov 2024 04:24:44 PM UTC, +0000
R version 4.4.2 (2024-10-31)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 24.04.1 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=C
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
time zone: Etc/UTC
tzcode source: system (glibc)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] Wats_1.0.1.9000 rmarkdown_2.29
loaded via a namespace (and not attached):
[1] gtable_0.3.6 jsonlite_1.8.9 dplyr_1.1.4 compiler_4.4.2
[5] tidyselect_1.2.1 jquerylib_0.1.4 scales_1.3.0 testit_0.13
[9] boot_1.3-31 yaml_2.3.10 fastmap_1.2.0 lattice_0.22-6
[13] ggplot2_3.5.1 R6_2.5.1 labeling_0.4.3 generics_0.1.3
[17] knitr_1.49 tibble_3.2.1 maketools_1.3.1 munsell_0.5.1
[21] lubridate_1.9.3 RColorBrewer_1.1-3 bslib_0.8.0 pillar_1.9.0
[25] rlang_1.1.4 utf8_1.2.4 cachem_1.1.0 xfun_0.49
[29] sass_0.4.9 sys_3.4.3 timechange_0.3.0 cli_3.6.3
[33] withr_3.0.2 magrittr_2.0.3 digest_0.6.37 grid_4.4.2
[37] lifecycle_1.0.4 vctrs_0.6.5 evaluate_1.0.1 glue_1.8.0
[41] farver_2.1.2 zoo_1.8-12 buildtools_1.0.0 fansi_1.0.6
[45] colorspace_2.1-1 tools_4.4.2 pkgconfig_2.0.3 htmltools_0.5.8.1