@@ -40,10 +40,19 @@ paralympic_1500 |>
4040 slice_tail(n = 5) |>
4141 kbl(
4242 col.names = c(
43- "", "year", "city", "country_of_games", "division", "type",
44- "name", "country_of_athlete", "time", "time_min"
43+ "",
44+ "year",
45+ "city",
46+ "country_of_games",
47+ "division",
48+ "type",
49+ "name",
50+ "country_of_athlete",
51+ "time",
52+ "time_min"
4553 ),
46- linesep = "", booktabs = TRUE,
54+ linesep = "",
55+ booktabs = TRUE,
4756 row.names = FALSE
4857 ) |>
4958 kable_styling(
@@ -66,10 +75,19 @@ paralympic_1500 |>
6675 slice_head(n = 5) |>
6776 kbl(
6877 col.names = c(
69- "", "year", "city", "country_of_games", "division", "type",
70- "name", "country_of_athlete", "time", "time_min"
78+ "",
79+ "year",
80+ "city",
81+ "country_of_games",
82+ "division",
83+ "type",
84+ "name",
85+ "country_of_athlete",
86+ "time",
87+ "time_min"
7188 ),
72- linesep = "", booktabs = TRUE,
89+ linesep = "",
90+ booktabs = TRUE,
7391 row.names = FALSE
7492 ) |>
7593 kable_styling(
@@ -107,26 +125,28 @@ The data dictionary is provided in @tbl-paralympic-var-def.
107125#| tbl-cap: Variables and their descriptions for the `paralympic_1500` dataset.
108126#| tbl-pos: H
109127paralympic_var_def <- tribble(
110- ~variable, ~description,
111- "year", "Year the Games took place.",
112- "city", "City of the Games.",
113- "country_of_games", "Country of the Games.",
114- "division", "Division: `Men` or `Women`.",
115- "type", "Type: `Olympic`, `T11`, `T12`, or `T13`.",
116- "name", "Name of the athlete.",
117- "country_of_athlete", "Country of athlete.",
118- "time", "Time of gold medal race, in m:s.",
119- "time_min", "Time of gold medal race, in decimal minutes (min + sec/60)."
128+ ~variable , ~description ,
129+ "year" , "Year the Games took place." ,
130+ "city" , "City of the Games." ,
131+ "country_of_games" , "Country of the Games." ,
132+ "division" , "Division: `Men` or `Women`." ,
133+ "type" , "Type: `Olympic`, `T11`, `T12`, or `T13`." ,
134+ "name" , "Name of the athlete." ,
135+ "country_of_athlete" , "Country of athlete." ,
136+ "time" , "Time of gold medal race, in m:s." ,
137+ "time_min" , "Time of gold medal race, in decimal minutes (min + sec/60)."
120138)
121139
122140paralympic_var_def |>
123141 kbl(
124- linesep = "", booktabs = TRUE,
142+ linesep = "",
143+ booktabs = TRUE,
125144 col.names = c("Variable", "Description")
126145 ) |>
127146 kable_styling(
128147 bootstrap_options = c("striped", "condensed"),
129- latex_options = c("striped"), full_width = TRUE
148+ latex_options = c("striped"),
149+ full_width = TRUE
130150 ) |>
131151 column_spec(1, monospace = TRUE) |>
132152 column_spec(2, width = "30em")
@@ -168,7 +188,7 @@ The maximum race time, therefore, should be taken into context in terms of the y
168188#| label: fig-paralympic-cat
169189#| fig-cap: |
170190#| Distributions of categorical variables in the `paralympic_1500` dataset.
171- #| fig-subcap:
191+ #| fig-subcap:
172192#| - Country of origin of the athlete
173193#| - Country in which the Games took place
174194#| fig-alt: |
@@ -177,7 +197,7 @@ The maximum race time, therefore, should be taken into context in terms of the y
177197#| finishers, Kenya has had 7 top finishers, and Tunisia and Algeria have both
178198#| had 5. The right panel shows a bar plot counting the number of Games which
179199#| have happened in each country. The USA has hosted 4 Games, the UK has hosted
180- #| 3 Games, and each of Japan, Greece, Germany, France, and Australia have
200+ #| 3 Games, and each of Japan, Greece, Germany, France, and Australia has
181201#| hosted the Games twice.
182202#| fig-asp: 2
183203#| fig-width: 4
@@ -191,7 +211,8 @@ paralympic_1500 |>
191211 ungroup() |>
192212 mutate(country_of_games = fct_reorder(country_of_games, n)) |>
193213 ggplot(aes(
194- y = country_of_games, x = n,
214+ y = country_of_games,
215+ x = n,
195216 fill = fct_rev(country_of_games)
196217 )) +
197218 geom_col(show.legend = FALSE) +
@@ -261,7 +282,13 @@ In fact, some internet sleuthing tells you that the *top four* T13 finishers all
261282paralympic_1500 |>
262283 filter(division == "Men") |>
263284 filter(year > 1950) |>
264- ggplot(aes(x = year, y = time_min, group = type, color = type, shape = type)) +
285+ ggplot(aes(
286+ x = year,
287+ y = time_min,
288+ group = type,
289+ color = type,
290+ shape = type
291+ )) +
265292 geom_vline(xintercept = 2016, color = "darkgrey", lty = 2, lwd = 0.5) +
266293 geom_point(size = 2) +
267294 scale_color_openintro() +
@@ -274,7 +301,11 @@ paralympic_1500 |>
274301 ) +
275302 theme(
276303 legend.position = c(0.1, 0.75),
277- legend.background = element_rect(fill = "white", color = "gray", linewidth = 0.1)
304+ legend.background = element_rect(
305+ fill = "white",
306+ color = "gray",
307+ linewidth = 0.1
308+ )
278309 )
279310```
280311
@@ -298,7 +329,7 @@ That is, for later years, the predicted gold medal time is higher than in earlie
298329#| 1500m race time for Men's Olympic and Paralympic (T11) athletes. The line
299330#| represents a line of best fit to the entire dataset.
300331#| fig-alt: |
301- #| A scatterplot with year on the x-axis and gold medal 1500m time on the
332+ #| A scatterplot with year on the x-axis and gold medal 1500m time on the
302333#| y-axis. A line of best fit is drawn over the points.
303334#| fig-asp: 0.5
304335paralympic_1500 |>
@@ -333,8 +364,11 @@ paralympic_1500 |>
333364 filter(division == "Men", type == "Olympic" | type == "T11") |>
334365 filter(year > 1950) |>
335366 ggplot(aes(
336- x = year, y = time_min, group = type,
337- color = type, shape = type
367+ x = year,
368+ y = time_min,
369+ group = type,
370+ color = type,
371+ shape = type
338372 )) +
339373 geom_point(size = 2) +
340374 geom_smooth(method = "lm", se = FALSE) +
@@ -348,7 +382,11 @@ paralympic_1500 |>
348382 ) +
349383 theme(
350384 legend.position = c(0.1, 0.8),
351- legend.background = element_rect(fill = "white", color = "gray", linewidth = 0.1)
385+ legend.background = element_rect(
386+ fill = "white",
387+ color = "gray",
388+ linewidth = 0.1
389+ )
352390 )
353391```
354392
0 commit comments