Group challenges

Published

March 6, 2023

Group data wrangling challenge

Using the penguins data set from the {palmerpenguins} package, recreate this data frame.

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.0     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(palmerpenguins)
`summarise()` has grouped output by 'species', 'island'. You can override using
the `.groups` argument.
# A tibble: 4 × 4
# Groups:   species, island [4]
  species   island    female  male
  <fct>     <fct>      <dbl> <dbl>
1 Adelie    Biscoe     3369. 4050 
2 Adelie    Dream      3344. 4046.
3 Adelie    Torgersen  3396. 4035.
4 Chinstrap Dream      3527. 3939.

Group plotting challenge

# Panel: bill depth against bill length for Adelie penguins, coloured by sex.
# Typographic quotes from the rendered page are replaced with plain ASCII
# quotes so the code parses, and `F` is spelled out as `FALSE`.
plot1 <- penguins |>
  filter(species == "Adelie") |>
  # Rows with unknown sex are removed before plotting.
  drop_na(sex) |>
  # Capitalised copy of `sex`, used for the colour legend.
  mutate(Sex = fct_recode(sex, "Female" = "female", "Male" = "male")) |>
  ggplot(aes(x = bill_length_mm, y = bill_depth_mm, color = Sex)) +
  geom_point(size = 2.8, alpha = 0.55) +
  # Linear fit drawn in gray, without a confidence band.
  geom_smooth(method = "lm", se = FALSE, color = "gray50") +
  scale_x_continuous(limits = c(31, 46), n.breaks = 15) +
  scale_y_continuous(limits = c(16, 21), n.breaks = 8, expand = c(0, 1.5)) +
  # Pointer segment and label for one highlighted observation.
  # NOTE(review): these annotation coordinates (y up to 21.8) lie above the
  # y-scale limit of 21 -- confirm they are not dropped as out-of-bounds.
  annotate(
    geom = "segment",
    x = 33.5, xend = 34.35,
    y = 21.5, yend = 21.2,
    color = "black"
  ) +
  annotate(geom = "text", label = "Penguin 42", x = 34, y = 21.8) +
  labs(
    y = "Bill depth (mm)",
    x = "Bill length (mm)",
    title = "Bill size by sex"
  ) +
  theme_classic() +
  # Legend placed inside the panel on a transparent background.
  theme(
    legend.position = c(0.85, 0.2),
    legend.background = element_rect(fill = "transparent", color = "transparent")
  )

# Panel: flipper length summarised by sex, with one line per island, for
# Adelie penguins. Typographic quotes from the rendered page are replaced
# with plain ASCII quotes so the code parses.
plot2 <- penguins |>
  filter(species == "Adelie") |>
  drop_na(sex, island) |>
  # Capitalised copies of the grouping columns, used for axis/legend titles.
  mutate(
    Sex = fct_recode(sex, "Female" = "female", "Male" = "male"),
    Island = island
  ) |>
  ggplot(
    aes(
      x = Sex,
      y = flipper_length_mm,
      group = Island,
      shape = Island,
      color = Island
    )
  ) +
  # stat_summary() with no summary function supplied falls back to its
  # default (mean_se() per the ggplot2 documentation), matching the title.
  stat_summary(position = position_dodge(width = 0.12)) +
  # Connect the group means; same dodge width so lines meet the points.
  stat_summary(
    fun = mean,
    geom = "line",
    position = position_dodge(width = 0.12)
  ) +
  labs(
    y = "Flipper length (mm)",
    title = "Mean and standard error of flipper length by sex and island"
  ) +
  theme_classic() +
  theme(legend.position = c(0.85, 0.25))

# Panel: flipper length by year, faceted by island, for Adelie penguins.
# Typographic quotes from the rendered page are replaced with plain ASCII
# quotes so the code parses.
plot3 <- penguins |>
  filter(species == "Adelie") |>
  drop_na(sex, island, year) |>
  mutate(
    Sex = fct_recode(sex, "Female" = "female", "Male" = "male"),
    # Year as a factor so it is treated as discrete on the axis and in colour.
    Year = as.factor(year)
  ) |>
  ggplot(aes(x = Year, y = flipper_length_mm, color = Year)) +
  geom_jitter(width = .2, alpha = 0.6) +
  # Mean with normal-theory confidence limits, drawn in black.
  # NOTE(review): mean_cl_normal is a wrapper that needs the {Hmisc} package
  # installed -- confirm it is available where this runs.
  stat_summary(fun.data = mean_cl_normal, color = "black") +
  scale_y_continuous(limits = c(170, 210), n.breaks = 5) +
  # Per-year sample size printed along the bottom of each facet (y = 170).
  geom_text(
    stat = "count",
    aes(label = paste0("N =", after_stat(count))),
    y = 170,
    color = "black"
  ) +
  facet_wrap(vars(island)) +
  labs(
    y = "Flipper length (mm)",
    title = "Mean and 95% confidence interval of flipper length by island and year"
  ) +
  scale_color_manual(values = c("#0072B2", "#009E73", "#E69F00")) +
  theme_classic() +
  # Colour duplicates the x axis, so the legend is hidden.
  theme(legend.position = "none")

# Combine the three panels: plot1 and plot2 side by side on the top row,
# plot3 across the bottom row, with a shared title, caption, and panel tags.
# The `+` / `/` composition of ggplot objects and plot_annotation() come from
# {patchwork}, which is not attached by the visible library() calls, so it is
# attached here (a no-op if it is already attached).
library(patchwork)

(plot1 + plot2) / plot3 +
  plot_annotation(
    title = "Adelie penguin bill and flipper size",
    caption = "Source {palmerpenguins} data set",
    tag_levels = "A"
  )