For these exercises, we’ll use the dog breed traits and dog breed popularity rankings data sets.
- Load tidyverse and import `dog_breed_traits_clean.csv` to `traits`.
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.0 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the cleaned breed-traits CSV (path resolved by here::here()) into
# `traits`; show_col_types = FALSE silences the column-type message.
traits <- here::here("data/dog_breed_traits_clean.csv") |>
  read_csv(show_col_types = FALSE)
- Create a column called `mean_ratings` holding the per-row mean over all rating columns, and assign the result to `traits2`.
# Per-row mean of the five rating columns, stored as `mean_ratings`.
#
# `na.rm = TRUE` must be an argument of mean(), not of c(): inside c() it is
# just another element (TRUE coerces to 1), which adds a spurious sixth value
# to every row and leaves missing ratings unremoved.
# NOTE(review): any printed output rendered with the old code is stale;
# re-render the document after this fix.
traits2 <- traits |>
  rowwise() |>
  mutate(
    mean_ratings = mean(
      c(affectionate, children, other_dogs, shedding, grooming),
      na.rm = TRUE
    )
  ) |>
  # Drop the row-wise grouping so later verbs operate on whole columns.
  ungroup()
- Convert `mean_ratings` to a proportion in a column called `pmean_ratings` and add it to `traits2`.
# Rescale the mean rating to a proportion by dividing by 5
# (presumably the top of the rating scale -- confirm against the data).
traits2 <- mutate(traits2, pmean_ratings = mean_ratings / 5)
- Apply a natural log transformation to the `pmean_ratings` vector.
# Natural log (base e) of each proportion; NA entries stay NA.
traits2[["pmean_ratings"]] |>
  log()
[1] -0.3101549 -0.4567584 -0.4054651 -0.3101549 -0.5679840 -0.4567584
[7] -0.4567584 -0.6286087 -0.4054651 -0.5679840 -0.4567584 -0.5679840
[13] -0.4054651 -0.5679840 -0.6286087 -0.3101549 -0.4054651 -0.4567584
[19] -0.3566749 -0.3566749 -0.4567584 -0.2231436 -0.5679840 -0.3566749
[25] -0.7621401 -0.5108256 -0.3101549 -0.5108256 -0.3566749 -0.3101549
[31] -0.3101549 -0.5108256 -0.5108256 -0.9162907 -0.4054651 -0.4567584
[37] -0.6931472 -0.5679840 -0.4567584 -0.4567584 -0.3566749 -0.4567584
[43] -0.5679840 -0.4054651 -0.3101549 -0.6286087 -0.3101549 -0.7621401
[49] -0.3566749 -0.5679840 -0.5108256 -0.4567584 -0.4567584 -0.7621401
[55] -0.6931472 -0.4054651 -0.6286087 -0.4567584 -0.4567584 -0.6286087
[61] -0.6931472 -0.7621401 -0.4567584 -0.7621401 -0.6286087 -0.5679840
[67] -0.4567584 -0.3566749 -0.5108256 -0.5679840 -0.5679840 -0.4567584
[73] -0.6931472 -0.5108256 -0.3101549 -0.4567584 -0.5679840 -0.6286087
[79] -0.7621401 -0.3101549 -0.5108256 -0.6931472 -0.4567584 -0.6286087
[85] -0.8362480 -0.8362480 -0.4054651 -0.5679840 -0.5679840 -0.4054651
[91] -0.5679840 -0.5108256 -0.5679840 -0.5679840 -0.5108256 -0.3101549
[97] -0.5679840 -0.3101549 -0.5108256 -0.6931472 -0.4054651 -0.3566749
[103] -0.6931472 -0.4567584 -0.6931472 -0.6931472 -0.5679840 -0.5679840
[109] -0.4567584 -0.5108256 -0.4567584 -0.4567584 -0.5679840 -0.5679840
[115] -0.5108256 -0.4567584 -0.6931472 -0.4567584 -0.4054651 -0.6931472
[121] -0.5679840 -0.6931472 -0.5679840 -0.5679840 -0.3101549 -0.5679840
[127] -0.4054651 -0.4054651 -0.6286087 -0.4567584 -0.7621401 -0.5679840
[133] -0.6286087 -0.6931472 -0.6286087 -0.6286087 -0.5679840 -0.3101549
[139] -0.6286087 -0.5108256 -0.7621401 -0.6931472 -0.5108256 -0.6931472
[145] -0.4567584 -0.3566749 -0.4054651 -0.5108256 -0.5108256 -0.6931472
[151] -0.7621401 -0.5108256 -0.4054651 -0.4054651 -0.5108256 -0.6931472
[157] -0.3566749 -0.7621401 -0.5679840 -0.4054651 -0.5108256 -0.5679840
[163] -0.6286087 -0.4567584 -0.4567584 -0.5108256 -0.5679840 -0.5679840
[169] -0.5679840 -0.4054651 -0.5679840 -0.5108256 -0.9162907 -0.6286087
[175] -0.5108256 -0.6286087 -0.5679840 -0.6286087 -0.7621401 -0.4567584
[181] -0.6286087 -0.5679840 -0.5108256 -0.6286087 -0.6931472 -0.3566749
[187] -0.4054651 -0.5108256 -0.6931472 -0.4054651 -0.5679840 -0.5108256
[193] -0.7621401 -0.4054651 -0.6931472 NA -0.4054651
- Round pmean_ratings to two decimal places.
# Round each proportion to two decimal places.
traits2[["pmean_ratings"]] |>
  round(digits = 2)
[1] 0.73 0.63 0.67 0.73 0.57 0.63 0.63 0.53 0.67 0.57 0.63 0.57 0.67 0.57 0.53
[16] 0.73 0.67 0.63 0.70 0.70 0.63 0.80 0.57 0.70 0.47 0.60 0.73 0.60 0.70 0.73
[31] 0.73 0.60 0.60 0.40 0.67 0.63 0.50 0.57 0.63 0.63 0.70 0.63 0.57 0.67 0.73
[46] 0.53 0.73 0.47 0.70 0.57 0.60 0.63 0.63 0.47 0.50 0.67 0.53 0.63 0.63 0.53
[61] 0.50 0.47 0.63 0.47 0.53 0.57 0.63 0.70 0.60 0.57 0.57 0.63 0.50 0.60 0.73
[76] 0.63 0.57 0.53 0.47 0.73 0.60 0.50 0.63 0.53 0.43 0.43 0.67 0.57 0.57 0.67
[91] 0.57 0.60 0.57 0.57 0.60 0.73 0.57 0.73 0.60 0.50 0.67 0.70 0.50 0.63 0.50
[106] 0.50 0.57 0.57 0.63 0.60 0.63 0.63 0.57 0.57 0.60 0.63 0.50 0.63 0.67 0.50
[121] 0.57 0.50 0.57 0.57 0.73 0.57 0.67 0.67 0.53 0.63 0.47 0.57 0.53 0.50 0.53
[136] 0.53 0.57 0.73 0.53 0.60 0.47 0.50 0.60 0.50 0.63 0.70 0.67 0.60 0.60 0.50
[151] 0.47 0.60 0.67 0.67 0.60 0.50 0.70 0.47 0.57 0.67 0.60 0.57 0.53 0.63 0.63
[166] 0.60 0.57 0.57 0.57 0.67 0.57 0.60 0.40 0.53 0.60 0.53 0.57 0.53 0.47 0.63
[181] 0.53 0.57 0.60 0.53 0.50 0.70 0.67 0.60 0.50 0.67 0.57 0.60 0.47 0.67 0.50
[196] NA 0.67
- Convert pmean_ratings to scientific notation.
# Render the proportions as character strings in scientific notation.
traits2[["pmean_ratings"]] |>
  format(scientific = TRUE)
[1] "7.333333e-01" "6.333333e-01" "6.666667e-01" "7.333333e-01" "5.666667e-01"
[6] "6.333333e-01" "6.333333e-01" "5.333333e-01" "6.666667e-01" "5.666667e-01"
[11] "6.333333e-01" "5.666667e-01" "6.666667e-01" "5.666667e-01" "5.333333e-01"
[16] "7.333333e-01" "6.666667e-01" "6.333333e-01" "7.000000e-01" "7.000000e-01"
[21] "6.333333e-01" "8.000000e-01" "5.666667e-01" "7.000000e-01" "4.666667e-01"
[26] "6.000000e-01" "7.333333e-01" "6.000000e-01" "7.000000e-01" "7.333333e-01"
[31] "7.333333e-01" "6.000000e-01" "6.000000e-01" "4.000000e-01" "6.666667e-01"
[36] "6.333333e-01" "5.000000e-01" "5.666667e-01" "6.333333e-01" "6.333333e-01"
[41] "7.000000e-01" "6.333333e-01" "5.666667e-01" "6.666667e-01" "7.333333e-01"
[46] "5.333333e-01" "7.333333e-01" "4.666667e-01" "7.000000e-01" "5.666667e-01"
[51] "6.000000e-01" "6.333333e-01" "6.333333e-01" "4.666667e-01" "5.000000e-01"
[56] "6.666667e-01" "5.333333e-01" "6.333333e-01" "6.333333e-01" "5.333333e-01"
[61] "5.000000e-01" "4.666667e-01" "6.333333e-01" "4.666667e-01" "5.333333e-01"
[66] "5.666667e-01" "6.333333e-01" "7.000000e-01" "6.000000e-01" "5.666667e-01"
[71] "5.666667e-01" "6.333333e-01" "5.000000e-01" "6.000000e-01" "7.333333e-01"
[76] "6.333333e-01" "5.666667e-01" "5.333333e-01" "4.666667e-01" "7.333333e-01"
[81] "6.000000e-01" "5.000000e-01" "6.333333e-01" "5.333333e-01" "4.333333e-01"
[86] "4.333333e-01" "6.666667e-01" "5.666667e-01" "5.666667e-01" "6.666667e-01"
[91] "5.666667e-01" "6.000000e-01" "5.666667e-01" "5.666667e-01" "6.000000e-01"
[96] "7.333333e-01" "5.666667e-01" "7.333333e-01" "6.000000e-01" "5.000000e-01"
[101] "6.666667e-01" "7.000000e-01" "5.000000e-01" "6.333333e-01" "5.000000e-01"
[106] "5.000000e-01" "5.666667e-01" "5.666667e-01" "6.333333e-01" "6.000000e-01"
[111] "6.333333e-01" "6.333333e-01" "5.666667e-01" "5.666667e-01" "6.000000e-01"
[116] "6.333333e-01" "5.000000e-01" "6.333333e-01" "6.666667e-01" "5.000000e-01"
[121] "5.666667e-01" "5.000000e-01" "5.666667e-01" "5.666667e-01" "7.333333e-01"
[126] "5.666667e-01" "6.666667e-01" "6.666667e-01" "5.333333e-01" "6.333333e-01"
[131] "4.666667e-01" "5.666667e-01" "5.333333e-01" "5.000000e-01" "5.333333e-01"
[136] "5.333333e-01" "5.666667e-01" "7.333333e-01" "5.333333e-01" "6.000000e-01"
[141] "4.666667e-01" "5.000000e-01" "6.000000e-01" "5.000000e-01" "6.333333e-01"
[146] "7.000000e-01" "6.666667e-01" "6.000000e-01" "6.000000e-01" "5.000000e-01"
[151] "4.666667e-01" "6.000000e-01" "6.666667e-01" "6.666667e-01" "6.000000e-01"
[156] "5.000000e-01" "7.000000e-01" "4.666667e-01" "5.666667e-01" "6.666667e-01"
[161] "6.000000e-01" "5.666667e-01" "5.333333e-01" "6.333333e-01" "6.333333e-01"
[166] "6.000000e-01" "5.666667e-01" "5.666667e-01" "5.666667e-01" "6.666667e-01"
[171] "5.666667e-01" "6.000000e-01" "4.000000e-01" "5.333333e-01" "6.000000e-01"
[176] "5.333333e-01" "5.666667e-01" "5.333333e-01" "4.666667e-01" "6.333333e-01"
[181] "5.333333e-01" "5.666667e-01" "6.000000e-01" "5.333333e-01" "5.000000e-01"
[186] "7.000000e-01" "6.666667e-01" "6.000000e-01" "5.000000e-01" "6.666667e-01"
[191] "5.666667e-01" "6.000000e-01" "4.666667e-01" "6.666667e-01" "5.000000e-01"
[196] " NA" "6.666667e-01"
- Sum up the total grooming ratings for each coat type.
# Weighted count: for each coat type, `n` is the sum of the grooming ratings
# rather than the number of rows.
count(traits, coat_type, wt = grooming)
# A tibble: 10 × 2
coat_type n
<chr> <dbl>
1 Corded 14
2 Curly 21
3 Double 171
4 Hairless 4
5 Rough 8
6 Silky 30
7 Smooth 113
8 Wavy 15
9 Wiry 69
10 <NA> 2
- Add inline R code to the following sentence in R Markdown to say how many rows have `NA` for grooming:
We are missing grooming data for [insert inline R code] breeds.