

Jeffrey R. Stevens


March 6, 2023

For these exercises, we’ll use the dog breed traits and dog breed popularity rankings data sets.

  1. Load tidyverse and import dog_breed_traits_clean.csv to traits.
# Attach the tidyverse so read_csv() and the dplyr verbs used below are in scope.
library(tidyverse)

# Import the cleaned breed-traits data; here::here() resolves the path from
# the project root, and show_col_types = FALSE silences the column-spec message.
traits <- read_csv(
  here::here("data", "dog_breed_traits_clean.csv"),
  show_col_types = FALSE
)
  1. Create a column of per-row means over all rating columns called mean_ratings and assign to traits2.
# Per-row mean of the rating columns.
# na.rm = TRUE must be an argument to mean(), not to c(): inside c() it is
# coerced to the number 1 and appended as an extra "rating", which biases every
# mean and leaves missing values unhandled.
traits2 <- traits |>
  rowwise() |>
  mutate(
    mean_ratings = mean(
      c(affectionate, children, other_dogs, shedding, grooming),
      na.rm = TRUE
    )
  ) |>
  # Drop the row-wise grouping so later verbs operate on whole columns again.
  ungroup()
  1. Convert mean_ratings to a proportion in a column called pmean_ratings and add to traits2.
# Express each breed's mean rating as a proportion by dividing by 5.
traits2 <- mutate(traits2, pmean_ratings = mean_ratings / 5)
  1. Apply a natural log transformation to the pmean_ratings vector.
log(traits2$pmean_ratings)
  [1] -0.3101549 -0.4567584 -0.4054651 -0.3101549 -0.5679840 -0.4567584
  [7] -0.4567584 -0.6286087 -0.4054651 -0.5679840 -0.4567584 -0.5679840
 [13] -0.4054651 -0.5679840 -0.6286087 -0.3101549 -0.4054651 -0.4567584
 [19] -0.3566749 -0.3566749 -0.4567584 -0.2231436 -0.5679840 -0.3566749
 [25] -0.7621401 -0.5108256 -0.3101549 -0.5108256 -0.3566749 -0.3101549
 [31] -0.3101549 -0.5108256 -0.5108256 -0.9162907 -0.4054651 -0.4567584
 [37] -0.6931472 -0.5679840 -0.4567584 -0.4567584 -0.3566749 -0.4567584
 [43] -0.5679840 -0.4054651 -0.3101549 -0.6286087 -0.3101549 -0.7621401
 [49] -0.3566749 -0.5679840 -0.5108256 -0.4567584 -0.4567584 -0.7621401
 [55] -0.6931472 -0.4054651 -0.6286087 -0.4567584 -0.4567584 -0.6286087
 [61] -0.6931472 -0.7621401 -0.4567584 -0.7621401 -0.6286087 -0.5679840
 [67] -0.4567584 -0.3566749 -0.5108256 -0.5679840 -0.5679840 -0.4567584
 [73] -0.6931472 -0.5108256 -0.3101549 -0.4567584 -0.5679840 -0.6286087
 [79] -0.7621401 -0.3101549 -0.5108256 -0.6931472 -0.4567584 -0.6286087
 [85] -0.8362480 -0.8362480 -0.4054651 -0.5679840 -0.5679840 -0.4054651
 [91] -0.5679840 -0.5108256 -0.5679840 -0.5679840 -0.5108256 -0.3101549
 [97] -0.5679840 -0.3101549 -0.5108256 -0.6931472 -0.4054651 -0.3566749
[103] -0.6931472 -0.4567584 -0.6931472 -0.6931472 -0.5679840 -0.5679840
[109] -0.4567584 -0.5108256 -0.4567584 -0.4567584 -0.5679840 -0.5679840
[115] -0.5108256 -0.4567584 -0.6931472 -0.4567584 -0.4054651 -0.6931472
[121] -0.5679840 -0.6931472 -0.5679840 -0.5679840 -0.3101549 -0.5679840
[127] -0.4054651 -0.4054651 -0.6286087 -0.4567584 -0.7621401 -0.5679840
[133] -0.6286087 -0.6931472 -0.6286087 -0.6286087 -0.5679840 -0.3101549
[139] -0.6286087 -0.5108256 -0.7621401 -0.6931472 -0.5108256 -0.6931472
[145] -0.4567584 -0.3566749 -0.4054651 -0.5108256 -0.5108256 -0.6931472
[151] -0.7621401 -0.5108256 -0.4054651 -0.4054651 -0.5108256 -0.6931472
[157] -0.3566749 -0.7621401 -0.5679840 -0.4054651 -0.5108256 -0.5679840
[163] -0.6286087 -0.4567584 -0.4567584 -0.5108256 -0.5679840 -0.5679840
[169] -0.5679840 -0.4054651 -0.5679840 -0.5108256 -0.9162907 -0.6286087
[175] -0.5108256 -0.6286087 -0.5679840 -0.6286087 -0.7621401 -0.4567584
[181] -0.6286087 -0.5679840 -0.5108256 -0.6286087 -0.6931472 -0.3566749
[187] -0.4054651 -0.5108256 -0.6931472 -0.4054651 -0.5679840 -0.5108256
[193] -0.7621401 -0.4054651 -0.6931472         NA -0.4054651
  1. Round pmean_ratings to two decimal places.
# Extract the proportion column as a vector and round it to two decimals.
traits2 |>
  pull(pmean_ratings) |>
  round(digits = 2)
  [1] 0.73 0.63 0.67 0.73 0.57 0.63 0.63 0.53 0.67 0.57 0.63 0.57 0.67 0.57 0.53
 [16] 0.73 0.67 0.63 0.70 0.70 0.63 0.80 0.57 0.70 0.47 0.60 0.73 0.60 0.70 0.73
 [31] 0.73 0.60 0.60 0.40 0.67 0.63 0.50 0.57 0.63 0.63 0.70 0.63 0.57 0.67 0.73
 [46] 0.53 0.73 0.47 0.70 0.57 0.60 0.63 0.63 0.47 0.50 0.67 0.53 0.63 0.63 0.53
 [61] 0.50 0.47 0.63 0.47 0.53 0.57 0.63 0.70 0.60 0.57 0.57 0.63 0.50 0.60 0.73
 [76] 0.63 0.57 0.53 0.47 0.73 0.60 0.50 0.63 0.53 0.43 0.43 0.67 0.57 0.57 0.67
 [91] 0.57 0.60 0.57 0.57 0.60 0.73 0.57 0.73 0.60 0.50 0.67 0.70 0.50 0.63 0.50
[106] 0.50 0.57 0.57 0.63 0.60 0.63 0.63 0.57 0.57 0.60 0.63 0.50 0.63 0.67 0.50
[121] 0.57 0.50 0.57 0.57 0.73 0.57 0.67 0.67 0.53 0.63 0.47 0.57 0.53 0.50 0.53
[136] 0.53 0.57 0.73 0.53 0.60 0.47 0.50 0.60 0.50 0.63 0.70 0.67 0.60 0.60 0.50
[151] 0.47 0.60 0.67 0.67 0.60 0.50 0.70 0.47 0.57 0.67 0.60 0.57 0.53 0.63 0.63
[166] 0.60 0.57 0.57 0.57 0.67 0.57 0.60 0.40 0.53 0.60 0.53 0.57 0.53 0.47 0.63
[181] 0.53 0.57 0.60 0.53 0.50 0.70 0.67 0.60 0.50 0.67 0.57 0.60 0.47 0.67 0.50
[196]   NA 0.67
  1. Convert pmean_ratings to scientific notation.
# Extract the proportion column and render it as scientific-notation strings.
traits2 |>
  pull(pmean_ratings) |>
  format(scientific = TRUE)
  [1] "7.333333e-01" "6.333333e-01" "6.666667e-01" "7.333333e-01" "5.666667e-01"
  [6] "6.333333e-01" "6.333333e-01" "5.333333e-01" "6.666667e-01" "5.666667e-01"
 [11] "6.333333e-01" "5.666667e-01" "6.666667e-01" "5.666667e-01" "5.333333e-01"
 [16] "7.333333e-01" "6.666667e-01" "6.333333e-01" "7.000000e-01" "7.000000e-01"
 [21] "6.333333e-01" "8.000000e-01" "5.666667e-01" "7.000000e-01" "4.666667e-01"
 [26] "6.000000e-01" "7.333333e-01" "6.000000e-01" "7.000000e-01" "7.333333e-01"
 [31] "7.333333e-01" "6.000000e-01" "6.000000e-01" "4.000000e-01" "6.666667e-01"
 [36] "6.333333e-01" "5.000000e-01" "5.666667e-01" "6.333333e-01" "6.333333e-01"
 [41] "7.000000e-01" "6.333333e-01" "5.666667e-01" "6.666667e-01" "7.333333e-01"
 [46] "5.333333e-01" "7.333333e-01" "4.666667e-01" "7.000000e-01" "5.666667e-01"
 [51] "6.000000e-01" "6.333333e-01" "6.333333e-01" "4.666667e-01" "5.000000e-01"
 [56] "6.666667e-01" "5.333333e-01" "6.333333e-01" "6.333333e-01" "5.333333e-01"
 [61] "5.000000e-01" "4.666667e-01" "6.333333e-01" "4.666667e-01" "5.333333e-01"
 [66] "5.666667e-01" "6.333333e-01" "7.000000e-01" "6.000000e-01" "5.666667e-01"
 [71] "5.666667e-01" "6.333333e-01" "5.000000e-01" "6.000000e-01" "7.333333e-01"
 [76] "6.333333e-01" "5.666667e-01" "5.333333e-01" "4.666667e-01" "7.333333e-01"
 [81] "6.000000e-01" "5.000000e-01" "6.333333e-01" "5.333333e-01" "4.333333e-01"
 [86] "4.333333e-01" "6.666667e-01" "5.666667e-01" "5.666667e-01" "6.666667e-01"
 [91] "5.666667e-01" "6.000000e-01" "5.666667e-01" "5.666667e-01" "6.000000e-01"
 [96] "7.333333e-01" "5.666667e-01" "7.333333e-01" "6.000000e-01" "5.000000e-01"
[101] "6.666667e-01" "7.000000e-01" "5.000000e-01" "6.333333e-01" "5.000000e-01"
[106] "5.000000e-01" "5.666667e-01" "5.666667e-01" "6.333333e-01" "6.000000e-01"
[111] "6.333333e-01" "6.333333e-01" "5.666667e-01" "5.666667e-01" "6.000000e-01"
[116] "6.333333e-01" "5.000000e-01" "6.333333e-01" "6.666667e-01" "5.000000e-01"
[121] "5.666667e-01" "5.000000e-01" "5.666667e-01" "5.666667e-01" "7.333333e-01"
[126] "5.666667e-01" "6.666667e-01" "6.666667e-01" "5.333333e-01" "6.333333e-01"
[131] "4.666667e-01" "5.666667e-01" "5.333333e-01" "5.000000e-01" "5.333333e-01"
[136] "5.333333e-01" "5.666667e-01" "7.333333e-01" "5.333333e-01" "6.000000e-01"
[141] "4.666667e-01" "5.000000e-01" "6.000000e-01" "5.000000e-01" "6.333333e-01"
[146] "7.000000e-01" "6.666667e-01" "6.000000e-01" "6.000000e-01" "5.000000e-01"
[151] "4.666667e-01" "6.000000e-01" "6.666667e-01" "6.666667e-01" "6.000000e-01"
[156] "5.000000e-01" "7.000000e-01" "4.666667e-01" "5.666667e-01" "6.666667e-01"
[161] "6.000000e-01" "5.666667e-01" "5.333333e-01" "6.333333e-01" "6.333333e-01"
[166] "6.000000e-01" "5.666667e-01" "5.666667e-01" "5.666667e-01" "6.666667e-01"
[171] "5.666667e-01" "6.000000e-01" "4.000000e-01" "5.333333e-01" "6.000000e-01"
[176] "5.333333e-01" "5.666667e-01" "5.333333e-01" "4.666667e-01" "6.333333e-01"
[181] "5.333333e-01" "5.666667e-01" "6.000000e-01" "5.333333e-01" "5.000000e-01"
[186] "7.000000e-01" "6.666667e-01" "6.000000e-01" "5.000000e-01" "6.666667e-01"
[191] "5.666667e-01" "6.000000e-01" "4.666667e-01" "6.666667e-01" "5.000000e-01"
[196] "          NA" "6.666667e-01"
  1. Sum up the total grooming ratings for each coat type.
# Total grooming rating per coat type, written as an explicit grouped sum
# (missing grooming values are ignored); the total is stored in column n.
traits |>
  group_by(coat_type) |>
  summarise(n = sum(grooming, na.rm = TRUE))
# A tibble: 10 × 2
   coat_type     n
   <chr>     <dbl>
 1 Corded       14
 2 Curly        21
 3 Double      171
 4 Hairless      4
 5 Rough         8
 6 Silky        30
 7 Smooth      113
 8 Wavy         15
 9 Wiry         69
10 <NA>          2
  1. Add inline R code to the following sentence in R Markdown to say how many rows have NA for grooming:

We are missing grooming data for [insert inline R code] breeds.