Numbers

Author

Jeffrey R. Stevens

Published

March 6, 2023

For these exercises, we’ll use the dog breed traits and dog breed popularity rankings data sets.

  1. Load tidyverse and import dog_breed_traits_clean.csv to traits.
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.0     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Import the cleaned breed-traits CSV from the project's data folder;
# show_col_types = FALSE silences the column-specification message.
traits <- here::here("data/dog_breed_traits_clean.csv") |>
  read_csv(show_col_types = FALSE)
  1. Create a column called mean_ratings that holds the per-row mean of all rating columns, and assign the result to traits2.
# Compute each breed's mean rating across the five rating columns.
# rowwise() makes mean() work within a single row instead of down a column.
# na.rm = TRUE must be an argument of mean(), not of c(): inside c() it is
# coerced to the number 1 and averaged in as a sixth value, which biases
# every mean and does not remove NAs. Results printed before this fix will
# change when the document is re-rendered.
traits2 <- traits |>
  rowwise() |>
  mutate(
    mean_ratings = mean(
      c(affectionate, children, other_dogs, shedding, grooming),
      na.rm = TRUE
    )
  ) |>
  ungroup() # drop the row-wise grouping so later verbs act on whole columns
  1. Convert mean_ratings to a proportion in a column called pmean_ratings and add to traits2.
# Rescale the mean rating to a proportion by dividing by 5
# (presumably the top of the rating scale -- confirm against the data).
traits2 <- mutate(traits2, pmean_ratings = mean_ratings / 5)
  1. Apply a natural log transformation to the pmean_ratings vector.
# log() uses base e by default, so this is the natural log of each proportion.
traits2 |>
  pull(pmean_ratings) |>
  log()
  [1] -0.3101549 -0.4567584 -0.4054651 -0.3101549 -0.5679840 -0.4567584
  [7] -0.4567584 -0.6286087 -0.4054651 -0.5679840 -0.4567584 -0.5679840
 [13] -0.4054651 -0.5679840 -0.6286087 -0.3101549 -0.4054651 -0.4567584
 [19] -0.3566749 -0.3566749 -0.4567584 -0.2231436 -0.5679840 -0.3566749
 [25] -0.7621401 -0.5108256 -0.3101549 -0.5108256 -0.3566749 -0.3101549
 [31] -0.3101549 -0.5108256 -0.5108256 -0.9162907 -0.4054651 -0.4567584
 [37] -0.6931472 -0.5679840 -0.4567584 -0.4567584 -0.3566749 -0.4567584
 [43] -0.5679840 -0.4054651 -0.3101549 -0.6286087 -0.3101549 -0.7621401
 [49] -0.3566749 -0.5679840 -0.5108256 -0.4567584 -0.4567584 -0.7621401
 [55] -0.6931472 -0.4054651 -0.6286087 -0.4567584 -0.4567584 -0.6286087
 [61] -0.6931472 -0.7621401 -0.4567584 -0.7621401 -0.6286087 -0.5679840
 [67] -0.4567584 -0.3566749 -0.5108256 -0.5679840 -0.5679840 -0.4567584
 [73] -0.6931472 -0.5108256 -0.3101549 -0.4567584 -0.5679840 -0.6286087
 [79] -0.7621401 -0.3101549 -0.5108256 -0.6931472 -0.4567584 -0.6286087
 [85] -0.8362480 -0.8362480 -0.4054651 -0.5679840 -0.5679840 -0.4054651
 [91] -0.5679840 -0.5108256 -0.5679840 -0.5679840 -0.5108256 -0.3101549
 [97] -0.5679840 -0.3101549 -0.5108256 -0.6931472 -0.4054651 -0.3566749
[103] -0.6931472 -0.4567584 -0.6931472 -0.6931472 -0.5679840 -0.5679840
[109] -0.4567584 -0.5108256 -0.4567584 -0.4567584 -0.5679840 -0.5679840
[115] -0.5108256 -0.4567584 -0.6931472 -0.4567584 -0.4054651 -0.6931472
[121] -0.5679840 -0.6931472 -0.5679840 -0.5679840 -0.3101549 -0.5679840
[127] -0.4054651 -0.4054651 -0.6286087 -0.4567584 -0.7621401 -0.5679840
[133] -0.6286087 -0.6931472 -0.6286087 -0.6286087 -0.5679840 -0.3101549
[139] -0.6286087 -0.5108256 -0.7621401 -0.6931472 -0.5108256 -0.6931472
[145] -0.4567584 -0.3566749 -0.4054651 -0.5108256 -0.5108256 -0.6931472
[151] -0.7621401 -0.5108256 -0.4054651 -0.4054651 -0.5108256 -0.6931472
[157] -0.3566749 -0.7621401 -0.5679840 -0.4054651 -0.5108256 -0.5679840
[163] -0.6286087 -0.4567584 -0.4567584 -0.5108256 -0.5679840 -0.5679840
[169] -0.5679840 -0.4054651 -0.5679840 -0.5108256 -0.9162907 -0.6286087
[175] -0.5108256 -0.6286087 -0.5679840 -0.6286087 -0.7621401 -0.4567584
[181] -0.6286087 -0.5679840 -0.5108256 -0.6286087 -0.6931472 -0.3566749
[187] -0.4054651 -0.5108256 -0.6931472 -0.4054651 -0.5679840 -0.5108256
[193] -0.7621401 -0.4054651 -0.6931472         NA -0.4054651
  1. Round pmean_ratings to two decimal places.
# Print the proportions rounded to two decimal places.
traits2 |>
  pull(pmean_ratings) |>
  round(digits = 2)
  [1] 0.73 0.63 0.67 0.73 0.57 0.63 0.63 0.53 0.67 0.57 0.63 0.57 0.67 0.57 0.53
 [16] 0.73 0.67 0.63 0.70 0.70 0.63 0.80 0.57 0.70 0.47 0.60 0.73 0.60 0.70 0.73
 [31] 0.73 0.60 0.60 0.40 0.67 0.63 0.50 0.57 0.63 0.63 0.70 0.63 0.57 0.67 0.73
 [46] 0.53 0.73 0.47 0.70 0.57 0.60 0.63 0.63 0.47 0.50 0.67 0.53 0.63 0.63 0.53
 [61] 0.50 0.47 0.63 0.47 0.53 0.57 0.63 0.70 0.60 0.57 0.57 0.63 0.50 0.60 0.73
 [76] 0.63 0.57 0.53 0.47 0.73 0.60 0.50 0.63 0.53 0.43 0.43 0.67 0.57 0.57 0.67
 [91] 0.57 0.60 0.57 0.57 0.60 0.73 0.57 0.73 0.60 0.50 0.67 0.70 0.50 0.63 0.50
[106] 0.50 0.57 0.57 0.63 0.60 0.63 0.63 0.57 0.57 0.60 0.63 0.50 0.63 0.67 0.50
[121] 0.57 0.50 0.57 0.57 0.73 0.57 0.67 0.67 0.53 0.63 0.47 0.57 0.53 0.50 0.53
[136] 0.53 0.57 0.73 0.53 0.60 0.47 0.50 0.60 0.50 0.63 0.70 0.67 0.60 0.60 0.50
[151] 0.47 0.60 0.67 0.67 0.60 0.50 0.70 0.47 0.57 0.67 0.60 0.57 0.53 0.63 0.63
[166] 0.60 0.57 0.57 0.57 0.67 0.57 0.60 0.40 0.53 0.60 0.53 0.57 0.53 0.47 0.63
[181] 0.53 0.57 0.60 0.53 0.50 0.70 0.67 0.60 0.50 0.67 0.57 0.60 0.47 0.67 0.50
[196]   NA 0.67
  1. Convert pmean_ratings to scientific notation.
# format() returns character strings; scientific = TRUE forces
# exponent notation (e.g. "5.000000e-01").
traits2 |>
  pull(pmean_ratings) |>
  format(scientific = TRUE)
  [1] "7.333333e-01" "6.333333e-01" "6.666667e-01" "7.333333e-01" "5.666667e-01"
  [6] "6.333333e-01" "6.333333e-01" "5.333333e-01" "6.666667e-01" "5.666667e-01"
 [11] "6.333333e-01" "5.666667e-01" "6.666667e-01" "5.666667e-01" "5.333333e-01"
 [16] "7.333333e-01" "6.666667e-01" "6.333333e-01" "7.000000e-01" "7.000000e-01"
 [21] "6.333333e-01" "8.000000e-01" "5.666667e-01" "7.000000e-01" "4.666667e-01"
 [26] "6.000000e-01" "7.333333e-01" "6.000000e-01" "7.000000e-01" "7.333333e-01"
 [31] "7.333333e-01" "6.000000e-01" "6.000000e-01" "4.000000e-01" "6.666667e-01"
 [36] "6.333333e-01" "5.000000e-01" "5.666667e-01" "6.333333e-01" "6.333333e-01"
 [41] "7.000000e-01" "6.333333e-01" "5.666667e-01" "6.666667e-01" "7.333333e-01"
 [46] "5.333333e-01" "7.333333e-01" "4.666667e-01" "7.000000e-01" "5.666667e-01"
 [51] "6.000000e-01" "6.333333e-01" "6.333333e-01" "4.666667e-01" "5.000000e-01"
 [56] "6.666667e-01" "5.333333e-01" "6.333333e-01" "6.333333e-01" "5.333333e-01"
 [61] "5.000000e-01" "4.666667e-01" "6.333333e-01" "4.666667e-01" "5.333333e-01"
 [66] "5.666667e-01" "6.333333e-01" "7.000000e-01" "6.000000e-01" "5.666667e-01"
 [71] "5.666667e-01" "6.333333e-01" "5.000000e-01" "6.000000e-01" "7.333333e-01"
 [76] "6.333333e-01" "5.666667e-01" "5.333333e-01" "4.666667e-01" "7.333333e-01"
 [81] "6.000000e-01" "5.000000e-01" "6.333333e-01" "5.333333e-01" "4.333333e-01"
 [86] "4.333333e-01" "6.666667e-01" "5.666667e-01" "5.666667e-01" "6.666667e-01"
 [91] "5.666667e-01" "6.000000e-01" "5.666667e-01" "5.666667e-01" "6.000000e-01"
 [96] "7.333333e-01" "5.666667e-01" "7.333333e-01" "6.000000e-01" "5.000000e-01"
[101] "6.666667e-01" "7.000000e-01" "5.000000e-01" "6.333333e-01" "5.000000e-01"
[106] "5.000000e-01" "5.666667e-01" "5.666667e-01" "6.333333e-01" "6.000000e-01"
[111] "6.333333e-01" "6.333333e-01" "5.666667e-01" "5.666667e-01" "6.000000e-01"
[116] "6.333333e-01" "5.000000e-01" "6.333333e-01" "6.666667e-01" "5.000000e-01"
[121] "5.666667e-01" "5.000000e-01" "5.666667e-01" "5.666667e-01" "7.333333e-01"
[126] "5.666667e-01" "6.666667e-01" "6.666667e-01" "5.333333e-01" "6.333333e-01"
[131] "4.666667e-01" "5.666667e-01" "5.333333e-01" "5.000000e-01" "5.333333e-01"
[136] "5.333333e-01" "5.666667e-01" "7.333333e-01" "5.333333e-01" "6.000000e-01"
[141] "4.666667e-01" "5.000000e-01" "6.000000e-01" "5.000000e-01" "6.333333e-01"
[146] "7.000000e-01" "6.666667e-01" "6.000000e-01" "6.000000e-01" "5.000000e-01"
[151] "4.666667e-01" "6.000000e-01" "6.666667e-01" "6.666667e-01" "6.000000e-01"
[156] "5.000000e-01" "7.000000e-01" "4.666667e-01" "5.666667e-01" "6.666667e-01"
[161] "6.000000e-01" "5.666667e-01" "5.333333e-01" "6.333333e-01" "6.333333e-01"
[166] "6.000000e-01" "5.666667e-01" "5.666667e-01" "5.666667e-01" "6.666667e-01"
[171] "5.666667e-01" "6.000000e-01" "4.000000e-01" "5.333333e-01" "6.000000e-01"
[176] "5.333333e-01" "5.666667e-01" "5.333333e-01" "4.666667e-01" "6.333333e-01"
[181] "5.333333e-01" "5.666667e-01" "6.000000e-01" "5.333333e-01" "5.000000e-01"
[186] "7.000000e-01" "6.666667e-01" "6.000000e-01" "5.000000e-01" "6.666667e-01"
[191] "5.666667e-01" "6.000000e-01" "4.666667e-01" "6.666667e-01" "5.000000e-01"
[196] "          NA" "6.666667e-01"
  1. Sum up the total grooming ratings for each coat type.
# Weighted count: with wt = grooming, the n column holds the summed
# grooming ratings within each coat type rather than a row count.
count(traits, coat_type, wt = grooming)
# A tibble: 10 × 2
   coat_type     n
   <chr>     <dbl>
 1 Corded       14
 2 Curly        21
 3 Double      171
 4 Hairless      4
 5 Rough         8
 6 Silky        30
 7 Smooth      113
 8 Wavy         15
 9 Wiry         69
10 <NA>          2
  1. Add inline R code to the following sentence in R Markdown to say how many rows have NA for grooming:

We are missing grooming data for [insert inline R code] breeds.