Jeff Stevens

2023-03-27

# Introduction

## Set-up

library(tidyverse)
library(palmerpenguins)

# Creating functions

## Creating functions

functionname <- function(argument1, argument2) {
# Function contents
}

## Creating functions

mymean <- function(x) {
sum(x) / length(x)
}
mymean(mtcars$mpg)  20.09062 mean(mtcars$mpg)
 20.09062

## Multi-line functions

mymean2 <- function(x) {
mysum <- sum(x)
mysum_divided <- mysum / length(x)
}
mymean2(mtcars$mpg) Why no output? (mymean_obj <- mymean2(mtcars$mpg))
 20.09062

## Returning output

Don’t assign last step to object

mymean3 <- function(x) {
mysum <- sum(x)
mysum / length(x)
}

mymean3(mtcars$mpg)  20.09062 ## Returning output Or use return() mymean4 <- function(x) { mysum <- sum(x) mysum_divided <- mysum / length(x) return(mysum_divided) } mymean4(mtcars$mpg)
 20.09062

## Return multiple output values

list()

mymean5 <- function(x) {
mysum <- sum(x)
mysum_divided <- mysum / length(x)
list(sum = mysum, mean = mysum_divided)
}

mymean5(mtcars$mpg) $sum
 642.9

$mean  20.09062 ## Save intermediate objects mymean6 <- function(x) { mysum <<- sum(x) mysum / length(x) } mymean6(mtcars$mpg)
 20.09062

Note

This is fine when testing out a function but probably not great practice ‘in production’. Why?

# Arguments

## Arguments

multiplier <- function(x, constant) {
x * constant
}
multiplier(x = 7, constant = 3)
 21
multiplier(x = 1:10, constant = 3)
   3  6  9 12 15 18 21 24 27 30
multiplier(x = 1:10)
Error in multiplier(x = 1:10) :
argument "constant" is missing, with no default

## Argument default values

multiplier2 <- function(x, constant = 3) {
x * constant
}
multiplier2(x = 1:10)
   3  6  9 12 15 18 21 24 27 30
multiplier2(x = 1:10, constant = 5)
   5 10 15 20 25 30 35 40 45 50

## Let’s write a function!

Here’s the formula that reverse codes scale values

Valuemax+ Valuemin-Score

Write a function that calculates the reversed code score. What arguments do you need?

# Conditional execution

## Conditional execution

head(penguins[, 1:5])
# A tibble: 6 × 5
species island    bill_length_mm bill_depth_mm flipper_length_mm
<fct>   <fct>              <dbl>         <dbl>             <int>
1 Adelie  Torgersen           39.1          18.7               181
2 Adelie  Torgersen           39.5          17.4               186
3 Adelie  Torgersen           40.3          18                 195
4 Adelie  Torgersen           NA            NA                  NA
5 Adelie  Torgersen           36.7          19.3               193
6 Adelie  Torgersen           39.3          20.6               190
mymean6(penguins$bill_length_mm)  NA ## Conditional execution head(penguins[, 1:5]) # A tibble: 6 × 5 species island bill_length_mm bill_depth_mm flipper_length_mm <fct> <fct> <dbl> <dbl> <int> 1 Adelie Torgersen 39.1 18.7 181 2 Adelie Torgersen 39.5 17.4 186 3 Adelie Torgersen 40.3 18 195 4 Adelie Torgersen NA NA NA 5 Adelie Torgersen 36.7 19.3 193 6 Adelie Torgersen 39.3 20.6 190 mymean10 <- function(x) { sum(x, na.rm = TRUE) / sum(!is.na(x)) } mymean10(penguins$bill_length_mm)
 43.92193

## Conditional execution

But if you want the user to control whether NA is ignored

mymean11 <- function(x, ignore_na = TRUE) {
if (ignore_na) {
sum(x, na.rm = TRUE) / sum(!is.na(x))
} else {
sum(x) / length(x)
}
}
mymean11(penguins$bill_length_mm)  43.92193 mymean11(penguins$bill_length_mm, ignore_na = FALSE)
 NA

## Multiple conditions

Use else if

age_cutoffs <- function(x) {
if(x <= 1.5) {
"puppy"
} else if (x <= 3) {
} else if (x <= 10) {
} else {
"senior"
}
}
age_cutoffs(1)
 "puppy"
age_cutoffs(2)
 "adolescent"
age_cutoffs(5)
 "adult"

## Stopping based on conditionals

age_cutoffs2 <- function(x) {
if(x <= 1.5) {
"puppy"
} else if (x <= 3) {
} else if (x <= 10) {
} else if (x <= 20) {
"senior"
} else {
stop("Age exceeded 20.")
}
}
age_cutoffs2(15)
 "senior"
age_cutoffs2(22)
Error in age_cutoffs2(22) : Age exceeded 20.

## Multiple conditions

Use switch()

central_tend <- function(x, type) {
switch(type,
mean = mean(x),
median = median(x),
trimmed = mean(x, trim = .1))
}
vector <- rcauchy(100)
central_tend(x = vector, type = "mean")
 3.548399
central_tend(x = vector, type = "median")
 0.04851638
central_tend(x = vector, type = "trimmed")
 0.3435384

# Data frame functions

## Creating functions in tidyverse

mean_species_bill <- function(df) {
df |>
group_by(species) |>
summarise(mean(bill_length_mm, na.rm = TRUE))
}

mean_species_bill(penguins)
# A tibble: 3 × 2
species   mean(bill_length_mm, na.rm = TRUE)
<fct>                                    <dbl>
2 Chinstrap                                 48.8
3 Gentoo                                    47.5

## Creating functions in tidyverse

What if we want user to input grouping and response variable?

grouped_mean <- function(df, group_var, mean_var) {
df |>
group_by(group_var) |>
summarise(mean(mean_var, na.rm = TRUE))
}
penguins |>
grouped_mean(group_var = species, mean_var = bill_length_mm)
Error in group_by():
! Must group by variables found in .data.
✖ Column group_var is not found.
Run rlang::last_trace() to see where the error occurred.

## Embracing

Embrace variables in {{ }}

grouped_mean2 <- function(df, group_var, mean_var) {
df |>
group_by({{ group_var }}) |>
summarize(mean({{ mean_var }}, na.rm = TRUE))
}
penguins |>
grouped_mean2(group_var = species, mean_var = bill_length_mm)
# A tibble: 3 × 2
species   mean(bill_length_mm, na.rm = TRUE)
<fct>                                    <dbl>
3 Gentoo                                    47.5