Dates and times

Jeff Stevens

2025-03-14

Introduction

The problem

What’s different between these data sets?

data1
# A tibble: 12 × 2
   test_date  birth_date  
   <date>     <chr>       
 1 2023-01-02 Jul 14, 1997
 2 2023-01-02 Jan 28, 1998
 3 2023-01-05 Jul 23, 1967
 4 2023-01-05 Aug 26, 1987
 5 2023-01-08 Nov 09, 1950
 6 2023-01-14 Aug 24, 2001
 7 2023-01-16 Sep 23, 1979
 8 2023-01-23 Mar 22, 1970
 9 2023-01-26 Apr 21, 1957
10 2023-01-27 Mar 07, 1989
11 2023-01-27 Nov 03, 1983
12 2023-01-28 Jan 31, 1989
data2
# A tibble: 9 × 4
  test_date  birth_date age_at_testing birth_month
  <date>     <date>     <drtn>         <ord>      
1 2023-01-05 1967-07-23 20255 days     July       
2 2023-01-05 1987-08-26 12916 days     August     
3 2023-01-08 1950-11-09 26358 days     November   
4 2023-01-16 1979-09-23 15821 days     September  
5 2023-01-23 1970-03-22 19300 days     March      
6 2023-01-26 1957-04-21 24021 days     April      
7 2023-01-27 1989-03-07 12379 days     March      
8 2023-01-27 1983-11-03 14330 days     November   
9 2023-01-28 1989-01-31 12415 days     January    

Set-up

Dates and times

Reminder

Dates and times are augmented doubles

(x <- as.Date("2023-03-22"))
[1] "2023-03-22"
class(x)
[1] "Date"
typeof(x)
[1] "double"

The standard (ISO-8601) way to represent dates and times is

YYYY-MM-DD HH:MM:SS, e.g., 2025-03-14 15:30:00

Dates and times with {lubridate}

Current date/time

today()  # {lubridate}
[1] "2025-04-25"
now()  # {lubridate}
[1] "2025-04-25 10:22:24 CDT"

Creating dates/times

as_date("2025-03-14") 
[1] "2025-03-14"
ymd("2025-03-14")
[1] "2025-03-14"
ymd(20250314)
[1] "2025-03-14"

Convert dates to ISO-8601

mdy("January 31st, 2017")
[1] "2017-01-31"
mdy("Jan 31 17")
[1] "2017-01-31"
dmy("31-Jan-2017")
[1] "2017-01-31"

Convert dates to ISO-8601

# Build the class schedule one row per meeting. `date` is stored as
# day-month-year text (character), not as a Date.
r_class_schedule <- tribble(
  ~meeting, ~date,         ~topic,
  1L,       "22 Jan 2025", "Course introduction",
  2L,       "24 Jan 2025", "Working in RStudio",
  3L,       "27 Jan 2025", "Coding basics",
  4L,       "29 Jan 2025", "Workflows"
)
r_class_schedule # print the tibble
# A tibble: 4 × 3
  meeting date        topic              
    <int> <chr>       <chr>              
1       1 22 Jan 2025 Course introduction
2       2 24 Jan 2025 Working in RStudio 
3       3 27 Jan 2025 Coding basics      
4       4 29 Jan 2025 Workflows          
# Parse the day-month-year strings in `date` with dmy() and store the
# resulting Date values in a new `iso_date` column, then print the tibble.
r_class_schedule <- mutate(r_class_schedule, iso_date = dmy(date))
r_class_schedule
# A tibble: 4 × 4
  meeting date        topic               iso_date  
    <int> <chr>       <chr>               <date>    
1       1 22 Jan 2025 Course introduction 2025-01-22
2       2 24 Jan 2025 Working in RStudio  2025-01-24
3       3 27 Jan 2025 Coding basics       2025-01-27
4       4 29 Jan 2025 Workflows           2025-01-29

Date/time components

Extract date/time elements

First, let’s extract a random sample of departure times

# Seed the RNG so slice_sample() draws the same 20 rows on every run.
set.seed(20250314)
# Keep flights with a recorded dep_time, sample 20 of them, and assemble a
# date-time from the year/month/day/hour/minute columns.
# NOTE(review): `hour`/`minute` may describe the scheduled rather than the
# actual departure in `flights` -- confirm against the data set documentation.
datetime <- flights |>
  filter(!is.na(dep_time)) |>
  slice_sample(n = 20) |>
  mutate(departure = make_datetime(year, month, day, hour, minute)) |>
  pull(departure)
datetime # print the sampled date-times
 [1] "2013-10-03 07:20:00 UTC" "2013-09-01 14:05:00 UTC"
 [3] "2013-01-27 09:59:00 UTC" "2013-12-21 14:45:00 UTC"
 [5] "2013-02-20 09:11:00 UTC" "2013-01-09 16:43:00 UTC"
 [7] "2013-09-23 16:00:00 UTC" "2013-12-03 14:29:00 UTC"
 [9] "2013-04-16 07:05:00 UTC" "2013-04-30 07:30:00 UTC"
[11] "2013-11-29 13:45:00 UTC" "2013-12-23 07:55:00 UTC"
[13] "2013-09-21 06:30:00 UTC" "2013-03-19 17:55:00 UTC"
[15] "2013-06-10 08:50:00 UTC" "2013-04-30 13:45:00 UTC"
[17] "2013-04-20 16:52:00 UTC" "2013-04-18 06:05:00 UTC"
[19] "2013-07-29 14:48:00 UTC" "2013-08-10 19:16:00 UTC"

Extract date/time elements

Now let’s extract components

year(datetime)
 [1] 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013
[16] 2013 2013 2013 2013 2013
month(datetime)
 [1] 10  9  1 12  2  1  9 12  4  4 11 12  9  3  6  4  4  4  7  8
month(datetime, label = TRUE)
 [1] Oct Sep Jan Dec Feb Jan Sep Dec Apr Apr Nov Dec Sep Mar Jun Apr Apr Apr Jul
[20] Aug
12 Levels: Jan < Feb < Mar < Apr < May < Jun < Jul < Aug < Sep < ... < Dec
month(datetime, label = TRUE, abbr = FALSE)
 [1] October   September January   December  February  January   September
 [8] December  April     April     November  December  September March    
[15] June      April     April     April     July      August   
12 Levels: January < February < March < April < May < June < ... < December

Extract date/time elements

Now let’s extract components

mday(datetime)
 [1]  3  1 27 21 20  9 23  3 16 30 29 23 21 19 10 30 20 18 29 10
wday(datetime)
 [1] 5 1 1 7 4 4 2 3 3 3 6 2 7 3 2 3 7 5 2 7
wday(datetime, label = TRUE, abbr = FALSE)
 [1] Thursday  Sunday    Sunday    Saturday  Wednesday Wednesday Monday   
 [8] Tuesday   Tuesday   Tuesday   Friday    Monday    Saturday  Tuesday  
[15] Monday    Tuesday   Saturday  Thursday  Monday    Saturday 
7 Levels: Sunday < Monday < Tuesday < Wednesday < Thursday < ... < Saturday

Extract date/time elements

Now let’s extract components

hour(datetime)
 [1]  7 14  9 14  9 16 16 14  7  7 13  7  6 17  8 13 16  6 14 19
minute(datetime)
 [1] 20  5 59 45 11 43  0 29  5 30 45 55 30 55 50 45 52  5 48 16
second(datetime)
 [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

Time spans

Time spans

Find or create durations

r_class_schedule
# A tibble: 4 × 4
  meeting date        topic               iso_date  
    <int> <chr>       <chr>               <date>    
1       1 22 Jan 2025 Course introduction 2025-01-22
2       2 24 Jan 2025 Working in RStudio  2025-01-24
3       3 27 Jan 2025 Coding basics       2025-01-27
4       4 29 Jan 2025 Workflows           2025-01-29
r_class_schedule$iso_date[2] - r_class_schedule$iso_date[1]
Time difference of 2 days
# `iso_date` is already a Date column, so it can be subtracted from today()
# directly; re-parsing it with ymd() is unnecessary.
today() - r_class_schedule$iso_date[1] # days since class start
Time difference of 93 days
r_class_schedule$iso_date[1] + 7 * 7 # 7 weeks after class start
[1] "2025-03-12"

Filter dates

# Keep the meetings held before 26 Jan 2025, then add the date one week after
# each meeting and the number of days elapsed since it (relative to today).
# The cutoff is parsed with ymd() so the comparison is Date-to-Date instead of
# relying on implicit coercion of a character string.
(oldsched <- r_class_schedule |>
  filter(iso_date < ymd("2025-01-26")) |>
  mutate(week_later = iso_date + 7,
         days_since = today() - iso_date))
# A tibble: 2 × 6
  meeting date        topic               iso_date   week_later days_since
    <int> <chr>       <chr>               <date>     <date>     <drtn>    
1       1 22 Jan 2025 Course introduction 2025-01-22 2025-01-29 93 days   
2       2 24 Jan 2025 Working in RStudio  2025-01-24 2025-01-31 91 days   

Solving the problem

library(tidyverse)

# Simulate the "raw" data set used in the exercise.
nrows <- 12
set.seed(12) # fixed seed so the simulated dates are reproducible

# Both columns are drawn by sampling (with replacement) from the day numbers
# spanning a date range and converting the draws back to dates. test_date is
# sampled first and birth_date second; this order determines which random
# numbers each column receives.
data1 <- tibble(
  # Test dates: January 2023, kept as Date.
  test_date = (as_date("2023-01-01"):as_date("2023-01-31")) |>
    sample(nrows, replace = TRUE) |>
    as_date(),
  # Birth dates: 1950-01-01 to 2005-01-31, stored as text such as
  # "Jul 14, 1997" so that they have to be parsed before use.
  birth_date = (as_date("1950-01-01"):as_date("2005-01-31")) |>
    sample(nrows, replace = TRUE) |>
    as_date() |>
    format(format = "%b %d, %Y")
) |>
  arrange(test_date)

Solving the problem

What code generates data2 from data1?

data1
# A tibble: 12 × 2
   test_date  birth_date  
   <date>     <chr>       
 1 2023-01-02 Jul 14, 1997
 2 2023-01-02 Jan 28, 1998
 3 2023-01-05 Jul 23, 1967
 4 2023-01-05 Aug 26, 1987
 5 2023-01-08 Nov 09, 1950
 6 2023-01-14 Aug 24, 2001
 7 2023-01-16 Sep 23, 1979
 8 2023-01-23 Mar 22, 1970
 9 2023-01-26 Apr 21, 1957
10 2023-01-27 Mar 07, 1989
11 2023-01-27 Nov 03, 1983
12 2023-01-28 Jan 31, 1989
data2
# A tibble: 9 × 4
  test_date  birth_date age_at_testing birth_month
  <date>     <date>     <drtn>         <ord>      
1 2023-01-05 1967-07-23 20255 days     July       
2 2023-01-05 1987-08-26 12916 days     August     
3 2023-01-08 1950-11-09 26358 days     November   
4 2023-01-16 1979-09-23 15821 days     September  
5 2023-01-23 1970-03-22 19300 days     March      
6 2023-01-26 1957-04-21 24021 days     April      
7 2023-01-27 1989-03-07 12379 days     March      
8 2023-01-27 1983-11-03 14330 days     November   
9 2023-01-28 1989-01-31 12415 days     January    

Let’s code!

Dates and times