Modified

May 12, 2026

About

This page documents and implements the data processing workflow for bootcamp electronic sign-ins via the Qualtrics link: https://pennstate.qualtrics.com/jfe/form/SV_5vdpIekRnWpfsBo

Setup

We load some packages into memory for convenience.

Code
suppressPackageStartupMessages(library('ggplot2'))
suppressPackageStartupMessages(library('tidyverse'))

Gather

The registrations and presenters data are extracted as part of the registrations workflow and saved as CSVs. We’ll import those.

The sign-ins data live in the `sign-ins` tab of a Google Sheet and must be retrieved from Google Sheets.

Code
# Validate parameters and gather the three input tables.
#
# `assertthat::is.string()`, `is.writeable()` and `is.readable()` only *return*
# TRUE/FALSE, so each predicate is wrapped in `assert_that()` to turn a failed
# check into an error instead of a silently discarded value. `invisible()`
# keeps the resulting TRUE out of the rendered output.
invisible(assertthat::assert_that(assertthat::is.string(params$csv_dir)))
if (!dir.exists(params$csv_dir)) {
  message("Creating missing `include/csv/`.")
  # `recursive = TRUE` also creates any missing parent directories.
  dir.create(params$csv_dir, recursive = TRUE)
}
invisible(assertthat::assert_that(assertthat::is.writeable(params$csv_dir)))

# Authenticate googlesheets4 with the account given in `params$psu_id`.
invisible(assertthat::assert_that(assertthat::is.string(params$psu_id)))
googlesheets4::gs4_auth(email = params$psu_id)

# Sign-ins
# Validate the same parameter that is passed to `read_sheet()` below.
invisible(assertthat::assert_that(assertthat::is.string(params$reg_sheets_id)))
signins <- googlesheets4::read_sheet(
  params$reg_sheets_id,
  sheet = "sign-ins"
)
invisible(assertthat::assert_that(tibble::is_tibble(signins)))

signins_full_fn <- file.path(params$csv_dir, params$signins_fn)
invisible(assertthat::assert_that(assertthat::is.string(signins_full_fn)))

# Registrations
registration_full_fn <- file.path(params$csv_dir, params$registration_fn)
invisible(assertthat::assert_that(assertthat::is.string(registration_full_fn)))
invisible(assertthat::assert_that(assertthat::is.readable(registration_full_fn)))

# Import "cleaned" registrations file
registrations_clean <- readr::read_csv(
  file = registration_full_fn,
  show_col_types = FALSE
)

# Presenters
presenters_full_fn <- file.path(params$csv_dir, params$presenters_fn)
invisible(assertthat::assert_that(assertthat::is.string(presenters_full_fn)))
invisible(assertthat::assert_that(assertthat::is.readable(presenters_full_fn)))

presenters <- readr::read_csv(
  file = presenters_full_fn,
  show_col_types = FALSE
)

Check that we downloaded data.

Code
# Rows and columns of the downloaded sign-ins sheet.
signins |> dim()
[1] 76  5
Code
# Rows and columns of the imported registrations table.
registrations_clean |> dim()
[1] 91 11
Code
# Rows and columns of the imported presenters table.
presenters |> dim()
[1] 20  7

Clean

We need to clean the variable (column) names in the data files.

Code
# Standardise the sign-in column names, rename `date` to `signin_date`, and
# build a single `name` column ("first last") from the two name fields.
signins_clean <- signins |>
  janitor::clean_names() |>
  dplyr::rename(signin_date = date) |>
  dplyr::mutate(name = paste(first_name, last_name, sep = " "))
Code
# Standardise the presenter column names.
presenters_clean <- presenters |>
  janitor::clean_names()

Show variable names prior to joining.

Code
# Column names of the cleaned sign-ins table.
signins_clean |> names()
[1] "first_name"  "last_name"   "email"       "signin_date" "join_osi"   
[6] "name"       
Code
# Column names of the cleaned registrations table.
registrations_clean |> names()
 [1] "timestamp"   "email"       "attend_days" "name"        "dept"       
 [6] "position"    "comments"    "dropped_out" "college"     ".default"   
[11] ".missing"   
Code
# Column names of the cleaned presenters table.
presenters_clean |> names()
[1] "name"          "type"          "agreed"        "confirmed"    
[5] "reminder_sent" "college"       "email"        

Join registrations_clean and presenters_clean.

Code
# Combine registrants and presenters into one roster, keeping rows from both.
# With `by = NULL`, dplyr joins on every column name the two tables share and
# reports the keys it picked in a message.
# NOTE(review): consider naming the join keys explicitly so that adding a
# column to either table cannot silently change how rows are matched.
people <- registrations_clean |>
  dplyr::full_join(presenters_clean, by = NULL, copy = TRUE)
Joining with `by = join_by(email, name, college)`

We have the following fields in common across the datasets: name, email.

Code
# Attach sign-in records to the roster, keeping unmatched rows from both sides.
# With `by = NULL`, dplyr joins on every column name the two tables share.
# NOTE(review): this join raises a many-to-many warning. Repeated sign-ins by
# one person would explain multiple matches in `signins_clean`, but multiple
# matches in `people` suggest duplicated roster rows -- confirm against the
# data before relying on the counts reported from this table.
people_signed_in <- signins_clean |>
  dplyr::full_join(people, by = NULL, copy = TRUE)
Joining with `by = join_by(email, name)`
Warning in dplyr::full_join(signins_clean, people, by = NULL, copy = TRUE): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 13 of `x` matches multiple rows in `y`.
ℹ Row 75 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
  "many-to-many"` to silence this warning.

Modify people_signed_in to flag people who came to the workshop but did not register in advance, and, conversely, people who registered but did not sign in.

Code
# Add two logical flags per row:
#   signin_noregister: has a sign-in date but no registration timestamp.
#   register_nosignin: has a registration timestamp but no sign-in date.
# `is.na()` never returns NA, so the comparisons are already TRUE/FALSE and
# need no `if_else(..., TRUE, FALSE)` wrapper.
# NOTE(review): `timestamp` comes from the registrations table only, so a
# presenter who signed in without a registration row is also flagged as
# `signin_noregister` -- confirm that this is intended.
people_signed_in <- people_signed_in |>
  dplyr::mutate(
    signin_noregister = !is.na(signin_date) & is.na(timestamp),
    register_nosignin = is.na(signin_date) & !is.na(timestamp)
  )
Code
# Save the joined and flagged sign-in table as a CSV in the CSV directory.
people_signed_in |>
  readr::write_csv(
    file = file.path(params$csv_dir, "bootcamp-2026-signins.csv")
  )

Report

Code
# Split the sign-in table by day: keep rows whose `signin_date`, rendered as
# text, contains the given calendar date.
# NOTE(review): rows with a missing `signin_date` are dropped by these filters,
# so `register_nosignin` (TRUE only when `signin_date` is missing) can never be
# TRUE inside `mon_signins` or `tue_signins`.
mon_signins <- dplyr::filter(
  people_signed_in,
  stringr::str_detect(as.character(signin_date), pattern = "2026-05-11")
)

tue_signins <- dplyr::filter(
  people_signed_in,
  stringr::str_detect(as.character(signin_date), "2026-05-12")
)

Monday, May 11

We had n=44 attendees and presenters sign in. Of these, n=7 attended but did not register in advance. N=0 registered or committed to present but did not sign in.

Here is the distribution of units.

Code
 # Horizontal bar chart of Monday sign-ins per college/unit, ordered by count.
 # Rows without a sign-in date or a college are excluded before counting.
 mon_signins |>
   dplyr::filter(!is.na(signin_date), !is.na(college)) |>
   dplyr::count(college, sort = TRUE) |>
   dplyr::mutate(college = forcats::fct_reorder(college, n)) |>
   ggplot() +
   # `geom_col()` plots the precomputed counts in `n` directly.
   geom_col(aes(x = college, y = n), fill = "#BF8226") +
   scale_y_continuous(breaks = seq(2, 24, by = 2)) +
   xlab("") +
   # `theme_classic()` is a complete theme, so it must come before any
   # `theme()` tweaks; placed after them it would override them.
   theme_classic() +
   theme(legend.position = "right", legend.title = element_blank()) +
   coord_flip()
Figure 10.1: Check-ins by college/unit on Monday, May 11

Tuesday, May 12

We had n=30 attendees and presenters sign in. Of these, n=8 attended but did not register in advance. N=0 registered or committed to present but did not sign in.

Here is the distribution of units.

Code
 # Horizontal bar chart of Tuesday sign-ins per college/unit, ordered by count.
 # Rows without a sign-in date or a college are excluded before counting.
 tue_signins |>
   dplyr::filter(!is.na(signin_date), !is.na(college)) |>
   dplyr::count(college, sort = TRUE) |>
   dplyr::mutate(college = forcats::fct_reorder(college, n)) |>
   ggplot() +
   # `geom_col()` plots the precomputed counts in `n` directly.
   geom_col(aes(x = college, y = n), fill = "#BF8226") +
   scale_y_continuous(breaks = seq(2, 24, by = 2)) +
   xlab("") +
   # `theme_classic()` is a complete theme, so it must come before any
   # `theme()` tweaks; placed after them it would override them.
   theme_classic() +
   theme(legend.position = "right", legend.title = element_blank()) +
   coord_flip()
Figure 10.2: Check-ins by college/unit on Tuesday, May 12