Registration

Modified

July 4, 2025

About

This page documents and implements the data processing workflow for bootcamp registration.

Setup

We load some packages into memory for convenience.

Code
suppressPackageStartupMessages(library('tidyverse'))
suppressPackageStartupMessages(library('ggplot2'))
suppressPackageStartupMessages(library('dplyr'))
suppressPackageStartupMessages(library('tidyr'))
suppressPackageStartupMessages(library('stringr'))
suppressPackageStartupMessages(library('lubridate'))

Import

The Google Form generates a Google Sheet that we download to a protected directory (include/csv) that is not synced to GitHub.

Important

This is because the sheet contains personally identifying information.

Code
# Make sure the protected CSV directory exists before downloading into it.
# `recursive = TRUE` also creates any missing parent directory, so a fresh
# checkout without the parent folder does not fail here.
if (!dir.exists(params$csv_dir)) {
  message("Creating missing `", params$csv_dir, "`.")
  dir.create(params$csv_dir, recursive = TRUE)
}

# Authenticate with Google Drive using the account named in the
# GMAIL_SURVEY environment variable.
options(gargle_oauth_email = Sys.getenv("GMAIL_SURVEY"))
googledrive::drive_auth()

# Export the responses sheet (`params$sheets_fn`) as CSV, replacing any
# previously downloaded copy.
googledrive::drive_download(
  params$sheets_fn,
  path = file.path(params$csv_dir, "registrations-2025.csv"),
  type = "csv",
  overwrite = TRUE
)

Clean

We reimport the saved CSV file and then clean it.

Code
# Re-read the CSV saved by the import step; column-type messages are muted.
registrations <- file.path(params$csv_dir, "registrations-2025.csv") |>
  readr::read_csv(show_col_types = FALSE)

# Show the raw column names (the Google Forms questions).
colnames(registrations)
 [1] "Timestamp"                                                                             
 [2] "Email Address"                                                                         
 [3] "Which days of the bootcamp will you attend?"                                           
 [4] "What is your name?"                                                                    
 [5] "What is your department or unit?"                                                      
 [6] "What is your current position?"                                                        
 [7] "Any comments?"                                                                         
 [8] "Are you interested in registering for the 2.5 day bootcamp or the Keynote address(es)?"
 [9] "Which Keynote address(es) are you interested in attending?"                            
[10] "presenter"                                                                             

Google Forms conveniently returns the questions as variable names at the top of each column. These are handy for creating a data dictionary, but awkward for data processing. We rename these for our convenience. We also export a data dictionary.

Code
# Original Google Forms questions, in the column order of the raw sheet.
# NOTE(review): the variable name is misspelled ("reqistrations"); it is kept
# as-is in case other chunks refer to it.
reqistrations_qs <- names(registrations)

# Replace the long question text with short, code-friendly column names.
# `rename()` keeps the column order, so the renamed frame stays aligned with
# `reqistrations_qs`.
registrations_clean <- registrations |>
  dplyr::rename(
    timestamp = "Timestamp",
    attend_days = "Which days of the bootcamp will you attend?",
    name = "What is your name?",
    psu_email = "Email Address",
    dept = "What is your department or unit?",
    position = "What is your current position?",
    comments = "Any comments?",
    bootcamp_keynote = "Are you interested in registering for the 2.5 day bootcamp or the Keynote address(es)?",
    which_keynotes = "Which Keynote address(es) are you interested in attending?"
  )

# Take the short names from the renamed data frame instead of typing them by
# hand. The hand-typed vector was in a different order from the sheet's
# columns, which paired "Any comments?" with `bootcamp_keynote` (and so on)
# in the data dictionary.
registrations_short <- names(registrations_clean)

# Flag personally identifying columns by name rather than by position so the
# flag cannot drift when columns are added or reordered.
# NOTE(review): `psu_email` is arguably identifying too; it is left FALSE here
# to match the existing dictionary -- confirm.
registrations_pid <- registrations_short %in% c("name")

# One row per column: full question, short name, and identifying flag.
registrations_dd <- data.frame(
  qs = reqistrations_qs,
  qs_short = registrations_short,
  pid = registrations_pid
)

registrations_dd |>
  knitr::kable(format = 'html')
readr::write_csv(registrations_dd,
                 file = file.path(params$csv_dir, "registrations-2025-data-dict.csv"))
Table 9.1: A minimal data dictionary.
qs qs_short pid
Timestamp timestamp FALSE
Email Address psu_email FALSE
Which days of the bootcamp will you attend? attend_days FALSE
What is your name? name TRUE
What is your department or unit? dept FALSE
What is your current position? position FALSE
Any comments? bootcamp_keynote FALSE
Are you interested in registering for the 2.5 day bootcamp or the Keynote address(es)? which_keynotes FALSE
Which Keynote address(es) are you interested in attending? comments FALSE
presenter presenter FALSE

Last, we drop a duplicate registration and convert the text timestamps into date/time objects. The new registered variable is created in the next section.

Code
# Rows for "Rick Gilmore" other than the one submitted at 6/20/2025 9:36:33
# are treated as duplicates. `%in%` is used instead of `==` / `!=` so the flag
# is never NA: with `==`, a row with a missing name produced NA and was then
# silently dropped by `filter()`.
gilmore_dup <- registrations_clean$name %in% "Rick Gilmore" &
  !(registrations_clean$timestamp %in% "6/20/2025 9:36:33")

# Drop the duplicates, then parse the month/day/year timestamp text into
# date-times in the America/New_York time zone.
registrations_clean <- registrations_clean |>
  dplyr::filter(!gilmore_dup) |>
  dplyr::mutate(timestamp = lubridate::mdy_hms(timestamp, tz = "America/New_York"))

Visualize

Registration numbers

Code
# Every cleaned row counts as a registration.
registrations_yes <- dplyr::mutate(registrations_clean, registered = TRUE)

# Flag respondents whose bootcamp/keynote answer mentions "Keynote".
keynotes_only <- dplyr::mutate(
  registrations_clean,
  keynotes = stringr::str_detect(bootcamp_keynote, "Keynote")
)

As of 2025-07-04 07:58:03.36449, we have \(n = 22\) registered attendees. That is 29.3% of our target registration/attendance limit of \(n = 75\).

Time series

Code
# Cumulative registrations over time: order responses by submission time and
# number them 1..n.
registrations_clean |>
  dplyr::arrange(timestamp) |>
  dplyr::mutate(resp_index = seq_along(timestamp)) |>
  ggplot() +
  aes(x = timestamp, y = resp_index) +
  geom_point() +
  geom_line() +
  labs(x = NULL, y = 'n registrations') +
  # Anchor the axis at zero but let the upper limit follow the data. The
  # previous fixed limit of 20 removed every point past the 20th registration.
  ylim(0, NA) +
  # No colour aesthetic is mapped, so the former manual colour scale had no
  # effect and has been dropped.
  theme(axis.text.x = element_text(angle = 90),
        legend.position = "none")
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_line()`).
Figure 9.1: Time series of registrations. Note that the initial group includes presenters and organizers.

Registrant attendance plans by day

Code
# Count how many registrants plan to attend each day. `attend_days` is free
# text listing the chosen days, so each day is detected by its abbreviation.
# `summarise()` collapses straight to one row of totals, and `na.rm = TRUE`
# keeps the counts defined when a registrant left `attend_days` blank.
registrations_clean |>
  dplyr::summarise(
    n_wed = sum(stringr::str_detect(attend_days, "Wed"), na.rm = TRUE),
    n_thu = sum(stringr::str_detect(attend_days, "Thu"), na.rm = TRUE),
    n_fri = sum(stringr::str_detect(attend_days, "Fri"), na.rm = TRUE)
  ) |>
  knitr::kable(format = "html") |>
  kableExtra::kable_classic()
Table 9.2
n_wed n_thu n_fri
15 18 12

Bootcamp registrations by day.

Registrants by position

Code
# Horizontal bar chart of registrant counts per position; rows without a
# position are left out, and the legend is hidden because the bars are
# already labelled on the axis.
registrations_yes |>
  dplyr::filter(!is.na(position)) |>
  ggplot(mapping = aes(x = position, fill = position)) +
  geom_bar() +
  coord_flip() +
  theme(legend.position = "none")
Figure 9.2: Registrations by position.

Registrants by unit

Code
# Normalise free-text department answers to one canonical label per unit,
# then map each canonical department to its college/unit.
registrations_yes <- registrations_yes |>
  dplyr::mutate(
    # Values not listed here pass through `recode()` unchanged.
    dept = dplyr::recode(
      dept,
      `Department of Chemical Engineering` = "Chemical Engineering",
      `Clinical Psychology` = "Psychology",
      `Psychology (Cognitive)` = "Psychology",
      `Psychology / SSRI` = "Psychology",
      `Department of Psychology` = "Psychology",
      `Cognitive Psychology` = "Psychology",
      `Psych` = "Psychology",
      `English language` = "English",
      `english` = "English",
      `English Language Teaching` = "English",
      `English Department` = "English",
      `Languages` = "Global Languages & Literatures",
      `Languages and Literature` = "Global Languages & Literatures",
      `Department of Foreign Languages` = "Global Languages & Literatures",
      `Linguistics` = "Applied Linguistics",
      `Human Development and Family Studies & Social Data Analytics` = "HDFS",
      `Human Development and Family Studies` = "HDFS",
      `Human Development and Family Studies (HDFS)` = "HDFS",
      `Department of Human Development and Family Studies` = "HDFS",
      `HDFS/DEMO` = "HDFS",
      `RPTM` = "Recreation, Park, & Tourism Management",
      `Sociology and Social Data Analytics` = "Sociology",
      `Spanish Italian and portuguese` = "Spanish, Italian, & Portuguese",
      `Spanish, Italian, and Portuguese Department` = "Spanish, Italian, & Portuguese",
      `Spanish Italian and Portuguese` = "Spanish, Italian, & Portuguese",
      `Nutrition` = "Nutritional Sciences",
      `College of IST` = "IST",
      `Statistics Department` = "Statistics",
      `Recreation, Park and Tourism Management` = "Recreation, Park, & Tourism Management",
      `SHS` = "Student Health Svcs",
      `ESM` = "Engineering Science & Mechanics",
      `Engineering Science` = "Engineering Science & Mechanics",
      `Engineering Science and Mechanics` = "Engineering Science & Mechanics",
      `Department of Food Science` = "Food Science",
      `Libraries` = "University Libraries",
      `University libraries` = "University Libraries",
      `Astronomy and Astrophysics` = "Astronomy & Astrophysics"
    )
  ) |>
  dplyr::mutate(
    college = dplyr::case_match(
      dept,
      "Statistics" ~ "ECoS",
      "Biology" ~ "ECoS",
      "Psychology" ~ "CLA",
      "Spanish, Italian, & Portuguese" ~ "CLA",
      "Research Informatics and Publishing" ~ "Libraries",
      "Political Science" ~ "CLA",
      "Applied Linguistics" ~ "CLA",
      "Global Languages & Literatures" ~ "CLA",
      "Sociology" ~ "CLA",
      "English" ~ "CLA",
      "C-SoDA" ~ "CLA",
      "Office of Digital Pedagogies and Initiatives" ~ "CLA",
      "Asian Studies" ~ "CLA",
      "IST" ~ "IST",
      "Chemical Engineering" ~ "Engineering",
      "Material Science and Engineering" ~ "Engineering",
      "Engineering Science & Mechanics" ~ "Engineering",
      "Biomedical Engineering" ~ "Engineering",
      "Nutritional Sciences" ~ "HHD",
      "HDFS" ~ "HHD",
      "Kinesiology" ~ "HHD",
      "Recreation, Park, & Tourism Management" ~ "HHD",
      "Bellisario College of Communication" ~ "Comm",
      "Marketing" ~ "Smeal",
      "Food Science" ~ "Ag",
      "Neuroscience" ~ "Med",
      "College of Human and Health Development" ~ "HHD",
      "University Libraries" ~ "Libraries",
      "ICDS" ~ "ICDS",
      "EESI" ~ "EESI",
      "ORP" ~ "OVPR",
      "Astronomy & Astrophysics" ~ "ECoS",
      # The fallback must be an argument of `case_match()` itself. It was
      # previously passed to `mutate()`, which created stray `.default` and
      # `.missing` columns and left unmatched departments as NA. A missing
      # `dept` is also unmatched, so it falls back to "Unknown" as well.
      .default = "Unknown"
    )
  )
Code
# Horizontal bar chart of registrant counts per department, coloured by
# college. Rows with no department and the one external institution are
# excluded; the untitled legend sits below the plot.
registrations_yes |>
  dplyr::filter(!is.na(dept) & dept != "University of Pennsylvania") |>
  ggplot(mapping = aes(x = dept, fill = college)) +
  geom_bar() +
  coord_flip() +
  theme(legend.position = "bottom",
        legend.title = element_blank())
Figure 9.3: Registrations by department and college/unit.