Modified

April 17, 2026

About

This page documents and implements the data processing workflow for bootcamp registration.

Setup

We load some packages into memory for convenience.

Code
suppressPackageStartupMessages(library('tidyverse'))
suppressPackageStartupMessages(library('ggplot2'))
suppressPackageStartupMessages(library('dplyr'))
suppressPackageStartupMessages(library('tidyr'))
suppressPackageStartupMessages(library('stringr'))
suppressPackageStartupMessages(library('lubridate'))
suppressPackageStartupMessages(library('janitor'))

Import

The Google Form generates a Google Sheet that we download to a protected directory (private/csv) that is not synched to GitHub.

Important

This is because the sheet contains personally identifying information.

To implement this, we add private to our .gitignore file in the project’s root directory. This omits all files in private/ from version control and keeps them local to the machine we render the site from.

Code
# Download the registration responses from Google Sheets and save them as a
# CSV in the protected directory given by `params$csv_dir`.
#
# `assertthat::is.string()` and `assertthat::is.writeable()` only *return*
# TRUE/FALSE; they must be wrapped in `assert_that()` to stop on bad input.
# Results are assigned to `x` so that `TRUE` is not printed in the rendered page.
x <- assertthat::assert_that(assertthat::is.string(params$csv_dir))
if (!dir.exists(params$csv_dir)) {
  message("Creating missing `", params$csv_dir, "`.")
  # recursive = TRUE so nested paths are created in one step.
  dir.create(params$csv_dir, recursive = TRUE)
}
x <- assertthat::assert_that(assertthat::is.writeable(params$csv_dir))

# options(gargle_oauth_email = Sys.getenv("GMAIL_SURVEY"))
# googledrive::drive_auth()

# Authenticate to Google Sheets with the account named in `params$psu_id`.
x <- assertthat::assert_that(assertthat::is.string(params$psu_id))
googlesheets4::gs4_auth(email = params$psu_id)

# Validate the sheet id that is actually used for the download below.
x <- assertthat::assert_that(assertthat::is.string(params$reg_sheets_id))
registrations <- googlesheets4::read_sheet(params$reg_sheets_id,
                                           sheet = "Form Responses 1")
x <- assertthat::assert_that(tibble::is_tibble(registrations))

# Build the output path only after the file name has been validated.
x <- assertthat::assert_that(assertthat::is.string(params$registration_fn))
registration_full_fn <- file.path(params$csv_dir, params$registration_fn)

readr::write_csv(registrations, file = registration_full_fn)

Download program committee member data, too.

Code
# Download the "planning_committee" tab of the registration sheet and save it
# as a CSV alongside the registration data.
program_comm <- googlesheets4::read_sheet(params$reg_sheets_id,
                                          sheet = "planning_committee")
x <- assertthat::assert_that(tibble::is_tibble(program_comm))

# `is.string()` alone only returns TRUE/FALSE; wrap it in `assert_that()` so
# an invalid file name stops the render before the path is built.
x <- assertthat::assert_that(assertthat::is.string(params$program_comm_fn))
program_comm_full_fn <- file.path(params$csv_dir, params$program_comm_fn)

readr::write_csv(program_comm, file = program_comm_full_fn)

Clean

Google Forms conveniently returns the questions as variable names at the top of each column. These are handy for creating a data dictionary, but awkward for data processing. We rename these for our convenience. We also export a data dictionary.

Code
# Single source of truth pairing each short variable name (new) with the
# Google Forms question text (old). Both the rename and the data dictionary
# are derived from it, so the two cannot drift out of sync.
registrations_rename <- c(
  timestamp = "Timestamp",
  psu_email = "Email Address",
  attend_days = "Which days of the bootcamp will you attend?",
  name = "What is your name?",
  dept = "What is your department or unit?",
  position = "What is your current position?",
  comments = "Any comments?"
)

# Original question text, in sheet column order.
registrations_qs <- names(registrations)

# `all_of()` errors if any expected question is missing from the sheet.
registrations_clean <- registrations |>
  dplyr::rename(dplyr::all_of(registrations_rename))

# Renaming preserves column order, so these line up with `registrations_qs`
# element-for-element regardless of how the sheet orders its columns.
registrations_short <- names(registrations_clean)

# Flag columns that hold personally identifying information.
# NOTE(review): only `name` is flagged; `psu_email` looks identifying too --
# confirm whether it should also be marked TRUE.
registrations_pid <- registrations_short %in% c("name")

registrations_dd <- data.frame(qs = registrations_qs,
                               qs_short = registrations_short,
                               pid = registrations_pid)

registrations_dd |>
  knitr::kable(format = 'html')

# Export the data dictionary next to the downloaded CSVs.
data_dict_fn <- file.path(params$csv_dir, "registrations-2026-data-dict.csv")
x <- assertthat::assert_that(assertthat::is.string(data_dict_fn))
readr::write_csv(registrations_dd,
                 file = data_dict_fn)
Table 7.1: A minimal data dictionary.
qs qs_short pid
Timestamp timestamp FALSE
Email Address psu_email FALSE
Which days of the bootcamp will you attend? attend_days FALSE
What is your name? name TRUE
What is your department or unit? dept FALSE
What is your current position? position FALSE
Any comments? comments FALSE

We drop some responses that were used to test the workflow.

Code
# Email addresses used only to test the registration workflow.
test_emails <- c("nittany.amateur.radio.club@gmail.com")

# `%in%` never returns NA, so rows with a missing email are kept; a plain
# `!=` comparison inside `filter()` would silently drop them as well.
registrations_clean <- registrations_clean |>
  dplyr::filter(!psu_email %in% test_emails)

Visualize

Registration numbers

Code
# Copy of the cleaned responses with a constant `registered = TRUE` column.
registrations_yes <- dplyr::mutate(registrations_clean, registered = TRUE)

As of 2026-04-17, we have n=28 registered attendees and presenters. That is 37.3% of our target registration/attendance limit of n=75.

Time series

Code
# Cumulative count of registrations over time: responses are sorted by
# timestamp and numbered 1..n, then plotted as connected points.
# No colour aesthetic is mapped, so the plot needs no colour scale and
# produces no legend.
registrations_clean |>
  dplyr::arrange(timestamp) |>
  dplyr::mutate(resp_index = dplyr::row_number()) |>
  ggplot(aes(x = timestamp, y = resp_index)) +
  geom_point() +
  geom_line() +
  labs(x = NULL, y = "n registrations") +
  theme(axis.text.x = element_text(angle = 90))
Figure 7.1: Time series of registrations.

Registrant attendance plans by day

Code
# Count registrants whose `attend_days` answer mentions each day.
# `summarise()` collapses to a single row directly; missing answers are
# ignored via `na.rm = TRUE`.
registrations_clean |>
  dplyr::summarise(
    n_mon = sum(stringr::str_detect(attend_days, "Mon"), na.rm = TRUE),
    n_tue = sum(stringr::str_detect(attend_days, "Tue"), na.rm = TRUE)
  ) |>
  knitr::kable(format = "html") |>
  kableExtra::kable_classic()
Table 7.2
n_mon n_tue
27 24

Bootcamp registrations by day.

Registrations by position

Code
# Horizontal bar chart of registrations per position, most common at the top.
# Rows with a missing position are excluded.
registrations_clean |>
  dplyr::filter(!is.na(position)) |>
  dplyr::count(position, sort = TRUE) |>
  # Order the factor by count so bars are sorted after `coord_flip()`.
  dplyr::mutate(position = forcats::fct_reorder(position, n)) |>
  ggplot() +
  # `geom_col()` is the direct form of `geom_bar(stat = "identity")`.
  geom_col(aes(x = position, y = n), fill = "#96BEE6") +
  scale_y_continuous(breaks = seq(2, 14, by = 2)) +
  xlab("") +
  # A complete theme replaces any earlier `theme()` tweaks, and the constant
  # fill yields no legend, so no legend settings are needed.
  theme_classic() +
  coord_flip()
Figure 7.2: Registrations by position.

Registrations by unit

Clean and recode.

Code
# Manual department corrections for individual registrants, keyed by the
# registrant's name exactly as entered in the form.
dept_overrides <- c(
  "Parisa Osfoori" = "Communication Sciences & Disorders",
  "Koraly Pérez-Edgar" = "Psychology"
)

for (person in names(dept_overrides)) {
  select_this <- registrations_clean$name == person
  registrations_clean$dept[select_this] <- dept_overrides[[person]]
}

Normalize department names; assign college/institute values.

Code
# Step 1: collapse free-text department answers onto canonical department
# names. Values not listed here pass through `dplyr::recode()` unchanged.
# Step 2: derive `college` from the canonical department. Departments with no
# mapping (including missing values) are labelled "Unknown" via `default`,
# which must be passed *inside* `recode_values()`; outside it, `mutate()`
# would treat the argument as a new column.
registrations_clean <- registrations_clean |>
  dplyr::mutate(
    dept = dplyr::recode(
      dept,
      `Clinical Psychology` = "Psychology",
      `Psychology (Cognitive)` = "Psychology",
      `Psychology / SSRI` = "Psychology",
      `Psychology (Developmental)` = "Psychology",
      `Department of Psychology` = "Psychology",
      `Cognitive Psychology` = "Psychology",
      `Psychology, Developmental` = "Psychology",
      `Developmental Psychology (CAT Lab)` = "Psychology",
      `Developmental Psychology` = "Psychology",
      `Psych` = "Psychology",
      `English language` = "English",
      `english` = "English",
      `English Language Teaching` = "English",
      `English Department` = "English",
      `Languages` = "Global Languages & Literatures",
      `Languages and Literature` = "Global Languages & Literatures",
      `Department of Foreign Languages` = "Global Languages & Literatures",
      `Linguistics` = "Applied Linguistics",
      `Department of Sociology and Criminology` = "Sociology & Criminology",
      `Communication Sciences and Disorders` = "Communication Sciences & Disorders",
      `CSD` = "Communication Sciences & Disorders",
      `Human Development and Family Studies & Social Data Analytics` = "HDFS",
      `Human Development and Family Studies` = "HDFS",
      `Human Development and Family Studies (HDFS)` = "HDFS",
      `Department of Human Development and Family Studies` = "HDFS",
      `Human Development and Family Sciences` = "HDFS",
      `HDFS/DEMO` = "HDFS",
      `bbh` = "BBH",
      `Biobehavioral Health` = "BBH",
      `Biobehavioural Health` = "BBH",
      `Biobehavioural  Health` = "BBH",
      `Biobehavioral health` = "BBH",
      `RPTM` = "Recreation, Park, & Tourism Management",
      `Sociology and Social Data Analytics` = "Sociology",
      `Spanish Italian and portuguese` = "Spanish, Italian, & Portuguese",
      `Spanish, Italian, and Portuguese Department` = "Spanish, Italian, & Portuguese",
      `Spanish Italian and Portuguese` = "Spanish, Italian, & Portuguese",
      `Spanish, Italian, and Portuguese` = "Spanish, Italian, & Portuguese",
      `French and Francophone Studies` = "French & Francophone Studies",
      `DEMOG` = "Demography",
      `Germanic & Slavic Languages & Literatures` = "German & Slavic Languages",
      `Germanic and Slavic Languages and Literatures` = "German & Slavic Languages",
      `Nutrition` = "Nutritional Sciences",
      `College of IST` = "IST",
      `Statistics Department` = "Statistics",
      `Department of Statistics` = "Statistics",
      `Math` = "Mathematics",
      `Astronomy and Astrophysics` = "Astronomy & Astrophysics",
      `Recreation, Park and Tourism Management` = "Recreation, Park, & Tourism Management",
      `SHS` = "Student Health Svcs",
      `Department of Chemical Engineering` = "Chemical Engineering",
      `ESM` = "Engineering Science & Mechanics",
      `Engineering Science` = "Engineering Science & Mechanics",
      `Engineering Science and Mechanics` = "Engineering Science & Mechanics",
      `EECS` = "Electrical Engineering & Comp Sci",
      `Department of Food Science` = "Food Science",
      `Libraries` = "University Libraries",
      `University libraries` = "University Libraries",
      `Ecosystem Science and Management` = "Ecosystem Science & Management",
      `PRC` = "Population Research Center",
      `TLT, PSU Libraries` = "University Libraries",
      `Business and Economics` = "Business & Economics",
      `EE` = "Electrical Engineering",
      `College of Medicine / Clinical and Translational Science Institute` = "CTSI",
      `Mechanical engineering,Penn state Harrisburg` = "Mechanical Engineering (Harrisburg)",
      `Smeal College of Business, Accounting` = "Accounting",
      `School of Science, Engineering, and Technology` = "Sci, Engr, & Tech",
      `institute for Computational and Data Sciences` = "ICDS",
      `Plant Pathology and environmental microbiology` = "Plant Pathology & Environmental Microbiology",
      `Meteorology and Atmospheric Sciences` = "Meteorology & Atmospheric Sciences",
      `School of Labor and Employment Relations` = "School of Labor & Employment Relations"
    )
  ) |>
  dplyr::mutate(
    college = dplyr::recode_values(
      dept,
      "Statistics" ~ "ECoS",
      "University of Kansas, Psychology" ~ "UKansas",
      "Biology" ~ "ECoS",
      "Psychology" ~ "CLA",
      "Spanish, Italian, & Portuguese" ~ "CLA",
      "German & Slavic Languages" ~ "CLA",
      "Research Informatics and Publishing" ~ "Libraries",
      "Political Science" ~ "CLA",
      "Applied Linguistics" ~ "CLA",
      "Global Languages & Literatures" ~ "CLA",
      "Anthropology" ~ "CLA",
      "Sociology" ~ "CLA",
      "English" ~ "CLA",
      "C-SoDA" ~ "CLA",
      "Office of Digital Pedagogies and Initiatives" ~ "CLA",
      "Asian Studies" ~ "CLA",
      "Sociology & Criminology" ~ "CLA",
      "School of Labor & Employment Relations" ~ "CLA",
      "IST" ~ "IST",
      "Chemical Engineering" ~ "Engineering",
      "Material Science and Engineering" ~ "Engineering",
      "Engineering Science & Mechanics" ~ "Engineering",
      "College of Engineering" ~ "Engineering",
      "Biomedical Engineering" ~ "Engineering",
      "Nutritional Sciences" ~ "HHD",
      "HDFS" ~ "HHD",
      "Kinesiology" ~ "HHD",
      "Recreation, Park, & Tourism Management" ~ "HHD",
      "BBH" ~ "HHD",
      "College of Nursing" ~ "Nursing",
      "Bellisario College of Communication" ~ "Comm",
      "Mass Communications" ~ "Comm",
      "Marketing" ~ "Smeal",
      # Uses the same "Medicine" label as the "Medicine" and "CTSI" entries so
      # the college is not split across two spellings.
      "Neuroscience" ~ "Medicine",
      "College of Human and Health Development" ~ "HHD",
      "University Libraries" ~ "Libraries",
      "ICDS" ~ "ICDS",
      "EESI" ~ "EESI",
      "Astronomy & Astrophysics" ~ "ECoS",
      "Chemistry" ~ "ECoS",
      "Mathematics" ~ "ECoS",
      "Entomology" ~ "AgSci",
      "Ecosystem Science & Management" ~ "AgSci",
      "Plant Pathology & Environmental Microbiology" ~ "AgSci",
      "Plant Biology" ~ "Huck",
      "Food Science" ~ "AgSci",
      "Plant Science" ~ "AgSci",
      "Biotechnology" ~ "Huck",
      "Acoustics" ~ "Engineering",
      "Communication Sciences & Disorders" ~ "HHD",
      "Electrical Engineering & Comp Sci" ~ "Engineering",
      "Population Research Center" ~ "SSRI",
      "Psychology (Harrisburg)" ~ "PSU Harrisburg",
      "Business & Economics" ~ "PSU Brandywine",
      "Engineering" ~ "Engineering",
      "LPS/LDT" ~ "Education",
      "Demography" ~ "CLA",
      "OVPR" ~ "OVPR",
      "ORP" ~ "OVPR",
      "Mechanical Engineering" ~ "Engineering",
      "Electrical Engineering" ~ "Engineering",
      "Medicine" ~ "Medicine",
      "CTSI" ~ "Medicine",
      "French & Francophone Studies" ~ "CLA",
      "Data Analytics" ~ "IST",
      "Cybersecurity" ~ "IST",
      "Information Science and Technology" ~ "IST",
      "Mechanical Engineering (Harrisburg)" ~ "PSU Harrisburg",
      "Accounting" ~ "Smeal",
      "PSU Harrisburg" ~ "PSU Harrisburg",
      "Sci, Engr, & Tech" ~ "PSU Harrisburg",
      "Schreyer Institute for Teaching Excellence" ~ "Old Main",
      "HHD" ~ "HHD",
      "Meteorology & Atmospheric Sciences" ~ "EMS",
      default = "Unknown"
    )
  )

Plot.

Code
# Horizontal bar chart of registrations per college/unit, largest at the top.
# Rows with a missing college are excluded.
registrations_clean |>
  dplyr::filter(!is.na(college)) |>
  dplyr::count(college, sort = TRUE) |>
  # Order the factor by count so bars are sorted after `coord_flip()`.
  dplyr::mutate(college = forcats::fct_reorder(college, n)) |>
  ggplot() +
  # `geom_col()` is the direct form of `geom_bar(stat = "identity")`.
  geom_col(aes(x = college, y = n), fill = "#BF8226") +
  scale_y_continuous(breaks = seq(2, 12, by = 2)) +
  xlab("") +
  # A complete theme replaces any earlier `theme()` tweaks, and the constant
  # fill yields no legend, so no legend settings are needed.
  theme_classic() +
  coord_flip()
Figure 7.3: Registrations by college/unit.

The bootcamp registrations represent n=17 departments and n=12 campuses, colleges, institutes, or administrative entities.