Examine extraction-kit comparison experiment

Author

Dan Rice

Published

September 18, 2023

Background

Drive folder

Setup

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.3     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.3     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# library(googlesheets4)
library(fs)
library(here)
here() starts at /Users/dan/notebook
library(knitr)
library(broom)

# plotting libraries
library(cowplot)

Attaching package: 'cowplot'

The following object is masked from 'package:lubridate':

    stamp
library(patchwork)

Attaching package: 'patchwork'

The following object is masked from 'package:cowplot':

    align_plots
library(ggbeeswarm)

# Custon qPCR helpers
library(naowetlab)
# source('_functions.R')

# Plotting setup #

theme_set(theme_cowplot())

# Okabe Ito color scheme with amber for yellow; see https://easystats.github.io/see/reference/scale_color_okabeito.html
colors_oi <- grDevices::palette.colors()
colors_oi["yellow"] <- "#F5C710"

Data import

data_path <- here("_data/2023-07-18-extraction-kit-evaluation")

metadata

meta_samples <- path(data_path, "qpcr", "meta_samples.csv") %>%
  read_csv() %>%
  rename(sample_qpcr = sample_qPCR) %>%
  janitor::clean_names() %>%
  glimpse()
Rows: 27 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (7): sample_qPCR, Kit, treatment_group, LPA, other_treatment, Virus_spec...
dbl (1): group

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 27
Columns: 8
$ sample_qpcr     <chr> "1_ZR_A", "1_ZR_B", "1_ZR_C", "2_ZDR_A", "2_ZDR_B", "2…
$ kit             <chr> "Zymo RNA", "Zymo RNA", "Zymo RNA", "Zymo RNA/DNA", "Z…
$ treatment_group <chr> "ZQ-RNA", "ZQ-RNA", "ZQ-RNA", "ZQ-RNADNA", "ZQ-RNADNA"…
$ lpa             <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", …
$ group           <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ other_treatment <chr> "Shield", "Shield", "Shield", "Shield", "Shield", "Shi…
$ virus_specific  <chr> "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "No", "No", …
$ fp_data         <chr> "No", "Yes", "No", "No", "Yes", "No", "No", "Yes", "No…
meta_targets <- path(data_path, "qpcr", "meta_target.csv") %>%
  read_csv() %>%
  rename(target_qpcr = target_qPCR) %>%
  janitor::clean_names() %>%
  glimpse()
Rows: 5 Columns: 2
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): target_qPCR, target

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 5
Columns: 2
$ target_qpcr <chr> "CrA", "Cov2", "Noro", "16S", "Phg"
$ target      <chr> "Crassphage", "SARS-CoV-2", "Norovirus", "bacteria", "phag…

qPCR

fns <- data_path %>%
  dir_ls(recurse = TRUE, glob = "*_Standard Curve Result_*.csv")
fns %>% path_file()
[1] "2023-07-26_Cov2-kits_Standard Curve Result_20230728 133121.csv"    
[2] "2023-07-26_CrA-kits_Standard Curve Result_20230728 135806.csv"     
[3] "2023-07-26_Noro_kits_Standard Curve Result_20230728 140336.csv"    
[4] "2023-07-26_Phagemid-kits_Standard Curve Result_20230728 141050.csv"
results_raw <- tibble(file = fns) %>%
  mutate(
    .keep = "unused",
    data = map(file, read_qpcr_results_csv)
  ) %>%
  unnest(data)
results <- results_raw %>%
  rename(target_qpcr = target) %>%
  left_join(meta_samples, by = c("sample" = "sample_qpcr")) %>%
  left_join(meta_targets, by = "target_qpcr") %>%
  mutate()

amp curves

fns_amp <- data_path %>%
  dir_ls(recurse = TRUE, glob = "*_Amplification Data_*.csv") %>%
  str_subset(negate = TRUE, "Raw")

amp1 <- tibble(file = fns_amp) %>%
  mutate(
    .keep = "unused",
    data = map(file, read_qpcr_amplification_csv)
  ) %>%
  unnest(data) %>%
  rename(target_qpcr = target) %>%
  left_join(results)
Joining with `by = join_by(well, well_position, row, column, target_qpcr,
sample, omit)`

Second experiment

data_path2 <- here("_data/2023-07-13-volume-and-dilution")

meta_samples2 <- path(data_path2, "qpcr", "meta_samples.csv") %>%
  read_csv() %>%
  rename(sample_qpcr = sample_qPCR) %>%
  janitor::clean_names()
Rows: 6 Columns: 2
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): sample_qPCR, treatment_group

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
meta_targets2 <- path(data_path2, "qpcr", "meta_target.csv") %>%
  read_csv() %>%
  rename(target_qpcr = target_qPCR) %>%
  janitor::clean_names()
Rows: 4 Columns: 2
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): target_qPCR, target

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
fns2 <- data_path2 %>%
  dir_ls(recurse = TRUE, glob = "*_Standard Curve Result_*.csv")
fns2 %>% path_file()
[1] "2023-07-20_SSTweenCovNoro_Standard Curve Result_20230724 143043.csv"
[2] "2023-07-20_TweenCrA16S_Standard Curve Result_20230724 142856.csv"   
results_raw2 <- tibble(file = fns2) %>%
  mutate(
    .keep = "unused",
    data = map(file, read_qpcr_results_csv)
  ) %>%
  unnest(data)

results2 <- results_raw2 %>%
  rename(target_qpcr = target) %>%
  left_join(meta_samples2, by = c("sample" = "sample_qpcr")) %>%
  left_join(meta_targets2, by = "target_qpcr") %>%
  mutate()

fns_amp2 <- data_path2 %>%
  dir_ls(recurse = TRUE, glob = "*_Amplification Data_*.csv") %>%
  str_subset(negate = TRUE, "Raw")

amp2 <- tibble(file = fns_amp2) %>%
  mutate(
    .keep = "unused",
    data = map(file, read_qpcr_amplification_csv)
  ) %>%
  unnest(data) %>%
  rename(target_qpcr = target) %>%
  left_join(results2)
Joining with `by = join_by(well, well_position, row, column, target_qpcr,
sample, omit)`
data_path3 <- here("_data/2023-06-13-concentration")

meta_samples3 <- path(data_path3, "qpcr", "meta_samples.csv") %>%
  read_csv() %>%
  mutate(treatment_group = as.character(treatment_group)) %>%
  # rename(sample_qpcr = sample_qPCR) %>%
  janitor::clean_names()
Rows: 12 Columns: 9
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): sample_qpcr, filter, sewer_system, method, method_short
dbl (4): treatment_group, sample_number, volume, amicon_mwco

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
meta_targets3 <- path(data_path3, "qpcr", "meta_target.csv") %>%
  read_csv() %>%
  rename(target_qpcr = target_qPCR) %>%
  janitor::clean_names()
Rows: 5 Columns: 2
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): target_qPCR, target

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
fns3 <- data_path3 %>%
  dir_ls(recurse = TRUE, glob = "*_Standard Curve Result_*.csv")
fns3 %>% path_file()
[1] "2023-06-23_CPAmicon_16S_Standard Curve Result_20230625 111656.csv"    
[2] "2023-06-23_AmiconCP-COVNORO_Standard Curve Result_20230623 171728.csv"
[3] "2023-06-23_CrAPhg_Standard Curve Result_20230623 171458.csv"          
results_raw3 <- tibble(file = fns3) %>%
  mutate(
    .keep = "unused",
    data = map(file, read_qpcr_results_csv)
  ) %>%
  unnest(data)
print(unique(results_raw3$sample))
 [1] "N_30"               "N_0.05-80"          "6840.0"            
 [4] "N_100"              "N_Ultra-80"         "684.0"             
 [7] "N_0.05"             "S_0.05-80"          "68.4"              
[10] "N_Ultra"            "S_Ultra-80"         "6.840000000000001" 
[13] "S_30"               NA                   "0.6840000000000002"
[16] "S_100"              "S_0.05"             "S_Ultra"           
[19] "10.0"               "100000.0"           "10000.0"           
[22] "1000.0"             "100.0"             
print(unique(results_raw3$target))
[1] "16S"  "Cov2" "Noro" "CrA"  "phg" 
results3 <- results_raw3 %>%
  rename(target_qpcr = target) %>%
  left_join(meta_samples3, by = c("sample" = "sample_qpcr")) %>%
  left_join(meta_targets3, by = "target_qpcr") %>%
  mutate()

fns_amp3 <- data_path3 %>%
  dir_ls(recurse = TRUE, glob = "*_Amplification Data_*.csv") %>%
  str_subset(negate = TRUE, "Raw")


amp3 <- tibble(file = fns_amp3) %>%
  mutate(
    .keep = "unused",
    data = map(file, read_qpcr_amplification_csv)
  ) %>%
  unnest(data) %>%
  rename(target_qpcr = target) %>%
  left_join(results3)
Joining with `by = join_by(well, well_position, row, column, target_qpcr,
sample, omit)`
amp1$experiment <- 1
amp2$experiment <- 2
amp3$experiment <- 3
amp <- bind_rows(amp1, amp2, amp3)
# amp <- bind_rows(amp1, amp2)

Analyze qPCR data

Inspect SARS2 amplification curves

delta_rn_min <- 1e-3
ct_threshold <- results %>%
  filter(target == "SARS-CoV-2") %>%
  pull(threshold) %>%
  unique()
stopifnot(length(ct_threshold) == 1)

amp %>%
  filter(
    target == "SARS-CoV-2",
    !is.na(treatment_group)
  ) %>%
  ggplot(aes(cycle_number, pmax(d_rn, delta_rn_min), color = treatment_group)) +
  # scale_color_manual(values = colors_oi %>% unname) +
  scale_y_log10() +
  geom_line(aes(group = well)) +
  geom_hline(yintercept = ct_threshold, alpha = 0.3) +
  facet_wrap(~experiment) +
  # scale_color_brewer(type = 'qual') +
  # geom_point(data = baselines, aes(shape = baseline_boundary), size = 3) +
  # scale_shape_manual(values = c(1, 4)) +
  labs(y = "Delta Rn", x = "Cycle", color = "Target")

amp %>%
  filter(
    target == "SARS-CoV-2",
    task %in% c("STANDARD", "UNKNOWN")
  ) %>%
  ggplot(aes(cycle_number, pmax(d_rn, delta_rn_min), color = treatment_group)) +
  facet_wrap(~treatment_group) +
  # scale_color_manual(values = colors_oi %>% unname) +
  scale_x_continuous(limits = c(25, 40)) +
  scale_y_log10() +
  geom_line(aes(group = well)) +
  geom_hline(yintercept = ct_threshold, alpha = 0.3) +
  labs(y = "Delta Rn", x = "Cycle", color = "Target")
Warning: Removed 2064 rows containing missing values (`geom_line()`).

amp %>%
  filter(
    target == "SARS-CoV-2",
    task == "STANDARD"
  ) %>%
  ggplot(aes(cycle_number, pmax(d_rn, delta_rn_min), color = sample)) +
  # scale_color_manual(values = colors_oi %>% unname) +
  # scale_x_continuous(limits = c(25, 40)) +
  scale_y_log10() +
  geom_line(aes(group = well)) +
  geom_hline(yintercept = ct_threshold, alpha = 0.3) +
  labs(y = "Delta Rn", x = "Cycle", color = "Target")

Check SC versus target samples

amp %>%
  filter(
    target == "SARS-CoV-2",
    task %in% c("STANDARD", "UNKNOWN")
  ) %>%
  ggplot(aes(cycle_number, pmax(d_rn, delta_rn_min), color = interaction(task, experiment))) +
  scale_color_manual(values = colors_oi %>% unname()) +
  scale_x_continuous(limits = c(15, 40)) +
  scale_y_log10() +
  geom_line(aes(group = interaction(well, experiment))) +
  geom_hline(yintercept = ct_threshold, alpha = 0.3) +
  labs(y = "Delta Rn", x = "Cycle", color = "Target")
Warning: Removed 1960 rows containing missing values (`geom_line()`).

amp %>%
  filter(
    # target == 'SARS-CoV-2',
    task %in% c("STANDARD", "UNKNOWN")
  ) %>%
  ggplot(aes(cycle_number, pmax(d_rn, delta_rn_min), color = interaction(task, experiment))) +
  facet_wrap(~target) +
  scale_color_manual(values = colors_oi %>% unname()) +
  scale_x_continuous(limits = c(15, 40)) +
  scale_y_log10() +
  geom_line(aes(group = interaction(well, experiment))) +
  geom_hline(yintercept = ct_threshold, alpha = 0.3) +
  labs(y = "Delta Rn", x = "Cycle", color = "Target")
Warning: Removed 3220 rows containing missing values (`geom_line()`).