2023-10-10 Analyze prefilter experiment qPCR

Author

Dan Rice

Published

October 10, 2023

Objectives

Determine which prefiltration method produces better nucleic acid results.

Preliminary work

Exported csv files from Olivia’s eds file uploads.

Data import

library(here)

here() starts at /Users/dan/notebook

library(readr)
library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

library(purrr)
library(stringr)
library(ggplot2)
library(tidyr)
library(broom)

data_dir <-
  here("~", "airport", "[2023-09-22] New Processing Tests")
filename_pattern <- "_Results_"

col_types <- list(
  Target = col_character(),
  Cq = col_double()
)
raw_data <- list.files(
  here(data_dir, "qpcr"),
  pattern = filename_pattern,
  recursive = TRUE,
  full.names = TRUE,
) |>
  print() |>
  map(function(f) {
    read_csv(f,
      skip = 23,
      col_types = col_types,
    )
  }) |>
  list_rbind()

[1] "/Users/dan/airport/[2023-09-22] New Processing Tests/qpcr/2023-10-09_Cov2_PMMV_Results_20231010_125053.csv"
[2] "/Users/dan/airport/[2023-09-22] New Processing Tests/qpcr/2023-10-09_CrA_16S_Results_20231010_125152.csv"  
[3] "/Users/dan/airport/[2023-09-22] New Processing Tests/qpcr/2023-10-09_Noro_Results_20231010_125241.csv"

Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
  dat <- vroom(...)
  problems(dat)
One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
  dat <- vroom(...)
  problems(dat)
One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
  dat <- vroom(...)
  problems(dat)

print(raw_data)

# A tibble: 165 × 21
    Well `Well Position` Omit  Sample   Target Task     Reporter Quencher
   <dbl> <chr>           <lgl> <chr>    <chr>  <chr>    <chr>    <chr>   
 1     1 A1              FALSE 1A       Cov2   UNKNOWN  FAM      NFQ-MGB 
 2     2 A2              FALSE 1A       Cov2   UNKNOWN  FAM      NFQ-MGB 
 3     3 A3              FALSE 1A       Cov2   UNKNOWN  FAM      NFQ-MGB 
 4     4 A4              FALSE 10000.0  Cov2   STANDARD FAM      NFQ-MGB 
 5     5 A5              FALSE 10000.0  Cov2   STANDARD FAM      NFQ-MGB 
 6     6 A6              FALSE 10000.0  Cov2   STANDARD FAM      NFQ-MGB 
 7     7 A7              FALSE 1A       PMMV   UNKNOWN  FAM      NFQ-MGB 
 8     8 A8              FALSE 1A       PMMV   UNKNOWN  FAM      NFQ-MGB 
 9     9 A9              FALSE 1A       PMMV   UNKNOWN  FAM      NFQ-MGB 
10    10 A10             FALSE 800000.0 PMMV   STANDARD FAM      NFQ-MGB 
# ℹ 155 more rows
# ℹ 13 more variables: `Amp Status` <chr>, `Amp Score` <dbl>,
#   `Curve Quality` <lgl>, `Result Quality Issues` <lgl>, Cq <dbl>,
#   `Cq Confidence` <dbl>, `Cq Mean` <dbl>, `Cq SD` <dbl>,
#   `Auto Threshold` <lgl>, Threshold <dbl>, `Auto Baseline` <lgl>,
#   `Baseline Start` <dbl>, `Baseline End` <dbl>

raw_data |> count(Target)

# A tibble: 5 × 2
  Target     n
  <chr>  <int>
1 16S       36
2 Cov2      36
3 CrA       36
4 Noro      21
5 PMMV      36

tidy_data <- raw_data |>
  mutate(
    group = str_extract(Sample, "^[0-9]"),
    replicate = str_extract(Sample, "[A-Z]$"),
    quantity = as.double(Sample),
  ) |>
  glimpse()

Warning: There was 1 warning in `mutate()`.
ℹ In argument: `quantity = as.double(Sample)`.
Caused by warning:
! NAs introduced by coercion

Rows: 165
Columns: 24
$ Well                    <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,…
$ `Well Position`         <chr> "A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8"…
$ Omit                    <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…
$ Sample                  <chr> "1A", "1A", "1A", "10000.0", "10000.0", "10000…
$ Target                  <chr> "Cov2", "Cov2", "Cov2", "Cov2", "Cov2", "Cov2"…
$ Task                    <chr> "UNKNOWN", "UNKNOWN", "UNKNOWN", "STANDARD", "…
$ Reporter                <chr> "FAM", "FAM", "FAM", "FAM", "FAM", "FAM", "FAM…
$ Quencher                <chr> "NFQ-MGB", "NFQ-MGB", "NFQ-MGB", "NFQ-MGB", "N…
$ `Amp Status`            <chr> "AMP", "AMP", "AMP", "AMP", "AMP", "AMP", "AMP…
$ `Amp Score`             <dbl> 1.3915582, 1.4014582, 1.4073581, 1.4090574, 1.…
$ `Curve Quality`         <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ `Result Quality Issues` <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ Cq                      <dbl> 33.15919, 32.98389, 32.66178, 22.39386, 22.220…
$ `Cq Confidence`         <dbl> 0.9759085, 0.9891340, 0.9883204, 0.9891666, 0.…
$ `Cq Mean`               <dbl> 32.93495, 32.93495, 32.93495, 22.31821, 22.318…
$ `Cq SD`                 <dbl> 0.25229116, 0.25229116, 0.25229116, 0.08875503…
$ `Auto Threshold`        <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE…
$ Threshold               <dbl> 0.2999157, 0.2999157, 0.2999157, 0.2999157, 0.…
$ `Auto Baseline`         <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE…
$ `Baseline Start`        <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3…
$ `Baseline End`          <dbl> 27, 27, 26, 16, 16, 16, 24, 23, 23, 20, 20, 19…
$ group                   <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", "…
$ replicate               <chr> "A", "A", "A", NA, NA, NA, "A", "A", "A", NA, …
$ quantity                <dbl> NA, NA, NA, 1e+04, 1e+04, 1e+04, NA, NA, NA, 8…

amp_data <- list.files(
  here(data_dir, "qpcr"),
  pattern = "Amplification Data",
  recursive = TRUE,
  full.names = TRUE,
) |>
  print() |>
  map(function(f) {
    read_csv(f,
      skip = 23,
      col_types = col_types,
    )
  }) |>
  list_rbind() |>
  left_join(tidy_data, by = join_by(Well, `Well Position`, Sample, Omit, Target)) |>
  glimpse()

[1] "/Users/dan/airport/[2023-09-22] New Processing Tests/qpcr/2023-10-09_Cov2_PMMV_Amplification Data_20231010_125053.csv"
[2] "/Users/dan/airport/[2023-09-22] New Processing Tests/qpcr/2023-10-09_CrA_16S_Amplification Data_20231010_125152.csv"  
[3] "/Users/dan/airport/[2023-09-22] New Processing Tests/qpcr/2023-10-09_Noro_Amplification Data_20231010_125241.csv"

Warning: The following named parsers don't match the column names: Cq
The following named parsers don't match the column names: Cq
The following named parsers don't match the column names: Cq

Rows: 6,600
Columns: 27
$ Well                    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ `Well Position`         <chr> "A1", "A1", "A1", "A1", "A1", "A1", "A1", "A1"…
$ `Cycle Number`          <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,…
$ Target                  <chr> "Cov2", "Cov2", "Cov2", "Cov2", "Cov2", "Cov2"…
$ Rn                      <dbl> 0.6443956, 0.6382574, 0.6284555, 0.6179680, 0.…
$ dRn                     <dbl> 2.836028e-02, 2.374099e-02, 1.545804e-02, 6.48…
$ Sample                  <chr> "1A", "1A", "1A", "1A", "1A", "1A", "1A", "1A"…
$ Omit                    <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…
$ Task                    <chr> "UNKNOWN", "UNKNOWN", "UNKNOWN", "UNKNOWN", "U…
$ Reporter                <chr> "FAM", "FAM", "FAM", "FAM", "FAM", "FAM", "FAM…
$ Quencher                <chr> "NFQ-MGB", "NFQ-MGB", "NFQ-MGB", "NFQ-MGB", "N…
$ `Amp Status`            <chr> "AMP", "AMP", "AMP", "AMP", "AMP", "AMP", "AMP…
$ `Amp Score`             <dbl> 1.391558, 1.391558, 1.391558, 1.391558, 1.3915…
$ `Curve Quality`         <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ `Result Quality Issues` <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ Cq                      <dbl> 33.15919, 33.15919, 33.15919, 33.15919, 33.159…
$ `Cq Confidence`         <dbl> 0.9759085, 0.9759085, 0.9759085, 0.9759085, 0.…
$ `Cq Mean`               <dbl> 32.93495, 32.93495, 32.93495, 32.93495, 32.934…
$ `Cq SD`                 <dbl> 0.2522912, 0.2522912, 0.2522912, 0.2522912, 0.…
$ `Auto Threshold`        <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE…
$ Threshold               <dbl> 0.2999157, 0.2999157, 0.2999157, 0.2999157, 0.…
$ `Auto Baseline`         <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE…
$ `Baseline Start`        <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3…
$ `Baseline End`          <dbl> 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27…
$ group                   <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", "…
$ replicate               <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "…
$ quantity                <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…

Groups 1 and 2 are two different prefiltration protocols.

tidy_data |>
  count(group, Target, replicate) |>
  print(n = Inf)

# A tibble: 40 × 4
   group Target replicate     n
   <chr> <chr>  <chr>     <int>
 1 0     16S    <NA>          9
 2 1     16S    A             3
 3 1     16S    B             3
 4 1     16S    C             3
 5 1     16S    <NA>          6
 6 1     Cov2   A             3
 7 1     Cov2   B             3
 8 1     Cov2   C             3
 9 1     Cov2   <NA>         15
10 1     CrA    A             3
11 1     CrA    B             3
12 1     CrA    C             3
13 1     CrA    <NA>         15
14 1     Noro   A             3
15 1     Noro   B             3
16 1     Noro   C             3
17 1     PMMV   A             3
18 1     PMMV   B             3
19 1     PMMV   C             3
20 2     16S    A             3
21 2     16S    B             3
22 2     16S    C             3
23 2     Cov2   A             3
24 2     Cov2   B             3
25 2     Cov2   C             3
26 2     CrA    A             3
27 2     CrA    B             3
28 2     CrA    C             3
29 2     Noro   A             3
30 2     Noro   B             3
31 2     Noro   C             3
32 2     PMMV   A             3
33 2     PMMV   B             3
34 2     PMMV   C             3
35 8     PMMV   <NA>         15
36 <NA>  16S    <NA>          3
37 <NA>  Cov2   <NA>          3
38 <NA>  CrA    <NA>          3
39 <NA>  Noro   <NA>          3
40 <NA>  PMMV   <NA>          3

Kit comparison

tidy_data |>
  filter(Task == "UNKNOWN") |>
  ggplot(mapping = aes(
    x = Cq,
    y = group,
    color = replicate,
  )) +
  stat_summary(
    fun.min = min,
    fun.max = max,
    fun = median,
    position = position_dodge(width = 0.2),
    size = 0.2
  ) +
  facet_wrap(facets = ~Target, scales = "free_x")

Standard curves

tidy_data |>
  filter(Task == "STANDARD") |>
  ggplot(mapping = aes(
    x = quantity,
    y = Cq,
  )) +
  stat_summary(
    fun.min = min,
    fun.max = max,
    fun = median,
    position = position_dodge(width = 0.2),
    size = 0.2
  ) +
  scale_x_log10() +
  geom_smooth(method = "lm") +
  facet_wrap(facets = ~Target, scales = "free")

`geom_smooth()` using formula = 'y ~ x'

fits <- tibble()
# Note: no standard for norovirus
for (target in c("16S", "Cov2", "CrA", "PMMV")) {
  fit <- lm(Cq ~ log10(quantity),
    data = filter(tidy_data, Task == "STANDARD", Target == target)
  ) |>
    tidy() |>
    mutate(Target = target, efficiency = 10^-(1 / estimate) - 1)
  fits <- bind_rows(fits, fit)
}
print(fits |> filter(term == "log10(quantity)"))

# A tibble: 4 × 7
  term            estimate std.error statistic  p.value Target efficiency
  <chr>              <dbl>     <dbl>     <dbl>    <dbl> <chr>       <dbl>
1 log10(quantity)    -2.92    0.125      -23.3 5.44e-12 16S          1.20
2 log10(quantity)    -2.54    0.121      -21.1 1.99e-11 Cov2         1.47
3 log10(quantity)    -2.84    0.0806     -35.3 2.69e-14 CrA          1.25
4 log10(quantity)    -2.72    0.0829     -32.8 6.90e-14 PMMV         1.33

Amplification curves

amp_data |>
  filter(Task == "UNKNOWN") |>
  ggplot(mapping = aes(
    x = `Cycle Number`,
    y = dRn,
    color = as.factor(group),
    group = Well
  )) +
  geom_line() +
  geom_line(mapping = aes(
    x = `Cycle Number`,
    y = Threshold
  ), color = "Grey") +
  scale_y_log10() +
  facet_wrap(~Target, scales = "free")

Warning in self$trans$transform(x): NaNs produced

Warning: Transformation introduced infinite values in continuous y-axis

Warning: Removed 49 rows containing missing values (`geom_line()`).

amp_data |>
  filter(Task == "NTC") |>
  ggplot(mapping = aes(
    x = `Cycle Number`,
    y = dRn,
    group = Well
  )) +
  geom_line() +
  geom_line(mapping = aes(
    x = `Cycle Number`,
    y = Threshold
  ), color = "Grey") +
  scale_y_log10() +
  facet_wrap(~Target, scales = "free")

Warning in self$trans$transform(x): NaNs produced

Warning: Transformation introduced infinite values in continuous y-axis

Warning: Removed 149 rows containing missing values (`geom_line()`).

plot_amp <- function(data, color) {
  ggplot(data, aes(x = `Cycle Number`, y = dRn)) +
    geom_line(mapping = aes(
      color = as.factor({{ color }}),
      group = Well,
    )) +
    scale_y_log10(limits = c(1e-3, 1e1))
}

ruler <- function(y0_from, num_rules) {
  y0 <- 10^seq(from = y0_from, by = -1, length.out = num_rules)
  rules <- crossing(`Cycle Number` = amp_data$`Cycle Number`, y0 = y0) |>
    mutate(dRn = y0 * 2^`Cycle Number`)
  geom_line(
    data = rules,
    mapping = aes(group = y0),
    color = "black"
  )
}

plot_amp_with_ruler <- function(target, y0_from, num_rules) {
  amp_data |>
    filter(!is.na(quantity), Target == target) |>
    plot_amp(quantity) +
    ruler(y0_from, num_rules) +
    geom_line(mapping = aes(
      x = `Cycle Number`,
      y = Threshold
    ), color = "Grey") +
    labs(title = target)
}

plot_amp_with_ruler("16S", -5.5, 5)

Warning in self$trans$transform(x): NaNs produced

Warning: Transformation introduced infinite values in continuous y-axis

Warning: Removed 51 rows containing missing values (`geom_line()`).

Warning: Removed 134 rows containing missing values (`geom_line()`).

plot_amp_with_ruler("Cov2", -6.5, 5)

Warning in self$trans$transform(x): NaNs produced

Warning: Transformation introduced infinite values in continuous y-axis

Warning: Removed 133 rows containing missing values (`geom_line()`).

plot_amp_with_ruler("CrA", -4.5, 5)

Warning in self$trans$transform(x): NaNs produced

Warning: Transformation introduced infinite values in continuous y-axis

Warning: Removed 34 rows containing missing values (`geom_line()`).

Warning: Removed 133 rows containing missing values (`geom_line()`).

plot_amp_with_ruler("PMMV", -8, 5)

Warning in self$trans$transform(x): NaNs produced

Warning: Transformation introduced infinite values in continuous y-axis

Warning: Removed 51 rows containing missing values (`geom_line()`).

Warning: Removed 136 rows containing missing values (`geom_line()`).