Overview

This document presents an exploratory analysis of the two datasets loaded below: the longitudinal `covid` data and the cross-sectional `counties` data.

Load data

list.files(here::here("source"))
## [1] "01_data_download.R"      "02_data_cleaning.R"     
## [3] "03_data_analysis.R"      "04_data_visualization.R"
source(here::here("source", "01_data_download.R"))

Longitudinal data

dim(covid)
## [1] 206129      5
names(covid)
## [1] "key_plot_id"     "date"            "pcr_conc_lin"    "normalization"  
## [5] "date_downloaded"
range(covid$pcr_conc_lin)
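## [1] "0.0000010050251256281415" "9999900.304329794"

Note: pcr_conc_lin is stored as character (see the str(covid) output below), so the range above comes from string comparison rather than numeric comparison; for example, the value "53341336.07614875" in the data is numerically larger than the reported maximum. Convert the column with as.numeric() before computing numeric summaries.
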
str(covid)
## 'data.frame':    206129 obs. of  5 variables:
##  $ key_plot_id    : chr  "CDC_VERILY_al_2629_Treatment plant_post grit removal" "CDC_VERILY_al_2629_Treatment plant_post grit removal" "CDC_VERILY_al_2629_Treatment plant_post grit removal" "CDC_VERILY_al_2629_Treatment plant_post grit removal" ...
##  $ date           : chr  "2024-07-11" "2024-07-09" "2024-07-02" "2024-06-27" ...
##  $ pcr_conc_lin   : chr  "53341336.07614875" "29775880.158212245" "26287234.478798874" "24661042.67835701" ...
##  $ normalization  : chr  "flow-population" "flow-population" "flow-population" "flow-population" ...
##  $ date_downloaded: Date, format: "2024-07-19" "2024-07-19" ...

Cross-sectional data

dim(counties)
## [1] 825148     16
names(counties)
##  [1] "wwtp_jurisdiction"       "wwtp_id"                
##  [3] "reporting_jurisdiction"  "sample_location"        
##  [5] "sample_location_specify" "key_plot_id"            
##  [7] "county_names"            "county_fips"            
##  [9] "population_served"       "date_start"             
## [11] "date_end"                "ptc_15d"                
## [13] "detect_prop_15d"         "percentile"             
## [15] "sampling_prior"          "first_sample_date"