This document is an exploratory analysis of the two datasets inspected below, `covid` and `counties`.
# Exploratory look at the `covid` and `counties` data frames.
# Lines starting with `##` are console output recorded when this was rendered.

# List the pipeline scripts in the project's "source" directory.
list.files(here::here("source"))
## [1] "01_data_download.R" "02_data_cleaning.R"
## [3] "03_data_analysis.R" "04_data_visualization.R"

# Run the download script. NOTE(review): presumably this is what creates
# `covid` and `counties` in the global environment -- confirm in that script.
source(here::here("source", "01_data_download.R"))

# --- covid: size and column names ----
dim(covid)
## [1] 206129 5
names(covid)
## [1] "key_plot_id" "date" "pcr_conc_lin" "normalization"
## [5] "date_downloaded"

# `pcr_conc_lin` is stored as character (see the str() output below), so
# calling range() on the raw column compares strings, not numbers. The result
# previously recorded here,
#   "0.0000010050251256281415" "9999900.304329794"
# was therefore not the numeric range: str() shows "53341336.07614875", which
# is numerically larger than that "maximum". Convert to numeric first.
# Re-render the document to record the numeric output.
range(as.numeric(covid$pcr_conc_lin), na.rm = TRUE)

# Column types: everything except `date_downloaded` is still character,
# including `date` and `pcr_conc_lin`.
str(covid)
## 'data.frame': 206129 obs. of 5 variables:
## $ key_plot_id : chr "CDC_VERILY_al_2629_Treatment plant_post grit removal" "CDC_VERILY_al_2629_Treatment plant_post grit removal" "CDC_VERILY_al_2629_Treatment plant_post grit removal" "CDC_VERILY_al_2629_Treatment plant_post grit removal" ...
## $ date : chr "2024-07-11" "2024-07-09" "2024-07-02" "2024-06-27" ...
## $ pcr_conc_lin : chr "53341336.07614875" "29775880.158212245" "26287234.478798874" "24661042.67835701" ...
## $ normalization : chr "flow-population" "flow-population" "flow-population" "flow-population" ...
## $ date_downloaded: Date, format: "2024-07-19" "2024-07-19" ...

# --- counties: size and column names ----
# `key_plot_id` appears in both `covid` and `counties`.
dim(counties)
## [1] 825148 16
names(counties)
## [1] "wwtp_jurisdiction" "wwtp_id"
## [3] "reporting_jurisdiction" "sample_location"
## [5] "sample_location_specify" "key_plot_id"
## [7] "county_names" "county_fips"
## [9] "population_served" "date_start"
## [11] "date_end" "ptc_15d"
## [13] "detect_prop_15d" "percentile"
## [15] "sampling_prior" "first_sample_date"