Visualizing the Ohio COVID-19 data

James Bland true
2020-05-19

While I have been waiting for my Stan programs to run, I have been playing around with Ohio’s COVID-19 datset. So here are a few plots that I have made while learning a bit more R and dplyr.


rm(list = ls())
library(tidyr)
library(dplyr)
library(ggplot2)
library(gganimate)
library(lubridate)
library("survminer")
require(survival)
library(xtable)
library(scales)
set.seed(42)

# Source: https://worldpopulationreview.com/us-counties/oh/
pop<-read.csv("CountyPop.csv")

pop$CTYNAME<-gsub(" .*","",pop$CTYNAME)

colnames(pop)<-c("County","Population","GrowthRate")



D<-data.frame(read.csv('https://coronavirus.ohio.gov/static/COVIDSummaryData.csv'))

colnames(D)[1]<-c("County")

D<-merge(D,pop,by=c("County"))


# convert dates to days since 2020-01-01
D$Onset.Date<-yday(as.Date(D$Onset.Date, format="%m/%d/%Y"))-1
D$Admission.Date<-yday(as.Date(D$Admission.Date, format="%m/%d/%Y"))-1
D$Date.Of.Death<-yday(as.Date(D$Date.Of.Death, format="%m/%d/%Y"))-1
colnames(D)[3]<-"Age"

knitr::kable(head(D))
County Sex Age Onset.Date Date.Of.Death Admission.Date Case.Count Death.Count Hospitalized.Count Population GrowthRate
Adams Female 60-69 78 NA NA 1 0 0 27724 -2.9068
Adams Male 40-49 88 NA NA 1 0 0 27724 -2.9068
Adams Female 0-19 118 NA NA 1 0 0 27724 -2.9068
Adams Male 80+ 128 137 132 1 1 1 27724 -2.9068
Adams Male 20-29 131 NA NA 1 0 0 27724 -2.9068
Adams Male 40-49 105 NA NA 1 0 0 27724 -2.9068

Policies

Here I compile a (very incomplete) list of policies, directives, orders, and so on of things happening at the state and federal level, mostly based on the Stay Safe Ohio Order.


# Stay Safe Order Ohio
#https://coronavirus.ohio.gov/static/publicorders/Directors-Stay-Safe-Ohio-Order.pdf
Dates<-rbind(c("03/16/2020","President Trump's Coronavirus guidelines issued","Guidelines","Trump","closure"),
             c("03/15/2020","Ohio Department of Health: limits access to jails, limits sale of food to carry-out and delivery only","Jails,  Carryout","State","closure"),
             c("03/15/2020","CDC issues Interim Guidance for mass gatherings (>50 people)","Gatherings > 50","CDC","closure"),
             c("03/16/2020","Ohio department of health closes polling stations for March 17 primary election","Polls closed","State","closure"),
             c("03/17/2020","Ohio Department of Health limits mass gatherings","gatherings","State","closure"),
             c("03/19/2020","Ohio Department of Health closes hair salons","Salons","State","closure"),
             c("03/21/2020","Ohio Department of Health closes older adult day care and family entertainment centers","Adult day care","State","closure"),
             c("03/22/2020","Ohio Department of Health orders all persons to stay at home unless engaged in essential activity","Non essential stay at home","State","closure"),
             c("03/24/2020","Ohio Department of Health closes child care services","Child care","State","closure"),
             c("03/30/2020","Ohio Department of Health closes K-12 schools","K-12","State","closure"),
             
             c("03/09/2020","Governor declares state of emergency (Executive order 2020-01D)","State of emergency","State","closure"),
             c("04/30/2020","SSOO(9) Non-essential medical services restrictions rescinded","Non-essential medical","State","opening"),
             c("05/04/2020","SSOO(10-11) Non-essential manufacturing, general office environments may re-open","Non-essential manufacturing & office","State","opening"),
             c("05/01/2020","SSOO(12) Retail may re-open: Curbside pickup, delivery, and appointments only","Retail  curbside pickup","State","opening"),
             c("05/12/2020","SSOO(12) Retail may re-open","Retail in store","State","opening")
)
colnames(Dates)<-c("Date","Event","Short","Group","Type")

Dates<-data.frame(Dates)




Dates$Date.Int<-yday(as.Date(Dates$Date, format="%m/%d/%Y"))-1

#Dates<-Dates %>% arrange(Date.Int)
Dates$position<-((1:dim(Dates)[1])-floor((1:dim(Dates)[1])/4)*4)*(-1)^(1:dim(Dates)[1])/4
Dates$position<-runif(dim(Dates)[2])*(-1)^(1:dim(Dates)[1])
knitr::kable(Dates)
Date Event Short Group Type Date.Int position
03/16/2020 President Trump’s Coronavirus guidelines issued Guidelines Trump closure 75 -0.9148060
03/15/2020 Ohio Department of Health: limits access to jails, limits sale of food to carry-out and delivery only Jails, Carryout State closure 74 0.9370754
03/15/2020 CDC issues Interim Guidance for mass gatherings (>50 people) Gatherings > 50 CDC closure 74 -0.2861395
03/16/2020 Ohio department of health closes polling stations for March 17 primary election Polls closed State closure 75 0.8304476
03/17/2020 Ohio Department of Health limits mass gatherings gatherings State closure 76 -0.6417455
03/19/2020 Ohio Department of Health closes hair salons Salons State closure 78 0.5190959
03/21/2020 Ohio Department of Health closes older adult day care and family entertainment centers Adult day care State closure 80 -0.7365883
03/22/2020 Ohio Department of Health orders all persons to stay at home unless engaged in essential activity Non essential stay at home State closure 81 0.9148060
03/24/2020 Ohio Department of Health closes child care services Child care State closure 83 -0.9370754
03/30/2020 Ohio Department of Health closes K-12 schools K-12 State closure 89 0.2861395
03/09/2020 Governor declares state of emergency (Executive order 2020-01D) State of emergency State closure 68 -0.8304476
04/30/2020 SSOO(9) Non-essential medical services restrictions rescinded Non-essential medical State opening 120 0.6417455
05/04/2020 SSOO(10-11) Non-essential manufacturing, general office environments may re-open Non-essential manufacturing & office State opening 124 -0.5190959
05/01/2020 SSOO(12) Retail may re-open: Curbside pickup, delivery, and appointments only Retail curbside pickup State opening 121 0.7365883
05/12/2020 SSOO(12) Retail may re-open Retail in store State opening 132 -0.9148060

Event counts by day


## new cases
plt<-(
  ggplot()
  +theme_bw()
  +geom_histogram(data=D[D$Onset.Date>=50,],aes(x=Onset.Date),binwidth=1)
  +ylab("Daily cases")+xlab("Days since 2020-01-01")
  +geom_segment(data=Dates,aes(y=400*position,yend=0,x=Date.Int,xend=Date.Int,color=Type,linetype=Group))
  +geom_text(data=Dates,aes(y=400*position,x=Date.Int,label=Short,color=Type),size=3)
  #+coord_fixed(ratio = 1e-2)
)
plt


## Admitted to hospital
plt<-(
  ggplot(data=D,aes(x=Admission.Date))
  +theme_bw()
  +geom_histogram(binwidth=1)
  +geom_segment(data=Dates,aes(y=100*position,yend=0,x=Date.Int,xend=Date.Int,color=Type,linetype=Group))
  +geom_text(data=Dates,aes(y=100*position,x=Date.Int,label=Short,color=Type),size=3)
  +ylab("Hospital admissions")+xlab("Days since 2020-01-01")
)
plt


## Admitted to hospital
plt<-(
  ggplot(data=D,aes(x=Date.Of.Death))
  +theme_bw()
  +geom_histogram(binwidth=1)
  +ylab("Deaths")+xlab("Days since 2020-01-01")
  +geom_segment(data=Dates,aes(y=60*position,yend=0,x=Date.Int,xend=Date.Int,color=Type,linetype=Group))
  +geom_text(data=Dates,aes(y=60*position,x=Date.Int,label=Short,color=Type),size=3)
)
plt

Survival

Probably very pessimistic due to tests being conditioned on symptoms.


D$Died<-1+as.integer(!is.na(D$Date.Of.Death))
D$time<-max(D$Onset.Date)-D$Onset.Date

D<-D[order(D$Age),]

fit <- survfit(Surv(time, Died) ~ Age,data=D)


ggsurvplot(fit, data = D,conf.int = TRUE, ggtheme = theme_bw() ,
           xlab="Time since onset date (days)")

By county


Cases<-group_by(D, County,Onset.Date) %>% summarise(New.Cases= length(time))
colnames(Cases)[2]<-"Date"

Admissions<-group_by(D[!is.na(D$Admission.Date),], County,Admission.Date) %>% summarise(New.Admissions= length(time))
colnames(Admissions)[2]<-"Date"


Deaths<-group_by(D[!is.na(D$Date.Of.Death),], County,Date.Of.Death) %>% summarise(New.Deaths= length(time))
colnames(Deaths)[2]<-"Date"

COUNTIES<-merge(Cases,merge(Deaths,Admissions,by=c("County","Date"),all=TRUE),by=c("County","Date"),all=TRUE)

First.Case.Date<-aggregate(COUNTIES$Date,by=list(COUNTIES$County),FUN=min)
colnames(First.Case.Date)[1]<-"County"
DateList<-1:max(D$Onset.Date)
dl<-data.frame(DateList)
dl$Date<-dl$DateList

DL<-data.frame()
CList<-unique(COUNTIES$County)
for (cc in 1:length(CList)) {
  tmp<-dl
  tmp$County<-CList[cc]
  DL<-rbind(DL,tmp)
}

COUNTIES<-merge(COUNTIES,DL[,c(2,3)],by=c("County","Date"),all=TRUE)
COUNTIES<-COUNTIES[order(COUNTIES$County,COUNTIES$Date),]

# NA==> no death/admission/case on that day
COUNTIES[is.na(COUNTIES)]<-0

COUNTIES<-merge(COUNTIES,pop[,c("County","Population")],by="County")
COUNTIES<-COUNTIES %>%
          arrange(County,Date) %>%
          group_by(County) %>%
          mutate(Cumulative.Cases = cumsum(New.Cases)) %>%
          mutate(Cumulative.Deaths = cumsum(New.Deaths)) %>%
          mutate(Cumulative.Admissions = cumsum(New.Admissions)) %>%
          mutate(Cumulative.Cases.Frac = Cumulative.Cases/Population) %>%
          mutate(Cumulative.Deaths.Frac = Cumulative.Deaths/Population) %>%
          mutate(Cumulative.Admissions.Frac = Cumulative.Admissions/Population)

COUNTIES<-merge(COUNTIES,First.Case.Date,by="County")

COUNTIES$Time.Since.First.Case<-COUNTIES$Date-COUNTIES$x

(ggplot(COUNTIES[COUNTIES$Time.Since.First.Case>=0,],aes(x=Time.Since.First.Case,y=Cumulative.Cases.Frac*100000,group=County,color=Population))
      +geom_line()
      +theme_bw()
      +xlab("Days since first case")
      +ylab("Cases per 100,000")
      +scale_y_continuous(trans = "log10")
)


(ggplot(COUNTIES[COUNTIES$Time.Since.First.Case>=0,],aes(x=Time.Since.First.Case,y=Cumulative.Deaths.Frac*100000,group=County,color=Population))
      +geom_line()
      +theme_bw()
      +xlab("Days since first case")
      +ylab("Deaths per 100,000")
      +scale_y_continuous(trans = "log10")
)


plt<-(ggplot(COUNTIES[COUNTIES$Time.Since.First.Case>=0,],aes(y=Cumulative.Deaths.Frac*100000,x=Cumulative.Cases.Frac*100000,label=County,size=Population,color=Cumulative.Admissions.Frac*100000))
      +geom_point()
      +theme_bw()
      #+theme(legend.position = "none")
      +transition_time(Time.Since.First.Case)
      +labs(title= "COVID-19 cases by county in Ohio",subtitle = "Days since first case {round(frame_time, 0)}")
      +xlab("Cases per 100,000")
      +ylab("Deaths per 100,000")
      +scale_y_continuous(trans = "log10")
      +scale_x_continuous(trans = "log10")
      +labs(color = "Admissions per 100,000")
      +scale_fill_gradient(
        low = "blue",
        high = "red",
        space = "Lab",
        na.value = "grey50",
        guide = "colourbar",
        aesthetics = "color"
      )
)
animate(plt,nframes=max(COUNTIES$Time.Since.First.Case))


CountiesAggregate<- COUNTIES %>% group_by(County) %>%
            filter(Cumulative.Deaths.Frac == max(Cumulative.Deaths.Frac),
                   Cumulative.Cases.Frac == max(Cumulative.Cases.Frac),
                   Cumulative.Admissions.Frac == max(Cumulative.Admissions.Frac))

(ggplot(data=CountiesAggregate,aes(x=Cumulative.Cases.Frac*10^5,y=Cumulative.Deaths.Frac*10^5,label=County))
      +scale_y_continuous(trans = "log10")
      +scale_x_continuous(trans = "log10")
      +geom_text(size=2)
      +xlab("Cases per 100,000")
      +ylab("Deaths per 100,000")
      +theme_bw()
      +geom_smooth(method=lm,formula=y~x)#
      +labs(title = "COVID-19 cases and deaths by county in Ohio",subtitle = paste("Produced on",Sys.Date()))
  
)