Ozone Data for Illinois by County (2014)

Author: Brad Cable

Date: 2016-01-26

Illinois State University (IT 497 - Spring 2016)

Data Processing

Load Ozone Data from EPA Website

# Download one year of the EPA hourly_44201 (ozone) archive and return the
# CSV it contains as a data frame.
#
# year: four-digit year; used to build both the zip URL and the name of the
#       CSV file stored inside the archive.
#
# The temporary zip is removed through on.exit(), so it is cleaned up even
# when the download or the CSV parse fails part-way through.
loadHourlyOzone <- function(year){
    baseName <- paste0("hourly_44201_", year)
    zipPath <- tempfile()
    on.exit(unlink(zipPath), add=TRUE)

    download.file(
        paste0(
            "http://aqsdr1.epa.gov/aqsweb/aqstmp/airdata/",
            baseName, ".zip"
        ),
        zipPath
    )
    read.csv(unz(zipPath, filename=paste0(baseName, ".csv")))
}

ozone <- loadHourlyOzone(2014)
ozone2015 <- loadHourlyOzone(2015)

Aggregate Data by County and Get Top Results

# Illinois-only readings for each year, tagged with the zero-based month
# number that POSIXlt reports (0 = January).
ozoneIL <- ozone[ozone[["State.Name"]] == "Illinois", ]
ozoneIL[["Month"]] <- as.POSIXlt(ozoneIL[["Date.Local"]])$mon

ozone2015IL <- ozone2015[ozone2015[["State.Name"]] == "Illinois", ]
ozone2015IL[["Month"]] <- as.POSIXlt(ozone2015IL[["Date.Local"]])$mon

# Mean 2014 measurement per county, highest first.
ozoneILcnty <- aggregate(
    Sample.Measurement ~ County.Name,
    data=ozoneIL,
    FUN=mean
)
ozoneILcnty <- ozoneILcnty[order(-ozoneILcnty[["Sample.Measurement"]]), ]

# Top 5 and top 10 counties. County.Name becomes a factor whose levels follow
# the ranking, so plots keep the ranked order instead of sorting by name.
ozoneILcntyHead5 <- head(ozoneILcnty, n=5)
ozoneILcntyHead5[["County.Name"]] <- factor(
    ozoneILcntyHead5[["County.Name"]],
    levels=ozoneILcntyHead5[["County.Name"]]
)

ozoneILcntyHead10 <- head(ozoneILcnty, n=10)
ozoneILcntyHead10[["County.Name"]] <- factor(
    ozoneILcntyHead10[["County.Name"]],
    levels=ozoneILcntyHead10[["County.Name"]]
)

# Raw 2014 readings for the top five counties, with hour-of-day and
# day-of-month columns added for the later aggregations.
ozoneILpreagg <- ozoneIL[
    ozoneIL[["County.Name"]] %in% ozoneILcntyHead5[["County.Name"]],
]
ozoneILpreagg[["Hour"]] <- as.POSIXlt(
    ozoneILpreagg[["Time.Local"]], format="%H:%M"
)$hour
ozoneILpreagg[["Day"]] <- as.POSIXlt(ozoneILpreagg[["Date.Local"]])$mday

Define Generic Functions for Aggregating in Multiple Ways

# English month names in calendar order; used as factor levels so months
# sort and plot January through December. month.name is the base R constant
# holding exactly these twelve names.
monVect <- month.name
# Aggregate Sample.Measurement per county and month.
#
# ozoneAgg: data frame with County.Name, Sample.Measurement and a Month
#           column of zero-based month numbers (0 = January), i.e. the
#           values as.POSIXlt(...)$mon yields.
# FUN:      summary function handed to aggregate(), e.g. mean or length.
#
# Returns one row per county/month combination present in the data, sorted
# by county and then calendar month, with Month converted to a factor whose
# levels follow monVect.
aggByMonth <- function(ozoneAgg, FUN){
    ozoneAgg <- aggregate(
        Sample.Measurement ~ County.Name + Month,
        data=ozoneAgg, FUN=FUN
    )

    # Sort while Month is still numeric so months stay in calendar order.
    ozoneAgg <- ozoneAgg[
        order(ozoneAgg$County.Name, ozoneAgg$Month),
    ]

    # Index monVect directly rather than round-tripping through months():
    # months() returns names in the session's LC_TIME locale, which would
    # not match the English levels and would turn every month into NA.
    ozoneAgg$Month <- factor(monVect[ozoneAgg$Month + 1], levels=monVect)
    ozoneAgg
}

# Summarise Sample.Measurement for every county / hour-of-day pair.
#
# ozoneAgg: data frame with County.Name, Hour and Sample.Measurement.
# FUN:      summary function passed to aggregate(), e.g. mean.
#
# Returns the aggregated rows sorted by county then hour, with Hour turned
# into a factor carrying all 24 levels (0 to 23).
aggByHour <- function(ozoneAgg, FUN){
    hourly <- aggregate(
        Sample.Measurement ~ County.Name + Hour, data=ozoneAgg, FUN=FUN
    )

    rowOrder <- with(hourly, order(County.Name, Hour))
    hourly <- hourly[rowOrder, ]

    hourly$Hour <- factor(hourly$Hour, levels=0:23)
    hourly
}

# Summarise Sample.Measurement for every county / day-of-month pair.
#
# ozoneAgg: data frame with County.Name, Day and Sample.Measurement.
# FUN:      summary function passed to aggregate(), e.g. mean.
#
# Returns the aggregated rows sorted by county then day, with Day turned
# into a factor carrying all 31 levels (1 to 31).
aggByDay <- function(ozoneAgg, FUN){
    daily <- aggregate(
        Sample.Measurement ~ County.Name + Day, data=ozoneAgg, FUN=FUN
    )

    rowOrder <- with(daily, order(County.Name, Day))
    daily <- daily[rowOrder, ]

    daily$Day <- factor(daily$Day, levels=1:31)
    daily
}

# Count records per county and month, adding a zero row for every
# county/month pair that has no records.
#
# ozoneAgg: data frame accepted by aggByMonth() (County.Name, Month,
#           Sample.Measurement).
#
# Returns a data frame sorted by county then month, with Month as a factor
# over monVect and Sample.Measurement holding the record count as numeric.
aggLength <- function(ozoneAgg){
    ozoneAgg <- aggByMonth(ozoneAgg, length)

    # Every county/month pair that should appear in the result.
    allPairs <- expand.grid(
        Month=monVect,
        County.Name=levels(factor(ozoneAgg$County.Name)),
        stringsAsFactors=FALSE
    )

    # Compare pairs through a composite key; "\r" is used as the separator
    # because it is not expected to occur in a county or month name.
    pairKey <- function(county, month){
        paste(county, month, sep="\r")
    }
    isMissing <- !(
        pairKey(allPairs$County.Name, allPairs$Month) %in%
            pairKey(ozoneAgg$County.Name, ozoneAgg$Month)
    )

    # Append all zero-count rows in one rbind() instead of growing the frame
    # row by row; this also keeps the count column numeric throughout.
    if(any(isMissing)){
        ozoneAgg <- rbind(ozoneAgg, data.frame(
            County.Name=allPairs$County.Name[isMissing],
            Month=factor(allPairs$Month[isMissing], levels=monVect),
            Sample.Measurement=0,
            stringsAsFactors=FALSE
        ))
    }

    ozoneAgg <- ozoneAgg[order(
        ozoneAgg$County.Name, ozoneAgg$Month
    ),]
    ozoneAgg$Sample.Measurement <- as.numeric(
        ozoneAgg$Sample.Measurement
    )
    ozoneAgg
}

Create Aggregate Data

# Mean measurement for the top-five counties by month, hour and day.
ozoneILaggMonthMean <- aggByMonth(ozoneAgg=ozoneILpreagg, FUN=mean)
ozoneILaggHourMean <- aggByHour(ozoneAgg=ozoneILpreagg, FUN=mean)
ozoneILaggDayMean <- aggByDay(ozoneAgg=ozoneILpreagg, FUN=mean)

# Record counts per county and month for all Illinois counties, both years.
ozoneILaggLength <- aggLength(ozoneAgg=ozoneIL)
ozone2015ILaggLength <- aggLength(ozoneAgg=ozone2015IL)

Results

Top Five Polluting Counties in Illinois

# Show the five counties with the highest mean measurement.
ozoneILcntyHead5
##    County.Name Sample.Measurement
## 18    Randolph         0.03418317
## 9   Jo Daviess         0.03231734
## 1        Adams         0.03179620
## 8       Jersey         0.03114063
## 7     Hamilton         0.03070671
library(ggplot2)

# Bar chart of the same table; bars follow the ranked factor order.
g <- ggplot(
    data=ozoneILcntyHead5,
    mapping=aes(x=County.Name, y=Sample.Measurement)
) +
    geom_bar(stat="identity")
g + labs(x="County Name", y="Ozone Emissions (PPM)")

plot of chunk unnamed-chunk-14

County Data Hourly Averages by Month (Bars)

# Monthly means as bars, one facet row per county.
g <- ggplot(
    data=ozoneILaggMonthMean,
    mapping=aes(x=Month, y=Sample.Measurement)
) +
    geom_bar(stat="identity") +
    facet_grid(County.Name ~ .)
g + labs(x="Month", y="Ozone Emissions (PPM)")

plot of chunk unnamed-chunk-15

County Data Hourly Averages by Month (Lines)

# Monthly means as one coloured line per county.
g <- ggplot(
    data=ozoneILaggMonthMean,
    mapping=aes(
        x=Month,
        y=Sample.Measurement,
        group=County.Name,
        colour=County.Name
    )
) +
    geom_path()
g + labs(x="Month", y="Ozone Emissions (PPM)")

plot of chunk unnamed-chunk-16

County Data Averages by Hour

There is clearly more pollution during normal waking hours.

# Hour-of-day means as one coloured line per county.
g <- ggplot(
    data=ozoneILaggHourMean,
    mapping=aes(
        x=Hour,
        y=Sample.Measurement,
        group=County.Name,
        colour=County.Name
    )
) +
    geom_path()
g + labs(x="Hour", y="Ozone Emissions (PPM)")

plot of chunk unnamed-chunk-17

County Data Averages by Day

# Day-of-month means as one coloured line per county.
g <- ggplot(
    data=ozoneILaggDayMean,
    mapping=aes(
        x=Day,
        y=Sample.Measurement,
        group=County.Name,
        colour=County.Name
    )
) +
    geom_path()
g + labs(x="Day", y="Ozone Emissions (PPM)")

plot of chunk unnamed-chunk-18

Number of Records by County per Month

It is interesting to see how many Illinois counties have fewer records in January through March and in November/December.

# Records per county and month (2014), one coloured line per county.
# Sample.Measurement in ozoneILaggLength holds the record count produced by
# aggLength(), so the y axis is labelled as a count rather than as PPM.
g <- ggplot(ozoneILaggLength, aes(
    x=Month, y=Sample.Measurement, group=County.Name, colour=County.Name
))
g <- g + geom_path()
g + xlab("Month") + ylab("Number of Records")

plot of chunk unnamed-chunk-19

Number of Records by County per Month (2015)

Looking at the 2015 data, we see a similar pattern for January through March, and the drop in November/December can be explained by the data not being collected yet for this particular year.

# Records per county and month (2015), one coloured line per county.
# Sample.Measurement in ozone2015ILaggLength holds the record count produced
# by aggLength(), so the y axis is labelled as a count rather than as PPM.
g <- ggplot(ozone2015ILaggLength, aes(
    x=Month, y=Sample.Measurement, group=County.Name, colour=County.Name
))
g <- g + geom_path()
g + xlab("Month") + ylab("Number of Records")

plot of chunk number_2015