DevOps IPTables Logs

Libraries

library(circlize)
library(ggplot2)
library(igraph)
library(rCharts)
library(stringr)

Theme Colors

# Base palette: nine hex colors used for the ggplot fill scale and for the
# chord-diagram sectors further down.
thm_colors <- c(
    "#EA3C00",
    "#EAD900",
    "#00EAB4",
    "#9200EA",
    "#EA7C00",
    "#3BEA00",
    "#EA00C0",
    "#0009EA",
    "#A0A0A0"
)
# Darken hex colors by halving each of the three RGB channels.
#
# color_arg: character vector of "#RRGGBB" strings.
# Returns:   character vector of "#RRGGBB" strings, one per input color.
darken <- function(color_arg){
    # Strip the leading "#" so the channels sit at fixed offsets.
    hex_digits <- substr(color_arg, 2, nchar(color_arg))

    # Halve the channel stored at characters first..last and render it back
    # as exactly two upper-case hex digits.
    halve_channel <- function(first, last){
        channel <- paste0("0x", substr(hex_digits, first, last))
        halved <- round(as.integer(channel) * 0.5, 0)
        hex <- sprintf("%X", halved)
        # Values below 0x10 print as one digit; left-pad them with "0".
        needs_pad <- nchar(hex) == 1
        hex[needs_pad] <- paste0("0", hex[needs_pad])
        hex
    }

    red <- halve_channel(1, 2)
    green <- halve_channel(3, 4)
    blue <- halve_channel(5, 6)

    paste0("#", red, green, blue)
}
# Append a darkened copy of every base color to the palette, then wrap the
# extended palette in the manual fill scale reused by the stacked bar charts.
thm_colors <- append(thm_colors, darken(thm_colors))
ggp_theme <- scale_fill_manual(values=thm_colors)

Load Data

# Read the raw iptables log export (no header row) and name its 31 columns.
ipt_full <- read.csv("iptables.csv", header=FALSE)
colnames(ipt_full) <- c(
    "filename", "date_raw", "host", "label", "iface.in", "iface.out", "phys.in",
    "phys.out", "mac", "src.ip", "dst.ip", "len.1", "tos", "prec", "ttl", "id.1",
    "df", "proto", "src.port", "dst.port", "window", "res", "pkt.synack", "pkt.psh",
    "urgp", "type", "code", "id.2", "seq", "len.2", "extra"
)
# When the working directory is named "prod" the full data set is used;
# anywhere else a reproducible 1% row sample keeps the report fast.
ipt <- if(basename(getwd()) == "prod"){
    ipt_full
} else {
    set.seed(43121)
    ipt_full[sample(rownames(ipt_full), nrow(ipt_full)/100),]
}
# Lookup tables: subnet names by third octet, and hostnames by IP.
subnets <- read.csv("subnet.csv")
dns <- read.csv("dns.csv")

Functions

# Flag addresses whose last octet is 255.
#
# The pattern is anchored at the end of the string so that an address such as
# "192.168.255.7" (255 in a non-final octet) is not reported as broadcast;
# this matches the end-anchored ".255" handling in ip2subnet().
#
# ip:      character vector of addresses (or hostnames).
# Returns: logical vector, FALSE for NA input.
is_broadcast <- function(ip){
    grepl("\\.255$", ip)
}
# Flag packets whose protocol field equals 2.
# NOTE(review): protocol number 2 is presumably IGMP, which this report labels
# "multicast" -- confirm against the log format.
#
# proto:   vector of protocol values as read from the log.
# Returns: logical vector (NA where proto is NA).
is_multicast <- function(proto){
    multicast_proto <- 2
    proto == multicast_proto
}
# Flag addresses that start with the "138.87." prefix.
#
# ip:      character vector of addresses.
# Returns: logical vector (NA where ip is NA).
is_isu <- function(ip){
    isu_prefix <- "138.87."
    substr(ip, 1, nchar(isu_prefix)) == isu_prefix
}
# Flag addresses that appear in the fixed list of VPN endpoint IPs.
#
# ip:      character vector of addresses.
# Returns: logical vector, FALSE for NA or unlisted input.
is_vpn <- function(ip){
    vpn_ips <- c(
        "179.43.177.162", "81.17.16.170",
        "179.43.178.98", "179.43.148.34",
        "179.43.176.2", "179.43.174.34",
        "46.19.140.62", "46.19.141.158",
        "179.43.133.226", "179.43.148.66",
        "81.17.27.234", "179.43.174.130",
        "179.43.177.98"
    )
    !is.na(match(ip, vpn_ips))
}
# Flag addresses inside 192.168.0.0/16.
#
# The pattern is anchored at the start of the string so that an address which
# merely contains "192.168." later on (e.g. "10.192.168.1") is not treated as
# internal; subnet_octet_gen() anchors its pattern the same way.
#
# ip:      character vector of addresses (or hostnames).
# Returns: logical vector, FALSE for NA input.
is_internal <- function(ip){
    grepl("^192\\.168\\.", ip)
}
# Flag values that are neither internal addresses nor text starting with a
# letter (values starting with a letter are treated as names, not addresses).
#
# ip:      character vector of addresses or labels.
# Returns: logical vector.
is_external <- function(ip){
    first_char <- tolower(substr(ip, 1, 1))
    !(is_internal(ip) | first_char %in% letters)
}
# Reduce a full "192.168.X.Y" address to its third octet "X".
# Anything that does not match that exact shape is returned unchanged.
#
# ip:      character vector of addresses.
# Returns: character vector of the same length.
subnet_octet_gen <- function(ip){
    # The pattern is anchored on both ends, so at most one match per element.
    sub("^192\\.168\\.([0-9]+)\\.[0-9]+$", "\\1", ip)
}
# Map every address to a subnet name.
#
# Each address is first reduced to a lookup code: the third octet for
# 192.168.X.Y addresses, or one of the synthetic codes 224 (multicast),
# 255 (broadcast), 256 (external_isu), 257 (external_vpn), 300 (external).
# The code is then translated to a name using the global `subnets` table
# (columns `name`, `third_octet`) plus the synthetic entries.
#
# Two defects of the previous version are fixed here:
#   * names were looked up with merge(), which returns rows sorted by key, so
#     the names were written back against the wrong rows; match() keeps the
#     result aligned with the input;
#   * the "external" code was applied last and therefore overwrote the
#     multicast / ISU / VPN codes (those addresses are also non-internal).
#     Codes are now applied from least to most specific, giving the same
#     precedence hostparse() uses: multicast > broadcast > ISU > VPN > external.
#
# ip:      character vector of addresses.
# proto:   protocol values, parallel to ip.
# Returns: character vector of subnet names, NA where no name is known.
ip2subnet <- function(ip, proto){
    ip <- as.character(ip)

    subnet_octet <- subnet_octet_gen(ip)
    # Least specific first; later assignments take precedence.
    subnet_octet[is_external(ip)] <- "300"
    subnet_octet[is_vpn(ip)] <- "257"
    subnet_octet[is_isu(ip)] <- "256"
    subnet_octet[grepl("\\.255$", ip)] <- "255"
    subnet_octet[is_multicast(proto)] <- "224"
    # Whatever still contains a dot was not classified (e.g. a hostname).
    subnet_octet[grepl(".", subnet_octet, fixed=TRUE)] <- NA

    lookup_octet <- c(
        as.character(subnets$third_octet),
        "224", "255", "256", "257", "300"
    )
    lookup_name <- c(
        as.character(subnets$name),
        "multicast", "broadcast", "external_isu", "external_vpn", "external"
    )

    # match() yields NA for NA or unknown codes, which propagates to the name.
    lookup_name[match(subnet_octet, lookup_octet)]
}
# Attach a hostname column to the packet table.
#
# The packet table is left-joined to `dns` on the address column; addresses
# without a DNS entry are then replaced by a label, in this order (the first
# matching rule wins because each label starts with a letter and is therefore
# skipped by the later rules):
#   multicast, broadcast, external_isu, external_vpn, external,
#   "<subnet name>_dhcp" for internal addresses in a known subnet,
#   "unknown_<third octet>" for any remaining internal address.
#
# ipt:            packet data frame; must contain `ip_field` and "proto".
# dns:            data frame with column "ip"; its last column is taken as
#                 the hostname.
# ip_field:       name of the address column to resolve.
# hostname_field: name of the hostname column to create.
# Returns:        the merged data frame. merge() does not preserve the row
#                 order of `ipt`.
#
# Uses the global `subnets` table (columns `name`, `third_octet`).
hostparse <- function(ipt, dns, ip_field, hostname_field){
    ipt_ret <- merge(ipt, dns, by.x=ip_field, by.y="ip", all.x=TRUE)
    names(ipt_ret)[ncol(ipt_ret)] <- hostname_field

    host <- as.character(ipt_ret[[hostname_field]])

    # No DNS entry: fall back to the raw address so the rules below can act.
    unresolved <- is.na(host)
    host[unresolved] <- as.character(ipt_ret[[ip_field]][unresolved])

    host[is_multicast(ipt_ret[["proto"]])] <- "multicast"
    host[is_broadcast(host)] <- "broadcast"
    host[is_isu(host)] <- "external_isu"
    host[is_vpn(host)] <- "external_vpn"
    host[is_external(host)] <- "external"

    # seq_len() so that an empty subnet table simply skips the loop.
    for(i in seq_len(nrow(subnets))){
        in_subnet <- grepl(
            paste0("192\\.168\\.", subnets[i, "third_octet"], "\\."),
            host
        )
        host[in_subnet] <- paste0(subnets[i, "name"], "_dhcp")
    }

    # Guarded: paste0("unknown_", character(0)) has length 1, which made the
    # unguarded assignment fail when no internal address was left.
    internal <- is_internal(host)
    if(any(internal)){
        host[internal] <- paste0("unknown_", subnet_octet_gen(host[internal]))
    }

    ipt_ret[[hostname_field]] <- host
    ipt_ret
}
# Order an aggregate for plotting, using a second aggregate as the sort key.
#
# agg1, agg2: outputs of aggregate() over the same two grouping vectors
#             (columns Group.1, Group.2, x). agg1$x holds the values to plot,
#             agg2$x the key used for ordering.
# agg_name1, agg_name2: names given to the two grouping columns.
# Returns: agg1 with columns (agg_name1, agg_name2, "Count"), rows ordered by
#          the second group and then by the key; the first grouping column is
#          a factor whose levels follow the key order.
sort_agg_int <- function(agg1, agg2, agg_name1, agg_name2){
    # Bring both aggregates into the same row order so agg2$x lines up
    # with the rows of agg1.
    agg1 <- agg1[order(agg1$Group.1, agg1$Group.2),]
    agg2 <- agg2[order(agg2$Group.1, agg2$Group.2),]

    agg_ret <- agg1[order(agg1$Group.2, agg2$x),]
    agg_labels <- unique(agg2[order(agg2$x),]$Group.1)

    # seq_len() instead of seq(1, n): the latter yields c(1, 0) for n == 0.
    rownames(agg_ret) <- seq_len(nrow(agg_ret))
    names(agg_ret) <- c(agg_name1, agg_name2, "Count")
    agg_ret[[agg_name1]] <- factor(agg_ret[[agg_name1]], levels=agg_labels)
    agg_ret
}
# Count rows per (field1, field2) group and optionally order the result.
#
# data:          data frame the fields come from (kept for interface
#                compatibility; the fields themselves are passed separately).
# agg_field:     vector aggregated with agg_func_sort to obtain the sort key.
# field1/field2: grouping vectors, one entry per row.
# agg_name1/2:   column names for the two grouping columns in the result.
# agg_func_data: function applied to a vector of ones per group (e.g. length).
# agg_func_sort: optional function; when it is a function the result is
#                ordered through sort_agg_int(), otherwise returned as is.
# filter:        logical row filter; a single TRUE keeps every row.
# Returns:       data frame with columns (agg_name1, agg_name2, "Count").
sort_agg <- function(data, agg_field,
    field1, field2,
    agg_name1, agg_name2,
    agg_func_data, agg_func_sort=NA,
    filter=TRUE
){
    # Expand the filter to one flag per row and treat NA as "drop"; an NA in
    # the filter previously made rep(1, sum(filter)) fail.
    keep <- rep_len(as.logical(filter), length(field1))
    keep[is.na(keep)] <- FALSE

    groups <- list(field1[keep], field2[keep])
    agg1 <- aggregate(
        rep(1, sum(keep)),
        by=groups,
        FUN=agg_func_data
    )

    # is.function() rather than comparing class(): class() can have length > 1.
    if(!is.function(agg_func_sort)){
        names(agg1) <- c(agg_name1, agg_name2, "Count")
        return(agg1)
    }

    agg2 <- aggregate(
        agg_field[keep],
        by=groups,
        FUN=agg_func_sort
    )
    sort_agg_int(agg1, agg2, agg_name1, agg_name2)
}
# Build the named sector-color vector for a chord diagram: one color per
# level, recycling the palette when there are more levels than colors.
#
# The previous version took nrow(src_data) colors (one per edge) and then
# named them with `lvls`, which fails whenever there are fewer edges than
# levels and yields unnamed surplus colors otherwise.
#
# src_data: edge table; unused, kept so existing calls stay valid.
# lvls:     sector names.
# palette:  colors to recycle; defaults to the global thm_colors.
# Returns:  character vector of colors named by lvls.
chordCols <- function(src_data, lvls, palette=thm_colors){
    lvls <- as.character(lvls)
    col_ret <- rep_len(palette, length(lvls))
    names(col_ret) <- lvls
    col_ret
}
# Draw a directional chord diagram with a legend panel on the right.
#
# src_data: edge table; column 1 = source, column 2 = target, column 3 = weight.
# lvls:     every sector name occurring in columns 1 and 2.
# ...:      forwarded to circlize::chordDiagram().
#
# Sources and targets are converted to character before they index the
# matrices, so factor columns address cells by name rather than by integer
# level code. An edge table with zero rows leaves the matrices at their
# defaults instead of going through a 1:0 loop.
# Side effect: calls layout(), which changes the active graphics layout.
chordFunc <- function(src_data, lvls, ...){
    lvls <- as.character(lvls)
    n_lvls <- length(lvls)

    # Adjacency matrix of weights and a parallel matrix of link colors.
    mat <- matrix(0, n_lvls, n_lvls, dimnames=list(lvls, lvls))
    mat_col <- matrix("#000000", n_lvls, n_lvls, dimnames=list(lvls, lvls))

    grid_col <- chordCols(src_data, lvls)

    if(nrow(src_data) > 0){
        src <- as.character(src_data[, 1])
        dst <- as.character(src_data[, 2])
        # Two-column character matrix: addresses cells by (row, column) name.
        cells <- cbind(src, dst)
        mat[cells] <- src_data[, 3]
        # Each link takes the color of its source sector.
        mat_col[cells] <- grid_col[src]
    }

    layout(matrix(c(1,2),1,2), widths=c(5,1))
    chordDiagram(mat,
        grid.col=grid_col,
        col=mat_col,
        directional=TRUE,
        annotationTrack="grid",
        diffHeight=0,
        link.border=TRUE,
        ...
    )
    plot.new()
    legend("left", legend=lvls, fill=grid_col, border=1)
}
# Plot the edge table as a directed igraph graph.
#
# data:        edge table; column 1 = source vertex, column 2 = target vertex.
# layout_func: igraph layout function passed to plot().
#
# make_graph() expects the edges as one flat vector
# (src1, dst1, src2, dst2, ...). That vector is built in one step by stacking
# the two columns as matrix rows and flattening column-wise, instead of
# growing a vector element by element. Factor columns are converted to
# character so vertices are named by label.
ig <- function(data, layout_func){
    as_vertex <- function(x) if(is.factor(x)) as.character(x) else x
    edges <- as.vector(rbind(as_vertex(data[, 1]), as_vertex(data[, 2])))
    plot(
        make_graph(edges, directed=TRUE),
        layout=layout_func
    )
}

Data Manipulation

# Build a proper timestamp for every log line. date_raw carries no year, so
# the year is taken from the file name.
# NOTE(review): characters 10-13 of filename are presumably the year and 14-15
# the month of the log file -- confirm against the file naming scheme.
# Lines stamped Oct-Dec inside a file whose month field is 01-03 get the file
# year minus one.
nr_tf <- (
    substr(ipt$filename, 14, 15) %in% c("01","02","03") &
    substr(ipt$date_raw, 0, 3) %in% c("Oct", "Nov", "Dec")
)
ipt_nr <- ipt[nr_tf,]
ipt_not_nr <- ipt[!nr_tf,]
ipt_not_nr_date <- as.POSIXlt(paste0(
    ipt_not_nr$date_raw,
    substr(ipt_not_nr$filename, 10, 13)
), format="%b %e %H:%M:%S%Y")
ipt_nr_date <- as.POSIXlt(paste0(
    ipt_nr$date_raw,
    as.integer(substr(ipt_nr$filename, 10, 13))-1
), format="%b %e %H:%M:%S%Y")
# Recombine both groups as epoch seconds, in the original row order.
ipt_merge <- numeric(length(nr_tf))
ipt_merge[nr_tf] <- as.numeric(ipt_nr_date)
ipt_merge[!nr_tf] <- as.numeric(ipt_not_nr_date)
# Convert the seconds directly. Going through as.character() and "%s" is
# platform dependent and fails for values printed in scientific notation
# (e.g. 1.4e+09). POSIXct is also the class suited to data frame columns.
ipt$date <- as.POSIXct(ipt_merge, origin="1970-01-01")
# which() drops rows with an unparseable (NA) date instead of turning them
# into all-NA rows.
ipt <- ipt[which(as.numeric(ipt$date) > 323762400),] # after 1980
ipt$agg_date <- strftime(ipt$date, format="%b %Y")
ipt <- ipt[order(ipt$date),]
ipt$src.ip <- as.character(ipt$src.ip)
ipt$dst.ip <- as.character(ipt$dst.ip)
dns$ip <- as.character(dns$ip)
ipt$src.subnet <- ip2subnet(ipt$src.ip, ipt$proto)
ipt$dst.subnet <- ip2subnet(ipt$dst.ip, ipt$proto)
ipt <- hostparse(ipt, dns, "src.ip", "src.hostname")
ipt <- hostparse(ipt, dns, "dst.ip", "dst.hostname")
# Relabel bridge "br1" as "kvm-br1". Converting to character first keeps the
# assignment valid when the columns were read as factors.
ipt$iface.in <- as.character(ipt$iface.in)
ipt$iface.out <- as.character(ipt$iface.out)
ipt$iface.in[ipt$iface.in == "br1"] <- "kvm-br1"
ipt$iface.out[ipt$iface.out == "br1"] <- "kvm-br1"

Data Aggregation

# Protocol: overall counts, then counts per month/protocol ordered by the
# latest timestamp seen in each group.
proto_freq <- setNames(
    data.frame(table(ipt$proto)),
    c("Protocol", "Count")
)
proto_freq_agg_data <- sort_agg(
    data=ipt,
    agg_field=as.numeric(ipt$date),
    field1=ipt$agg_date, field2=ipt$proto,
    agg_name1="Date", agg_name2="Protocol",
    agg_func_data=length, agg_func_sort=max
)

# Inbound interface: rows with an empty interface name are left out.
iface_in_filter <- ipt$iface.in != ""
iface_in <- factor(ipt$iface.in[iface_in_filter])
iface_in_freq <- setNames(
    data.frame(table(iface_in)),
    c("Interface", "Count")
)
iface_in_agg_data <- sort_agg(
    data=ipt,
    agg_field=as.numeric(ipt$date),
    field1=ipt$agg_date, field2=ipt$iface.in,
    agg_name1="Date", agg_name2="Interface",
    agg_func_data=length, agg_func_sort=max,
    filter=iface_in_filter
)

# Outbound interface: same treatment as the inbound side.
iface_out_filter <- ipt$iface.out != ""
iface_out <- factor(ipt$iface.out[iface_out_filter])
iface_out_freq <- setNames(
    data.frame(table(iface_out)),
    c("Interface", "Count")
)
iface_out_agg_data <- sort_agg(
    data=ipt,
    agg_field=as.numeric(ipt$date),
    field1=ipt$agg_date, field2=ipt$iface.out,
    agg_name1="Date", agg_name2="Interface",
    agg_func_data=length, agg_func_sort=max,
    filter=iface_out_filter
)
# Source subnet: overall counts and counts per month/subnet.
subnet_src_freq_data <- data.frame(table(ipt$src.subnet))
names(subnet_src_freq_data) <- c("Subnet", "Count")
subnet_src_agg_data <- sort_agg(ipt,
    as.numeric(ipt$date),
    ipt$agg_date, ipt$src.subnet,
    "Date", "Subnet",
    length, max
)
# Packets per (source hostname, source subnet) pair. The table()-based
# assignment that used to precede this call was overwritten immediately (and
# labelled its count column "Subnet"), so it has been removed.
subnet_src_host <- sort_agg(ipt,
    ipt$src.hostname,
    ipt$src.hostname, ipt$src.subnet,
    "Hostname", "Subnet",
    length
)
# Destination subnet: overall counts and counts per month/subnet.
subnet_dst_freq_data <- setNames(
    data.frame(table(ipt$dst.subnet)),
    c("Subnet", "Count")
)
subnet_dst_agg_data <- sort_agg(
    data=ipt,
    agg_field=as.numeric(ipt$date),
    field1=ipt$agg_date, field2=ipt$dst.subnet,
    agg_name1="Date", agg_name2="Subnet",
    agg_func_data=length, agg_func_sort=max
)
# Packets per (destination hostname, destination subnet) pair.
subnet_dst_host <- sort_agg(
    data=ipt,
    agg_field=ipt$dst.hostname,
    field1=ipt$dst.hostname, field2=ipt$dst.subnet,
    agg_name1="Hostname", agg_name2="Subnet",
    agg_func_data=length
)
# Packets per (source subnet, destination subnet) pair.
subnet_vs_subnet <- sort_agg(
    data=ipt,
    agg_field=ipt$src.subnet,
    field1=ipt$src.subnet, field2=ipt$dst.subnet,
    agg_name1="Subnet.Source", agg_name2="Subnet.Destination",
    agg_func_data=length
)
# Packets per (source hostname, destination hostname) pair.
host_vs_host <- sort_agg(
    data=ipt,
    agg_field=ipt$src.hostname,
    field1=ipt$src.hostname, field2=ipt$dst.hostname,
    agg_name1="Hostname.Source", agg_name2="Hostname.Destination",
    agg_func_data=length
)

Scale Adjust

# Express every Count in hundreds of thousands (the unit used on the axes).
# NOTE(review): host_vs_host is not rescaled here -- confirm that is intended.
proto_freq$Count           <- proto_freq$Count / 1e5
proto_freq_agg_data$Count  <- proto_freq_agg_data$Count / 1e5
iface_in_freq$Count        <- iface_in_freq$Count / 1e5
iface_in_agg_data$Count    <- iface_in_agg_data$Count / 1e5
iface_out_freq$Count       <- iface_out_freq$Count / 1e5
iface_out_agg_data$Count   <- iface_out_agg_data$Count / 1e5
subnet_src_freq_data$Count <- subnet_src_freq_data$Count / 1e5
subnet_src_agg_data$Count  <- subnet_src_agg_data$Count / 1e5
subnet_src_host$Count      <- subnet_src_host$Count / 1e5
subnet_dst_freq_data$Count <- subnet_dst_freq_data$Count / 1e5
subnet_dst_agg_data$Count  <- subnet_dst_agg_data$Count / 1e5
subnet_dst_host$Count      <- subnet_dst_host$Count / 1e5
subnet_vs_subnet$Count     <- subnet_vs_subnet$Count / 1e5

Protocol Frequency

# Print the per-protocol totals.
proto_freq
##   Protocol    Count
## 1        2  1.36855
## 2       AH  0.00107
## 3      ESP  0.00055
## 4     ICMP  0.59840
## 5      TCP  5.40229
## 6      UDP 21.34442
# Bar chart of the same totals.
g <- ggplot(proto_freq, aes(x=Protocol, y=Count)) +
    geom_bar(stat="identity") +
    labs(y="Count (hundreds of thousands)")
g

plot of chunk proto_freq_ggplot

# Monthly packet counts, stacked by protocol.
g <- ggplot(proto_freq_agg_data, aes(x=Date, y=Count, fill=Protocol)) +
    geom_bar(stat="identity") +
    labs(y="Count (hundreds of thousands)", x="")
g + ggp_theme

plot of chunk proto_freq_date_ggplot

Interface IN Frequency

# Print the per-interface totals for inbound traffic.
iface_in_freq
##   Interface   Count
## 1       em1 1.37820
## 2   kvm-br1 4.07101
## 3      p1p1 4.45534
## 4      p1p2 1.97376
## 5      p3p1 0.20819
## 6      p3p2 0.05222
## 7      p4p1 0.08303
# Bar chart of the same totals.
g <- ggplot(iface_in_freq, aes(x=Interface, y=Count)) +
    geom_bar(stat="identity") +
    labs(y="Count (hundreds of thousands)")
g

plot of chunk iface_in_freq_ggplot

# Monthly packet counts, stacked by inbound interface.
g <- ggplot(iface_in_agg_data, aes(x=Date, y=Count, fill=Interface)) +
    geom_bar(stat="identity") +
    labs(y="Count (hundreds of thousands)", x="")
g + ggp_theme

plot of chunk iface_in_freq_date_ggplot

Interface OUT Frequency

# Print the per-interface totals for outbound traffic.
iface_out_freq
##   Interface    Count
## 1       em1 12.50094
## 2   kvm-br1  4.07093
## 3      p1p1  0.00355
## 4      p1p2  0.00309
## 5      p3p1  0.00163
## 6      p3p2  0.00180
## 7      p4p1  0.39880
## 8    virbr0  0.00011
# Bar chart of the same totals.
g <- ggplot(iface_out_freq, aes(x=Interface, y=Count)) +
    geom_bar(stat="identity") +
    labs(y="Count (hundreds of thousands)")
g

plot of chunk iface_out_freq_ggplot

# Monthly packet counts, stacked by outbound interface.
g <- ggplot(iface_out_agg_data, aes(x=Date, y=Count, fill=Interface)) +
    geom_bar(stat="identity") +
    labs(y="Count (hundreds of thousands)", x="")
g + ggp_theme

plot of chunk iface_out_freq_date_ggplot

Subnet Source Frequency

# Packet totals per source subnet.
g <- ggplot(subnet_src_freq_data, aes(x=Subnet, y=Count)) +
    geom_bar(stat="identity") +
    labs(y="Count (hundreds of thousands)")
g

plot of chunk subnet_src_freq_data_ggplot

# Monthly packet counts, stacked by source subnet.
g <- ggplot(subnet_src_agg_data, aes(x=Date, y=Count, fill=Subnet)) +
    geom_bar(stat="identity") +
    labs(y="Count (hundreds of thousands)", x="")
g + ggp_theme

plot of chunk subnet_src_freq_date_data_ggplot

# Packets per source subnet, stacked by source hostname.
g <- ggplot(subnet_src_host, aes(x=Subnet, y=Count, fill=Hostname))
g <- g + geom_bar(stat="identity")
g <- g + labs(y="Count (hundreds of thousands)")
# The shared ggp_theme scale holds a fixed 18 colors; with it this chunk
# failed with "Error: Insufficient values in manual scale. 19 needed but only
# 18 provided." The palette is therefore interpolated so that it always has
# at least one color per hostname.
g + scale_fill_manual(values=colorRampPalette(thm_colors)(
    max(length(thm_colors), length(unique(subnet_src_host$Hostname)))
))

plot of chunk subnet_src_host_ggplot

Subnet Destination Frequency

# Packet totals per destination subnet.
g <- ggplot(subnet_dst_freq_data, aes(x=Subnet, y=Count)) +
    geom_bar(stat="identity") +
    labs(y="Count (hundreds of thousands)")
g

plot of chunk subnet_dst_freq_data_ggplot

# Monthly packet counts, stacked by destination subnet.
g <- ggplot(subnet_dst_agg_data, aes(x=Date, y=Count, fill=Subnet)) +
    geom_bar(stat="identity") +
    labs(y="Count (hundreds of thousands)", x="")
g + ggp_theme

plot of chunk subnet_dst_freq_date_data_ggplot

# Packets per destination subnet, stacked by destination hostname.
g <- ggplot(subnet_dst_host, aes(x=Subnet, y=Count, fill=Hostname))
g <- g + geom_bar(stat="identity")
g <- g + labs(y="Count (hundreds of thousands)")
# The shared ggp_theme scale holds a fixed 18 colors; with it this chunk
# failed with "Error: Insufficient values in manual scale. 43 needed but only
# 18 provided." The palette is therefore interpolated so that it always has
# at least one color per hostname.
g + scale_fill_manual(values=colorRampPalette(thm_colors)(
    max(length(thm_colors), length(unique(subnet_dst_host$Hostname)))
))

plot of chunk subnet_dst_host_ggplot

Subnet vs Subnet

# Packets per source subnet, stacked by destination subnet.
g <- ggplot(subnet_vs_subnet, aes(x=Subnet.Source, y=Count, fill=Subnet.Destination)) +
    geom_bar(stat="identity") +
    labs(y="Count (hundreds of thousands)")
g + ggp_theme

plot of chunk subnet_vs_subnet_ggplot

# Directed graph of subnet-to-subnet traffic, leaving out pairs whose source
# and destination subnet are the same.
ig(
    subnet_vs_subnet[
        subnet_vs_subnet$Subnet.Source != subnet_vs_subnet$Subnet.Destination,
    ],
    layout.gem
)

plot of chunk igraph_subnets

# Sankey input: one link per subnet pair. Source and target names get distinct
# ".src" / ".dst" suffixes, so a subnet appears as two separate nodes.
sk_data_subnet <- data.frame(
    source=paste0(as.character(subnet_vs_subnet$Subnet.Source), ".src"),
    target=paste0(as.character(subnet_vs_subnet$Subnet.Destination), ".dst"),
    value=subnet_vs_subnet$Count,
    stringsAsFactors=FALSE
)
# NOTE(review): after suffixing, source and target can never be equal, so this
# filter keeps every row -- confirm whether self-links were meant to be dropped.
sk_data_subnet <- sk_data_subnet[sk_data_subnet$source != sk_data_subnet$target,]

# Render through the rCharts d3 sankey template.
sk_subnet <- rCharts$new()
sk_subnet$setLib("./rCharts_d3_sankey/")
sk_subnet$setTemplate(script="./rCharts_d3_sankey/layouts/chart.html")
sk_subnet$set(
    data=sk_data_subnet,
    nodeWidth=15,
    nodePadding=10,
    layout=31,
    width=700,
    height=1024
)
sk_subnet$print(chartId="sankey_subnet")
# Every subnet that occurs as a source or a destination, in order of first
# appearance.
lvls_subnet <- unique(c(
    subnet_vs_subnet$Subnet.Source,
    subnet_vs_subnet$Subnet.Destination
))

# Likewise for hostnames.
lvls_host <- unique(c(
    host_vs_host$Hostname.Source,
    host_vs_host$Hostname.Destination
))
# Chord diagram of all subnet-to-subnet traffic.
chordFunc(subnet_vs_subnet, lvls_subnet)

plot of chunk chord_diagram_01

# Same chord diagram, restricted to traffic that crosses subnets.
chordFunc(
    subnet_vs_subnet[
        with(subnet_vs_subnet, Subnet.Source != Subnet.Destination),
    ],
    lvls_subnet
)

plot of chunk chord_diagram_02

Hostname vs Hostname

# Directed graph of host-to-host traffic (star layout), leaving out pairs
# whose source and destination hostname are the same.
ig(
    host_vs_host[
        host_vs_host$Hostname.Source != host_vs_host$Hostname.Destination,
    ],
    layout.star
)

plot of chunk igraph_hosts

# The same host graph drawn with the Fruchterman-Reingold layout.
ig(
    host_vs_host[
        host_vs_host$Hostname.Source != host_vs_host$Hostname.Destination,
    ],
    layout.fruchterman.reingold
)

plot of chunk igraph_hosts_02

# Sankey input: one link per hostname pair. Source and target names get
# distinct ".src" / ".dst" suffixes, so a host appears as two separate nodes.
sk_data_host <- data.frame(
    source=paste0(as.character(host_vs_host$Hostname.Source), ".src"),
    target=paste0(as.character(host_vs_host$Hostname.Destination), ".dst"),
    value=host_vs_host$Count,
    stringsAsFactors=FALSE
)
# NOTE(review): after suffixing, source and target can never be equal, so this
# filter keeps every row -- confirm whether self-links were meant to be dropped.
sk_data_host <- sk_data_host[sk_data_host$source != sk_data_host$target,]

# Render through the rCharts d3 sankey template.
sk_host <- rCharts$new()
sk_host$setLib("./rCharts_d3_sankey/")
sk_host$setTemplate(script="./rCharts_d3_sankey/layouts/chart.html")
sk_host$set(
    data=sk_data_host,
    nodeWidth=15,
    nodePadding=10,
    layout=31,
    width=700,
    height=1024
)
sk_host$print(chartId="sankey_host")
# Chord diagram of all host-to-host traffic.
chordFunc(host_vs_host, lvls_host)

plot of chunk chord_diagram_03