library(circlize)
library(ggplot2)
library(igraph)
library(rCharts)
library(stringr)
# Base plotting palette: eight saturated hues followed by one neutral grey.
thm_colors <- c(
  "#EA3C00",
  "#EAD900",
  "#00EAB4",
  "#9200EA",
  "#EA7C00",
  "#3BEA00",
  "#EA00C0",
  "#0009EA",
  "#A0A0A0"
)
# Return a darker copy of each colour by halving every RGB channel.
#
# color_arg: character vector of colours written as "#" followed by six
#            hexadecimal digits (RRGGBB).
# Returns a character vector in "#RRGGBB" form with upper-case hex digits.
darken <- function(color_arg) {
  hex_digits <- substr(color_arg, 2, nchar(color_arg))

  # Halve the two-digit hex channel found at positions first..last and format
  # the result as two hex digits again (zero-padded).
  halve_channel <- function(first, last) {
    channel <- as.integer(paste0("0x", substr(hex_digits, first, last)))
    sprintf("%02X", round(channel * 0.5, 0))
  }

  paste0(
    "#",
    halve_channel(1, 2),
    halve_channel(3, 4),
    halve_channel(5, 6)
  )
}
# Double the palette by appending a darkened twin of every base colour, then
# wrap it in the manual fill scale that the stacked bar charts share.
thm_colors <- append(thm_colors, darken(thm_colors))
ggp_theme <- scale_fill_manual(values = thm_colors)
# Load the raw firewall log (the file has no header row) and label its columns.
ipt_full <- setNames(
  read.csv("iptables.csv", header = FALSE),
  c(
    "filename", "date_raw", "host", "label",
    "iface.in", "iface.out", "phys.in", "phys.out",
    "mac", "src.ip", "dst.ip", "len.1",
    "tos", "prec", "ttl", "id.1",
    "df", "proto", "src.port", "dst.port",
    "window", "res", "pkt.synack", "pkt.psh",
    "urgp", "type", "code", "id.2",
    "seq", "len.2", "extra"
  )
)

# When the working directory is named "prod" every row is analysed; anywhere
# else a reproducible (fixed seed) random sample of 1% of the rows is used.
ipt <- if (sub("^.*/([^/]+)$", "\\1", getwd()) == "prod") {
  ipt_full
} else {
  set.seed(43121)
  ipt_full[sample(rownames(ipt_full), nrow(ipt_full) / 100), ]
}

# Lookup tables: subnet names keyed by third octet, and IP -> hostname.
subnets <- read.csv("subnet.csv")
dns <- read.csv("dns.csv")
# Flag addresses whose last octet is 255 (what this script treats as a
# broadcast address).
#
# ip: character vector of addresses or host labels.
# Returns a logical vector the same length as `ip`; NA entries give FALSE.
#
# The pattern is anchored to the end of the string, the same way ip2subnet()
# recognises broadcasts. The previous unanchored "\\.255" also matched
# addresses that merely contain a 255 octet (e.g. "10.255.0.1"), which were
# then mislabelled as "broadcast".
is_broadcast <- function(ip) {
  grepl("\\.255$", ip)
}
# Flag packets whose protocol field equals 2, the value this script labels
# as "multicast".
#
# proto: vector of protocol identifiers (compared with `==`, so NA stays NA).
# Returns a logical vector the same length as `proto`.
is_multicast <- function(proto) {
  multicast_proto <- 2
  proto == multicast_proto
}
# Flag addresses that begin with "138.87." (labelled "external_isu" elsewhere
# in this script).
#
# ip: character vector of addresses.
# Returns a logical vector the same length as `ip`.
is_isu <- function(ip) {
  isu_prefix <- "138.87."
  substr(ip, 1, nchar(isu_prefix)) == isu_prefix
}
# Flag addresses that appear in the fixed list of VPN endpoint addresses
# (labelled "external_vpn" elsewhere in this script).
#
# ip: vector of addresses.
# Returns a logical vector the same length as `ip`; NA entries give FALSE.
is_vpn <- function(ip) {
  vpn_endpoints <- c(
    "179.43.177.162", "81.17.16.170",
    "179.43.178.98", "179.43.148.34",
    "179.43.176.2", "179.43.174.34",
    "46.19.140.62", "46.19.141.158",
    "179.43.133.226", "179.43.148.66",
    "81.17.27.234", "179.43.174.130",
    "179.43.177.98"
  )
  match(ip, vpn_endpoints, nomatch = 0L) > 0L
}
# Flag addresses in the internal 192.168.x.x range.
#
# ip: character vector of addresses or host labels.
# Returns a logical vector the same length as `ip`; NA entries give FALSE.
#
# The prefix is anchored to the start of the string, consistent with the
# anchored pattern in subnet_octet_gen(). The previous unanchored search also
# matched strings that merely contain "192.168." (e.g. "10.192.168.1").
is_internal <- function(ip) {
  grepl("^192\\.168\\.", ip)
}
# Flag values that look like a non-internal IP address: not matched by
# is_internal() and not starting with a letter (values that start with a
# letter are host names or labels rather than raw addresses).
#
# ip: character vector of addresses or host labels.
# Returns a logical vector the same length as `ip`.
is_external <- function(ip) {
  starts_with_letter <- tolower(substr(ip, 1, 1)) %in% letters
  !(is_internal(ip) | starts_with_letter)
}
# Reduce internal addresses of the form 192.168.X.Y to their third octet "X".
# Anything that does not match that exact shape is returned unchanged.
#
# ip: character vector.
# Returns a character vector the same length as `ip`.
subnet_octet_gen <- function(ip) {
  internal_pattern <- "^192\\.168\\.([0-9]+)\\.[0-9]+$"
  sub(internal_pattern, "\\1", ip)
}
# Map each packet address to the name of the subnet / traffic class it
# belongs to.
#
# ip:    character vector of IPv4 addresses.
# proto: protocol field of the same packets (same length as `ip`).
# Returns a character vector the same length as `ip`. Each element is a name
# from the global `subnets` table (columns `name`, `third_octet`) or one of
# "multicast", "broadcast", "external_isu", "external_vpn", "external";
# NA when the address cannot be classified.
#
# Fixes relative to the previous implementation:
#  * Names are looked up with match(), which keeps the result aligned with
#    the input. merge() sorts its result by the key by default, so names
#    were written back in key order instead of packet order.
#  * Classes are assigned from the broadest to the most specific.
#    is_external() is also TRUE for ISU and VPN addresses, and it used to be
#    applied last, so "external_isu" / "external_vpn" could never be
#    produced. Precedence now follows hostparse(): multicast, broadcast,
#    ISU, VPN, external, then the internal subnet.
ip2subnet <- function(ip, proto) {
  # Internal 192.168.X.Y addresses collapse to their third octet "X".
  subnet_octet <- subnet_octet_gen(ip)

  # Later assignments overwrite earlier ones, so the most specific class
  # goes last. The numeric codes are coerced to character on assignment.
  subnet_octet[is_external(ip)] <- 300
  subnet_octet[is_vpn(ip)] <- 257
  subnet_octet[is_isu(ip)] <- 256
  subnet_octet[grepl("\\.255$", ip)] <- 255
  subnet_octet[is_multicast(proto)] <- 224

  # Anything that still contains a dot was not reduced to a code.
  subnet_octet[grepl(".", subnet_octet, fixed = TRUE)] <- NA

  lookup_subnets <- rbind(subnets, data.frame(
    name = c("multicast", "broadcast", "external_isu", "external_vpn", "external"),
    third_octet = c(224, 255, 256, 257, 300)
  ))

  # match() compares the codes as character strings and returns NA for codes
  # with no entry, which propagates to an NA name.
  as.character(lookup_subnets$name)[
    match(subnet_octet, lookup_subnets$third_octet)
  ]
}
# Attach a host label column to the packet table.
#
# ipt:            packet data frame; must contain `ip_field` and "proto".
# dns:            lookup data frame with an "ip" column and a host name
#                 column.
# ip_field:       name of the address column in `ipt` to resolve.
# hostname_field: name to give the new label column.
# Returns `ipt` left-joined with `dns`, plus `hostname_field`. Note that
# merge() moves the key column first and sorts rows by it, so row order is
# not preserved.
#
# Label precedence: DNS name, else the raw address, which is then replaced
# by "multicast", "broadcast", "external_isu", "external_vpn", "external",
# "<subnet name>_dhcp" (subnets from the global `subnets` table) or
# "unknown_<third octet>" for internal addresses in no known subnet.
#
# Fix: the "unknown_" step used to assign into a row subset of the data
# frame. When no internal address was left over, paste0("unknown_",
# character(0)) still yields one string and the assignment into zero rows
# failed. All labelling now happens on a plain vector, where an empty
# selection is a no-op. The subnet loop also uses seq_len() so an empty
# `subnets` table is skipped instead of iterating over 1:0.
hostparse <- function(ipt, dns, ip_field, hostname_field) {
  ipt_ret <- merge(ipt, dns, by.x = ip_field, by.y = "ip", all.x = TRUE)
  # NOTE(review): assumes the host name is the only column `dns` adds, so it
  # lands in the last position - confirm against dns.csv.
  names(ipt_ret)[ncol(ipt_ret)] <- hostname_field

  host <- as.character(ipt_ret[[hostname_field]])

  # No DNS entry: fall back to the address itself.
  unresolved <- is.na(host)
  host[unresolved] <- as.character(ipt_ret[[ip_field]])[unresolved]

  # Each step sees the labels written by the previous ones; labels start
  # with a letter, so is_external() leaves them alone.
  host[is_multicast(ipt_ret[["proto"]])] <- "multicast"
  host[is_broadcast(host)] <- "broadcast"
  host[is_isu(host)] <- "external_isu"
  host[is_vpn(host)] <- "external_vpn"
  host[is_external(host)] <- "external"

  # Remaining raw addresses inside a known subnet are unnamed DHCP clients.
  for (i in seq_len(nrow(subnets))) {
    subnet_pattern <- paste0("192\\.168\\.", subnets[i, "third_octet"], "\\.")
    host[grepl(subnet_pattern, host)] <- paste0(subnets[i, "name"], "_dhcp")
  }

  # Whatever internal addresses are left belong to no listed subnet.
  leftover <- is_internal(host)
  host[leftover] <- paste0("unknown_", subnet_octet_gen(host[leftover]))

  ipt_ret[[hostname_field]] <- host
  ipt_ret
}
# Combine a count aggregate with a matching "sort key" aggregate.
#
# agg1:      aggregate() result with columns Group.1, Group.2, x (counts).
# agg2:      aggregate() result over the same groups whose x is the value to
#            sort by.
# agg_name1: output name for Group.1.
# agg_name2: output name for Group.2.
# Returns `agg1` with columns renamed to (agg_name1, agg_name2, "Count"),
# rows ordered by Group.2 and then by the sort key, and the first column
# converted to a factor whose levels follow ascending sort key.
#
# Fix: row names are generated with seq_len(), so an empty aggregate returns
# an empty result instead of failing on seq(1, 0).
sort_agg_int <- function(agg1, agg2, agg_name1, agg_name2) {
  # Bring both tables into the same row order so that row i of one describes
  # the same group as row i of the other.
  agg1 <- agg1[order(agg1$Group.1, agg1$Group.2), ]
  agg2 <- agg2[order(agg2$Group.1, agg2$Group.2), ]

  agg_ret <- agg1[order(agg1$Group.2, agg2$x), ]
  # Level order for the first grouping column: first appearance when the
  # groups are walked in ascending sort-key order.
  agg_labels <- unique(agg2[order(agg2$x), ]$Group.1)

  rownames(agg_ret) <- seq_len(nrow(agg_ret))
  names(agg_ret) <- c(agg_name1, agg_name2, "Count")
  agg_ret[[agg_name1]] <- factor(agg_ret[[agg_name1]], levels = agg_labels)
  agg_ret
}
# Count rows per (field1, field2) group, optionally ordered by a second
# aggregate.
#
# data:          the source data frame (kept for interface compatibility;
#                the grouping vectors are passed separately).
# agg_field:     vector aggregated with `agg_func_sort` to obtain a sort key.
# field1/field2: grouping vectors, one element per row.
# agg_name1/2:   output column names for the two grouping columns.
# agg_func_data: function applied to a vector of ones per group (e.g.
#                `length` to count rows).
# agg_func_sort: optional function for the sort key; any non-function value
#                (default NA) skips sorting.
# filter:        logical row selector, either a single TRUE or one value per
#                row.
# Returns a data frame with columns (agg_name1, agg_name2, "Count"); when a
# sort function is given the result comes from sort_agg_int().
#
# Fixes: the number of rows is taken from the filtered grouping vector, so a
# filter containing NA no longer makes rep() fail on sum(filter) being NA
# (aggregate() itself drops rows whose group is NA). The sort function is
# detected with is.function() instead of comparing class() to a string.
sort_agg <- function(data, agg_field,
                     field1, field2,
                     agg_name1, agg_name2,
                     agg_func_data, agg_func_sort = NA,
                     filter = TRUE) {
  group1 <- field1[filter]
  group2 <- field2[filter]

  agg1 <- aggregate(
    rep(1, length(group1)),
    by = list(group1, group2),
    FUN = agg_func_data
  )

  if (!is.function(agg_func_sort)) {
    names(agg1) <- c(agg_name1, agg_name2, "Count")
    agg1
  } else {
    agg2 <- aggregate(
      agg_field[filter],
      by = list(group1, group2),
      FUN = agg_func_sort
    )
    sort_agg_int(agg1, agg2, agg_name1, agg_name2)
  }
}
# Build the named colour vector for a chord diagram: one colour per level,
# recycling the palette when there are more levels than colours.
#
# src_data: the edge table being plotted (kept for interface compatibility;
#           the colours depend only on `lvls`).
# lvls:     node names that need a colour.
# palette:  colours to cycle through; defaults to the script palette
#           `thm_colors`.
# Returns a character vector of colours named by `lvls`.
#
# Fix: the vector used to be sized by nrow(src_data) but named with `lvls`.
# With fewer edges than levels the names<- call failed, and with more edges
# it produced surplus NA-named colours.
chordCols <- function(src_data, lvls, palette = thm_colors) {
  col_ret <- rep_len(palette, length(lvls))
  names(col_ret) <- as.character(lvls)
  col_ret
}
# Draw a directional chord diagram of an edge table with a legend beside it.
#
# src_data: edge table; column 1 = source node, column 2 = target node,
#           column 3 = weight. Every node must appear in `lvls`.
# lvls:     all node names; defines the rows/columns of the adjacency matrix.
# ...:      forwarded to circlize::chordDiagram().
# Called for its plotting side effect; returns the value of legend().
#
# Changes relative to the previous version:
#  * The adjacency and colour matrices are filled with one vectorised
#    assignment keyed by node *name*. The old 1:nrow() loop ran over c(1, 0)
#    for an empty table, and indexing with a factor would have used its
#    integer codes instead of its labels.
#  * The two-panel layout() is reset on exit so later plots on the same
#    device are not squeezed into the diagram/legend panels.
chordFunc <- function(src_data, lvls, ...) {
  lvl_names <- as.character(lvls)
  n_lvls <- length(lvl_names)

  mat <- matrix(0, n_lvls, n_lvls, dimnames = list(lvl_names, lvl_names))
  mat_col <- matrix(
    "#000000", n_lvls, n_lvls, dimnames = list(lvl_names, lvl_names)
  )
  grid_col <- chordCols(src_data, lvls)

  if (nrow(src_data) > 0) {
    src <- as.character(src_data[, 1])
    dst <- as.character(src_data[, 2])
    # A two-column character matrix addresses cells by (row name, col name).
    cells <- cbind(src, dst)
    mat[cells] <- src_data[, 3]
    # Each link takes the colour of its source node.
    mat_col[cells] <- grid_col[src]
  }

  # Wide panel for the diagram, narrow panel for the legend.
  layout(matrix(c(1, 2), 1, 2), widths = c(5, 1))
  on.exit(layout(1), add = TRUE)

  chordDiagram(mat,
    grid.col = grid_col,
    col = mat_col,
    directional = TRUE,
    annotationTrack = "grid",
    diffHeight = 0,
    link.border = TRUE,
    ...
  )
  plot.new()
  legend("left", legend = lvls, fill = grid_col, border = 1)
}
# Plot an edge table as a directed igraph graph.
#
# data:        edge table; column 1 = source vertex, column 2 = target
#              vertex (further columns are ignored).
# layout_func: igraph layout passed to plot() as `layout`.
# Called for its plotting side effect.
#
# Changes relative to the previous version:
#  * The interleaved edge vector (src1, dst1, src2, dst2, ...) is built in
#    one step instead of growing a vector element by element, which was
#    quadratic in the number of edges.
#  * An empty edge table now produces a warning and no plot. The old
#    seq(1, 0) loop ran twice and handed NA to make_graph().
#  * Factor columns are converted to their labels so vertices are named by
#    label rather than by integer code.
ig <- function(data, layout_func) {
  if (nrow(data) == 0) {
    warning("ig(): no edges to plot", call. = FALSE)
    return(invisible(NULL))
  }

  as_labels <- function(col) {
    if (is.factor(col)) as.character(col) else col
  }

  # rbind() stacks sources over targets; reading the 2 x n matrix column by
  # column yields the src/dst pairs in the order make_graph() expects.
  edge_list <- as.vector(rbind(as_labels(data[, 1]), as_labels(data[, 2])))

  plot(
    make_graph(edge_list, directed = TRUE),
    layout = layout_func
  )
}
# Timestamp reconstruction ----
# The log lines carry no year, so it is taken from characters 10-13 of the
# file name. Characters 14-15 are compared against "01".."03" (presumably
# the month of the log file - confirm against the naming scheme): an
# Oct-Dec entry inside such a file belongs to the previous year.
nr_tf <- (
  substr(ipt$filename, 14, 15) %in% c("01", "02", "03") &
  substr(ipt$date_raw, 0, 3) %in% c("Oct", "Nov", "Dec")
)
ipt_nr <- ipt[nr_tf, ]
ipt_not_nr <- ipt[!nr_tf, ]
ipt_not_nr_date <- as.POSIXlt(paste0(
  ipt_not_nr$date_raw,
  substr(ipt_not_nr$filename, 10, 13)
), format = "%b %e %H:%M:%S%Y")
ipt_nr_date <- as.POSIXlt(paste0(
  ipt_nr$date_raw,
  as.integer(substr(ipt_nr$filename, 10, 13)) - 1
), format = "%b %e %H:%M:%S%Y")

# Recombine both halves as epoch seconds, in the original row order.
ipt_merge <- rep(NA_real_, nrow(ipt))
ipt_merge[nr_tf] <- as.numeric(ipt_nr_date)
ipt_merge[!nr_tf] <- as.numeric(ipt_not_nr_date)

# Convert the numbers directly. The previous round trip through
# as.character() and strptime(format = "%s") was fragile: "%s" is documented
# as output-only and is not accepted for input on every platform, and
# as.character() prints round values in scientific notation (1400000000
# becomes "1.4e+09"), which cannot be parsed back. POSIXct is also the safer
# class for a data frame column than POSIXlt.
ipt$date <- as.POSIXct(ipt_merge, origin = "1970-01-01")

# Keep rows dated after 1980. which() drops rows whose date failed to parse;
# a bare logical index containing NA would add all-NA rows instead.
ipt <- ipt[which(as.numeric(ipt$date) > 323762400), ]
ipt$agg_date <- strftime(ipt$date, format = "%b %Y")
ipt <- ipt[order(ipt$date), ]
# Enrichment ----
# The string helpers expect plain character addresses.
ipt[["src.ip"]] <- as.character(ipt[["src.ip"]])
ipt[["dst.ip"]] <- as.character(ipt[["dst.ip"]])
dns[["ip"]] <- as.character(dns[["ip"]])

# Subnet / traffic class of each end of the connection.
ipt[["src.subnet"]] <- ip2subnet(ipt[["src.ip"]], ipt[["proto"]])
ipt[["dst.subnet"]] <- ip2subnet(ipt[["dst.ip"]], ipt[["proto"]])

# Host label of each end (DNS name or a category label).
ipt <- hostparse(ipt, dns, "src.ip", "src.hostname")
ipt <- hostparse(ipt, dns, "dst.ip", "dst.hostname")

# Report interface "br1" under the name "kvm-br1".
ipt$iface.in <- replace(ipt$iface.in, ipt$iface.in == "br1", "kvm-br1")
ipt$iface.out <- replace(ipt$iface.out, ipt$iface.out == "br1", "kvm-br1")
# Aggregates: protocol and interfaces ----
# Each *_freq table holds total packets per category; each *_agg_data table
# holds packets per (month, category), ordered via the latest timestamp in
# each group.

# Protocol.
proto_freq <- setNames(
  data.frame(table(ipt$proto)),
  c("Protocol", "Count")
)
proto_freq_agg_data <- sort_agg(
  data = ipt,
  agg_field = as.numeric(ipt$date),
  field1 = ipt$agg_date, field2 = ipt$proto,
  agg_name1 = "Date", agg_name2 = "Protocol",
  agg_func_data = length, agg_func_sort = max
)

# Inbound interface (rows with an empty interface are left out).
iface_in_filter <- ipt$iface.in != ""
iface_in <- factor(ipt$iface.in[iface_in_filter])
iface_in_freq <- setNames(
  data.frame(table(iface_in)),
  c("Interface", "Count")
)
iface_in_agg_data <- sort_agg(
  data = ipt,
  agg_field = as.numeric(ipt$date),
  field1 = ipt$agg_date, field2 = ipt$iface.in,
  agg_name1 = "Date", agg_name2 = "Interface",
  agg_func_data = length, agg_func_sort = max,
  filter = iface_in_filter
)

# Outbound interface (rows with an empty interface are left out).
iface_out_filter <- ipt$iface.out != ""
iface_out <- factor(ipt$iface.out[iface_out_filter])
iface_out_freq <- setNames(
  data.frame(table(iface_out)),
  c("Interface", "Count")
)
iface_out_agg_data <- sort_agg(
  data = ipt,
  agg_field = as.numeric(ipt$date),
  field1 = ipt$agg_date, field2 = ipt$iface.out,
  agg_name1 = "Date", agg_name2 = "Interface",
  agg_func_data = length, agg_func_sort = max,
  filter = iface_out_filter
)
# Aggregates: subnets and hosts ----

# Packets per source subnet, in total and per month.
subnet_src_freq_data <- data.frame(table(ipt$src.subnet))
names(subnet_src_freq_data) <- c("Subnet", "Count")
subnet_src_agg_data <- sort_agg(ipt,
  as.numeric(ipt$date),
  ipt$agg_date, ipt$src.subnet,
  "Date", "Subnet",
  length, max
)

# Packets per (source hostname, source subnet) pair.
# A table()-based frame used to be assigned to subnet_src_host just before
# this call and was overwritten immediately; that dead assignment is gone.
subnet_src_host <- sort_agg(ipt,
  ipt$src.hostname,
  ipt$src.hostname, ipt$src.subnet,
  "Hostname", "Subnet",
  length
)

# Packets per destination subnet, in total and per month.
subnet_dst_freq_data <- data.frame(table(ipt$dst.subnet))
names(subnet_dst_freq_data) <- c("Subnet", "Count")
subnet_dst_agg_data <- sort_agg(ipt,
  as.numeric(ipt$date),
  ipt$agg_date, ipt$dst.subnet,
  "Date", "Subnet",
  length, max
)

# Packets per (destination hostname, destination subnet) pair.
subnet_dst_host <- sort_agg(ipt,
  ipt$dst.hostname,
  ipt$dst.hostname, ipt$dst.subnet,
  "Hostname", "Subnet",
  length
)

# Packets per (source subnet, destination subnet) pair.
subnet_vs_subnet <- sort_agg(ipt,
  ipt$src.subnet,
  ipt$src.subnet, ipt$dst.subnet,
  "Subnet.Source", "Subnet.Destination",
  length
)

# Packets per (source host, destination host) pair.
host_vs_host <- sort_agg(ipt,
  ipt$src.hostname,
  ipt$src.hostname, ipt$dst.hostname,
  "Hostname.Source", "Hostname.Destination",
  length
)
# Express counts in hundreds of thousands of packets, the unit used on the
# plot axes. host_vs_host is not rescaled here.
proto_freq[["Count"]] <- proto_freq[["Count"]] / 100000
proto_freq_agg_data[["Count"]] <- proto_freq_agg_data[["Count"]] / 100000

iface_in_freq[["Count"]] <- iface_in_freq[["Count"]] / 100000
iface_in_agg_data[["Count"]] <- iface_in_agg_data[["Count"]] / 100000
iface_out_freq[["Count"]] <- iface_out_freq[["Count"]] / 100000
iface_out_agg_data[["Count"]] <- iface_out_agg_data[["Count"]] / 100000

subnet_src_freq_data[["Count"]] <- subnet_src_freq_data[["Count"]] / 100000
subnet_src_agg_data[["Count"]] <- subnet_src_agg_data[["Count"]] / 100000
subnet_src_host[["Count"]] <- subnet_src_host[["Count"]] / 100000

subnet_dst_freq_data[["Count"]] <- subnet_dst_freq_data[["Count"]] / 100000
subnet_dst_agg_data[["Count"]] <- subnet_dst_agg_data[["Count"]] / 100000
subnet_dst_host[["Count"]] <- subnet_dst_host[["Count"]] / 100000

subnet_vs_subnet[["Count"]] <- subnet_vs_subnet[["Count"]] / 100000
# Plots: protocol ----
# Lines starting with "##" are output recorded from an earlier run.
proto_freq
## Protocol Count
## 1 2 1.36855
## 2 AH 0.00107
## 3 ESP 0.00055
## 4 ICMP 0.59840
## 5 TCP 5.40229
## 6 UDP 21.34442
g <- ggplot(proto_freq, aes(x = Protocol, y = Count)) +
  geom_bar(stat = "identity") +
  labs(y = "Count (hundreds of thousands)")
g
# Monthly totals, stacked by protocol.
g <- ggplot(proto_freq_agg_data, aes(x = Date, y = Count, fill = Protocol)) +
  geom_bar(stat = "identity") +
  labs(y = "Count (hundreds of thousands)", x = "")
g + ggp_theme

# Plots: inbound interface ----
iface_in_freq
## Interface Count
## 1 em1 1.37820
## 2 kvm-br1 4.07101
## 3 p1p1 4.45534
## 4 p1p2 1.97376
## 5 p3p1 0.20819
## 6 p3p2 0.05222
## 7 p4p1 0.08303
g <- ggplot(iface_in_freq, aes(x = Interface, y = Count)) +
  geom_bar(stat = "identity") +
  labs(y = "Count (hundreds of thousands)")
g
# Monthly totals, stacked by inbound interface.
g <- ggplot(iface_in_agg_data, aes(x = Date, y = Count, fill = Interface)) +
  geom_bar(stat = "identity") +
  labs(y = "Count (hundreds of thousands)", x = "")
g + ggp_theme

# Plots: outbound interface ----
iface_out_freq
## Interface Count
## 1 em1 12.50094
## 2 kvm-br1 4.07093
## 3 p1p1 0.00355
## 4 p1p2 0.00309
## 5 p3p1 0.00163
## 6 p3p2 0.00180
## 7 p4p1 0.39880
## 8 virbr0 0.00011
g <- ggplot(iface_out_freq, aes(x = Interface, y = Count)) +
  geom_bar(stat = "identity") +
  labs(y = "Count (hundreds of thousands)")
g
# Monthly totals, stacked by outbound interface.
g <- ggplot(iface_out_agg_data, aes(x = Date, y = Count, fill = Interface)) +
  geom_bar(stat = "identity") +
  labs(y = "Count (hundreds of thousands)", x = "")
g + ggp_theme

# Plots: source subnet ----
g <- ggplot(subnet_src_freq_data, aes(x = Subnet, y = Count)) +
  geom_bar(stat = "identity") +
  labs(y = "Count (hundreds of thousands)")
g
# Monthly totals, stacked by source subnet.
g <- ggplot(subnet_src_agg_data, aes(x = Date, y = Count, fill = Subnet)) +
  geom_bar(stat = "identity") +
  labs(y = "Count (hundreds of thousands)", x = "")
g + ggp_theme
# Source subnet totals, stacked by source hostname.
g <- ggplot(subnet_src_host, aes(x = Subnet, y = Count, fill = Hostname)) +
  geom_bar(stat = "identity") +
  labs(y = "Count (hundreds of thousands)")
# ggp_theme carries only length(thm_colors) fills. With more hostnames than
# that, ggplot2 stopped with "Insufficient values in manual scale" (19 were
# needed, 18 provided in the recorded run). Interpolate the palette up to the
# number of hostnames instead; with few hostnames the colours are the
# unmodified palette.
g + scale_fill_manual(values = colorRampPalette(thm_colors)(
  max(length(thm_colors), length(unique(subnet_src_host$Hostname)))
))
# Plots: destination subnet ----
g <- ggplot(subnet_dst_freq_data, aes(x = Subnet, y = Count)) +
  geom_bar(stat = "identity") +
  labs(y = "Count (hundreds of thousands)")
g
# Monthly totals, stacked by destination subnet.
g <- ggplot(subnet_dst_agg_data, aes(x = Date, y = Count, fill = Subnet)) +
  geom_bar(stat = "identity") +
  labs(y = "Count (hundreds of thousands)", x = "")
g + ggp_theme
# Destination subnet totals, stacked by destination hostname.
g <- ggplot(subnet_dst_host, aes(x = Subnet, y = Count, fill = Hostname)) +
  geom_bar(stat = "identity") +
  labs(y = "Count (hundreds of thousands)")
# ggp_theme carries only length(thm_colors) fills. With more hostnames than
# that, ggplot2 stopped with "Insufficient values in manual scale" (43 were
# needed, 18 provided in the recorded run). Interpolate the palette up to the
# number of hostnames instead; with few hostnames the colours are the
# unmodified palette.
g + scale_fill_manual(values = colorRampPalette(thm_colors)(
  max(length(thm_colors), length(unique(subnet_dst_host$Hostname)))
))
# Plots: subnet to subnet ----
# Source subnet totals, stacked by destination subnet.
g <- ggplot(
  subnet_vs_subnet,
  aes(x = Subnet.Source, y = Count, fill = Subnet.Destination)
) +
  geom_bar(stat = "identity") +
  labs(y = "Count (hundreds of thousands)")
g + ggp_theme

# Directed graph of subnet-to-subnet traffic, without same-subnet edges.
ig(
  subnet_vs_subnet[
    subnet_vs_subnet$Subnet.Source != subnet_vs_subnet$Subnet.Destination,
  ],
  layout.gem
)
# Sankey diagram: subnet to subnet ----
sk_data_subnet <- data.frame(
  source = subnet_vs_subnet$Subnet.Source,
  target = subnet_vs_subnet$Subnet.Destination,
  value = subnet_vs_subnet$Count
)
# Drop same-subnet flows BEFORE tagging the endpoints. The filter used to run
# after ".src" / ".dst" had been appended, at which point source and target
# can never be equal, so it never removed anything.
# NOTE(review): this assumes same-subnet flows were meant to be excluded, as
# in the graph and the second chord diagram - confirm.
sk_data_subnet <- sk_data_subnet[
  as.character(sk_data_subnet$source) != as.character(sk_data_subnet$target),
]
# Distinct suffixes keep a subnet's sending and receiving roles as separate
# nodes.
sk_data_subnet$source <- paste0(as.character(sk_data_subnet$source), ".src")
sk_data_subnet$target <- paste0(as.character(sk_data_subnet$target), ".dst")

sk_subnet <- rCharts$new()
sk_subnet$setLib("./rCharts_d3_sankey/")
sk_subnet$setTemplate(script="./rCharts_d3_sankey/layouts/chart.html")
sk_subnet$set(
  data=sk_data_subnet,
  nodeWidth=15,
  nodePadding=10,
  layout=31,
  width=700,
  height=1024
)
sk_subnet$print(chartId="sankey_subnet")
# Chord diagrams and host graphs ----
# Every node name that occurs on either side of an edge.
lvls_subnet <- unique(c(
  subnet_vs_subnet$Subnet.Source,
  subnet_vs_subnet$Subnet.Destination
))
lvls_host <- unique(c(
  host_vs_host$Hostname.Source,
  host_vs_host$Hostname.Destination
))

# Subnet chord diagram: all flows, then without same-subnet flows.
chordFunc(subnet_vs_subnet, lvls_subnet)
chordFunc(
  subnet_vs_subnet[
    with(subnet_vs_subnet, Subnet.Source != Subnet.Destination),
  ],
  lvls_subnet
)

# Host-to-host graph without self edges, drawn with two different layouts.
ig(
  host_vs_host[host_vs_host[[1]] != host_vs_host[[2]], ],
  layout.star
)
ig(
  host_vs_host[host_vs_host[[1]] != host_vs_host[[2]], ],
  layout.fruchterman.reingold
)
# Sankey diagram: host to host ----
sk_data_host <- data.frame(
  source = host_vs_host$Hostname.Source,
  target = host_vs_host$Hostname.Destination,
  value = host_vs_host$Count
)
# Drop host-to-itself flows BEFORE tagging the endpoints. The filter used to
# run after ".src" / ".dst" had been appended, at which point source and
# target can never be equal, so it never removed anything.
# NOTE(review): this assumes self flows were meant to be excluded, as in the
# host graphs - confirm.
sk_data_host <- sk_data_host[
  as.character(sk_data_host$source) != as.character(sk_data_host$target),
]
# Distinct suffixes keep a host's sending and receiving roles as separate
# nodes.
sk_data_host$source <- paste0(as.character(sk_data_host$source), ".src")
sk_data_host$target <- paste0(as.character(sk_data_host$target), ".dst")

sk_host <- rCharts$new()
sk_host$setLib("./rCharts_d3_sankey/")
sk_host$setTemplate(script="./rCharts_d3_sankey/layouts/chart.html")
sk_host$set(
  data=sk_data_host,
  nodeWidth=15,
  nodePadding=10,
  layout=31,
  width=700,
  height=1024
)
sk_host$print(chartId="sankey_host")

# Host chord diagram over all host-to-host flows.
chordFunc(host_vs_host, lvls_host)