--- /dev/null
+#!/bin/sh
+
+# Copyright 2013 MediaMobil Communication GmbH
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# This script converts a binary .db file into a .csv file.
+# The .db file must have been generated by darkstat with the --export option.
+# The resulting .csv file can be read by any spreadsheet application.
+# Name of this script without its directory part, used as message prefix.
+SCRIPTNAME=${0##*/}
+# "command -v" is the POSIX way to test for a utility; "type -P" is a
+# bash-only option and is not available in the /bin/sh declared above.
+if ! command -v awk >/dev/null 2>&1; then
+  echo "${SCRIPTNAME}: missing AWK interpreter, at least not found in PATH" >&2
+  echo "${SCRIPTNAME}: every POSIX compliant OS has one; add the location to PATH" >&2
+  exit 1
+fi
+if ! command -v od >/dev/null 2>&1; then
+  echo "${SCRIPTNAME}: missing od file dump tool, at least not found in PATH" >&2
+  echo "${SCRIPTNAME}: every POSIX compliant OS has one; add the location to PATH" >&2
+  exit 1
+fi
+# Exactly one argument is expected: the name of the .db file.
+if test $# -ne 1; then
+  echo "${SCRIPTNAME}: missing parameter; need file name of .db file" >&2
+  exit 1
+fi
+DBFILENAME=$1
+if test -r "${DBFILENAME}"; then
+  echo "${SCRIPTNAME}: Found file ${DBFILENAME}"
+else
+  echo "${SCRIPTNAME}: file ${DBFILENAME} does not exist" >&2
+  exit 1
+fi
+# Build the output name from the input name: keep the directory part and
+# replace everything after the first dot of the file name with ".csv".
+# Only the file name is inspected, so dots in the directory part
+# (for example "./export.db") no longer truncate the path.
+case "${DBFILENAME}" in
+  */*) DBDIR="${DBFILENAME%/*}/" ;;
+  *)   DBDIR="" ;;
+esac
+DBBASE=${DBFILENAME##*/}
+DBSTEM=${DBBASE%%.*}
+# A file name that starts with a dot would leave an empty stem; keep it whole.
+if test -z "${DBSTEM}"; then
+  DBSTEM=${DBBASE}
+fi
+CSVFILENAME="${DBDIR}${DBSTEM}.csv"
+echo "${SCRIPTNAME}: Writing output into ${CSVFILENAME}"
+
+# The spec of the .db export format exists for different versions:
+# http://unix4lyfe.org/gitweb/darkstat/blob/0a152e51f5d9c1771308caa7135d363a722aee18:/export-format.txt
+# http://git.msquadrat.de/darkstat.git/blob_plain/master:/export-format.txt
+# http://phil.lavin.me.uk/downloads/parse.phps
+# This script supports only file format version 1.
+# Apparently, darkstat itself distinguishes 3 different host format versions.
+# This script supports only host format version 2.
+# The darkstat database file is converted from binary format
+# to ASCII by the standard Unix command od.
+
+# Known limitations. The first two are probably caused by the missing DNS
+# server configuration on our embedded OpenWRT device that produces the
+# .db files.
+# - the host name field holds 5 bytes of nonsense instead of a real name
+# - the "last seen" time stamp is always 0
+# - the graph section of the file is read, but its contents are ignored
+
+# Let the od tool convert each binary byte into several textual formats.
+# The AWK script reads all variants and later picks the format it needs.
+# With -w1 every input byte produces three output lines:
+#   "<decimal offset> <hex value>", "<unsigned decimal value>", "<character name>"
+od -Ad -v -tx1 -tu1 -ta -w1 < "${DBFILENAME}" |
+awk '
+ # First line of a byte: offset and hex value.
+ NF==2 { addr = 0 + $1; hex[addr] = $2; next }
+ # Second line of a byte: unsigned decimal value.
+ NF==1 && ! (addr in dec) { dec[addr]=$1; next }
+ # Third line of a byte: character name.
+ NF==1 && ! (addr in ascii) { ascii[addr]=$1; next }
+ # Any further single-field line is the final offset line printed by od.
+ # It matches no rule and is dropped, so it cannot overwrite the
+ # character name stored for the last byte.
+ #
+ # Now all variants of the bytes are available in the arrays hex, dec, ascii.
+ # The array indices cover the range 0 .. addr.
+
+ # Return the concatenation of count consecutive elements of array,
+ # beginning at index address. Only the first three parameters are passed
+ # by callers; joined and pos are function-local variables.
+ function read_bytes(array, address, count, joined, pos) {
+   joined = ""
+   pos = 0
+   while (pos < count) {
+     joined = joined array[address + pos]
+     pos++
+   }
+   return joined
+ }
+ # Combine count consecutive entries of the dec array, starting at index
+ # address, into one number. The first entry is the most significant one
+ # and every entry is weighted as one base-256 digit.
+ # Only address and count are passed by callers; value and pos are locals.
+ function read_number(address, count, value, pos) {
+   value = 0
+   pos = 0
+   while (pos < count) {
+     value = value * 256 + dec[address + pos]
+     pos++
+   }
+   return value
+ }
+ # Return the concatenation of count consecutive entries of the ascii
+ # array, beginning at index address.
+ # Only address and count are passed by callers; text and pos are locals.
+ function read_text(address, count, text, pos) {
+   text = ""
+   pos = 0
+   while (pos < count) {
+     text = text ascii[address + pos]
+     pos++
+   }
+   return text
+ }
+ # Report a problem and optionally stop the program.
+ #   reason    - message to print; nothing is printed if it is empty
+ #   terminate - if non-zero, dump all bytes from index ai up to addr
+ #               and exit with status retval
+ #   retval    - exit status used when terminate is non-zero
+ # pos and sink are function-local variables; callers pass three arguments.
+ # Standard output of this AWK program is the .csv file, therefore the
+ # message and the byte dump are sent to standard error through a pipe
+ # to cat, which works in every POSIX awk.
+ function quit(reason, terminate, retval, pos, sink) {
+   sink = "cat 1>&2"
+   if (length(reason) > 0)
+     print reason | sink
+   if (terminate != 0) {
+     # Any remaining bytes in the file shall be dumped.
+     for (pos = ai; pos <= addr; pos++)
+       print pos, hex[pos], ascii[pos] | sink
+     close(sink)
+     exit retval
+   }
+   # Flush the diagnostics before parsing continues.
+   close(sink)
+ }
+ # Parse the IP protocol section that starts at index ai.
+ # Layout as consumed here: marker "P", 1 byte entry count, then per entry
+ # 1 byte protocol number, 8 bytes in counter, 8 bytes out counter.
+ # Appends the protocol numbers to IPprotos, adds the counters to
+ # ip_proto_in and ip_proto_out, and moves ai behind the section.
+ function readIPsection() {
+   ip_protos_data = read_bytes(ascii, ai, 1)
+   if (ip_protos_data != "P")
+     quit("expected ip_protos_data P, found " ip_protos_data, 1, 1)
+   ip_proto_count = read_number(ai + 1, 1)
+   # Skip the marker byte and the count byte.
+   ai += 2
+   pi = 0
+   while (pi < ip_proto_count) {
+     ip_proto_type = read_number(ai, 1)
+     IPprotos = IPprotos " " ip_proto_type
+     ip_proto_in += read_number(ai + 1, 8)
+     ip_proto_out += read_number(ai + 9, 8)
+     # One entry occupies 1 + 8 + 8 bytes.
+     ai += 17
+     pi++
+   }
+ }
+ # Parse the TCP port section that starts at index ai.
+ # Layout as consumed here: marker "T", 2 bytes entry count, then per entry
+ # 2 bytes port number, 8 bytes syn counter, 8 bytes in counter,
+ # 8 bytes out counter.
+ # Appends the port numbers to TCPports, adds the counters of all ports to
+ # tcp_proto_in and tcp_proto_out, and moves ai behind the section.
+ # Traffic of port 22 is added to ssh_in/ssh_out, traffic of port 3389 to
+ # rdp_in/rdp_out.
+ # port_in and port_out are function-local; callers pass no arguments.
+ function readTCPsection(port_in, port_out) {
+   tcp_protos_data=read_bytes(ascii, ai, 1)
+   if (tcp_protos_data != "T")
+     quit("expected tcp_protos_data T, found " tcp_protos_data, 1, 1)
+   ai += 1
+   tcp_proto_count=read_number(ai, 2)
+   ai += 2
+   for (ti=0; ti<tcp_proto_count; ti++) {
+     tcp_proto_port=read_number(ai, 2)
+     ai += 2
+     TCPports = TCPports " " tcp_proto_port
+     tcp_proto_syn=read_number(ai, 8)
+     ai += 8
+     # Keep the counters of this single port apart from the running totals.
+     # The per-service sums below must only receive the traffic of the
+     # matching port, not the total accumulated over all ports so far.
+     port_in=read_number(ai, 8)
+     ai += 8
+     port_out=read_number(ai, 8)
+     ai += 8
+     tcp_proto_in += port_in
+     tcp_proto_out += port_out
+     if (tcp_proto_port == 22) {
+       ssh_in += port_in
+       ssh_out += port_out
+     }
+     if (tcp_proto_port == 3389) {
+       rdp_in += port_in
+       rdp_out += port_out
+     }
+   }
+ }
+ # Parse the UDP port section that starts at index ai.
+ # Layout as consumed here: marker "U", 2 bytes entry count, then per entry
+ # 2 bytes port number, 8 bytes in counter, 8 bytes out counter.
+ # Appends the port numbers to UDPports, adds the counters of all ports to
+ # udp_proto_in and udp_proto_out, and moves ai behind the section.
+ # Traffic of port 22 is added to ssh_in/ssh_out, traffic of port 3389 to
+ # rdp_in/rdp_out.
+ # port_in and port_out are function-local; callers pass no arguments.
+ function readUDPsection(port_in, port_out) {
+   udp_protos_data=read_bytes(ascii, ai, 1)
+   if (udp_protos_data != "U")
+     quit("expected udp_protos_data U, found " udp_protos_data, 1, 1)
+   ai += 1
+   udp_proto_count=read_number(ai, 2)
+   ai += 2
+   for (ui=0; ui<udp_proto_count; ui++) {
+     udp_proto_port=read_number(ai, 2)
+     ai += 2
+     UDPports = UDPports " " udp_proto_port
+     # Keep the counters of this single port apart from the running totals.
+     # The per-service sums below must only receive the traffic of the
+     # matching port, not the total accumulated over all ports so far.
+     port_in=read_number(ai, 8)
+     ai += 8
+     port_out=read_number(ai, 8)
+     ai += 8
+     udp_proto_in += port_in
+     udp_proto_out += port_out
+     if (udp_proto_port == 22) {
+       ssh_in += port_in
+       ssh_out += port_out
+     }
+     if (udp_proto_port == 3389) {
+       rdp_in += port_in
+       rdp_out += port_out
+     }
+   }
+ }
+ # Step over one graph record that starts at index ai.
+ # Layout as consumed here: 1 byte number of bars, 1 byte bar index, then
+ # per bar 8 bytes in counter and 8 bytes out counter.
+ # The values are stored in global variables but not evaluated any further.
+ # The interval argument is a label supplied by the caller; it is not used.
+ function readGraphsection(interval) {
+   n_bars = read_number(ai, 1)
+   i_bars = read_number(ai + 1, 1)
+   ai += 2
+   for (bi = 0; bi < n_bars; bi++) {
+     graph_bytes_in = read_number(ai, 8)
+     graph_bytes_out = read_number(ai + 8, 8)
+     # One bar occupies 8 + 8 bytes.
+     ai += 16
+   }
+ }
+
+ # All input lines have been stored; now parse the byte arrays and print
+ # one CSV line per host. The variable ai always holds the index of the
+ # next byte that has not been consumed yet.
+ END {
+   # Bytes 0..3: file header.
+   file_header=read_bytes(hex, 0, 4)
+   if (file_header != "da314159")
+     quit("input data is not an exported darkstat .db file, wrong header: " file_header, 1, 1)
+   # Bytes 4..6: header of the host section, byte 7: its format version.
+   section_header=read_bytes(hex, 4, 3)
+   if (section_header != "da4853")
+     quit("section header da4853 expected: " section_header, 1, 1)
+   db_version=read_bytes(hex, 7, 1)
+   if (db_version != "01")
+     quit("file format supported only in version 01", 1, 1)
+   # Bytes 8..11: number of host records that follow.
+   host_count=read_number(8, 4)
+   ai=12
+   # Print a header into the .csv file.
+   printf("IP address;MAC address;host in bytes;host out bytes;IP protos;IP in bytes;IP out bytes;TCP port count;TCP in bytes;TCP out bytes;UDP port count;UDP in bytes;UDP out bytes;ssh in bytes;ssh out bytes;rdp in bytes;rdp out bytes;TCP ports;UDP ports\n")
+   for (hi=1; hi<=host_count; hi++) {
+     # Make sure all variables to be printed are initially empty.
+     ip_address=mac_address=""
+     host_bytes_in=host_bytes_out=ip_proto_in=ip_proto_out=tcp_proto_in=tcp_proto_out=udp_proto_in=udp_proto_out=ssh_in=ssh_out=rdp_in=rdp_out=0
+     IPprotos=TCPports=UDPports=""
+     tcp_proto_count=udp_proto_count=0
+     # Host record: 3 bytes header followed by 1 byte format version.
+     host_header=read_bytes(hex, ai, 3)
+     host_version=read_bytes(hex, ai+3, 1)
+     ai += 4
+     if (host_version != "02")
+       quit("host format supported only in version 02: " host_version, 1, 1)
+     # IPv4 address, printed in dotted decimal notation.
+     ip_address=read_number(ai+0,1) "." read_number(ai+1,1) "." read_number(ai+2,1) "." read_number(ai+3,1)
+     ai += 4
+     # This value is always 0 in our files.
+     last_seen=read_number(ai, 4)
+     ai += 4
+     mac_address=hex[ai+0] ":" hex[ai+1] ":" hex[ai+2] ":" hex[ai+3] ":" hex[ai+4] ":" hex[ai+5]
+     ai += 6
+     # Weird stuff: the host name should be read.
+     # But there are only 5 bytes of nonsense.
+     # The first byte should be the length counter, but it is not.
+     # The last byte is in fact a 0 byte.
+     # Probably caused by the missing DNS server.
+     # Ignore these 5 bytes.
+     nonsense=read_text(ai, 5)
+     ai += 5
+     host_bytes_in=read_number(ai, 8)
+     ai += 8
+     host_bytes_out=read_number(ai, 8)
+     ai += 8
+     readIPsection()
+     readTCPsection()
+     readUDPsection()
+     printf("\"%s\";\"%s\";%d;%d;%s;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%s;%s\n",
+       ip_address, mac_address, host_bytes_in, host_bytes_out,
+       IPprotos, ip_proto_in, ip_proto_out,
+       tcp_proto_count, tcp_proto_in, tcp_proto_out,
+       udp_proto_count, udp_proto_in, udp_proto_out,
+       ssh_in, ssh_out, rdp_in, rdp_out,
+       TCPports, UDPports)
+   }
+   # Graph section: 3 bytes header, 1 byte version, 8 bytes time stamp,
+   # followed by four graph records.
+   section_header=read_bytes(hex, ai, 3)
+   if (section_header != "da4752")
+     quit("section header da4752 expected: " section_header, 1, 1)
+   ai += 3
+   db_version=read_bytes(hex, ai, 1)
+   if (db_version != "01")
+     quit("file format supported only in version 01", 1, 1)
+   ai += 1
+   last_time=read_number(ai, 8)
+   ai += 8
+   readGraphsection("60 seconds")
+   readGraphsection("60 minutes")
+   readGraphsection("24 hours")
+   readGraphsection("31 days")
+   # The complete file has been parsed, no bytes should be left over.
+   # addr is the index of the last byte of the file and ai the index of the
+   # next unread byte, so everything was consumed exactly if ai == addr + 1.
+   # Terminate with return value 0 in that case; otherwise dump the bytes
+   # that are left over (if any) and terminate with return value 1.
+   quit("", 1, (ai == addr + 1) ? 0 : 1)
+ }
+ ' > "${CSVFILENAME}"