#!/bin/sh

# Copyright 2013 MediaMobil Communication GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# This script converts a binary .db file into a .csv file.
# The .db file was generated by darkstat with the --export option.
# The .csv file shall be read by any spreadsheet application.
#
# Usage: SCRIPT FILE.db
#   The output is written next to the input file; the final extension of
#   the input file name (if any) is replaced by .csv.
# Exit status: 1 if a required tool is missing, the argument count is wrong,
#   or the input file cannot be read.

SCRIPTNAME=${0##*/}

# "type -P" is a bash extension and is not available in a plain POSIX
# /bin/sh (dash, busybox ash); "command -v" is the portable lookup.
if ! command -v awk >/dev/null 2>&1; then
  printf '%s\n' "${SCRIPTNAME}: missing AWK interpreter, at least not found in PATH" >&2
  printf '%s\n' "${SCRIPTNAME}: every POSIX compliant OS has one; add the location to PATH" >&2
  exit 1
fi
if ! command -v od >/dev/null 2>&1; then
  printf '%s\n' "${SCRIPTNAME}: missing od file dump tool, at least not found in PATH" >&2
  printf '%s\n' "${SCRIPTNAME}: every POSIX compliant OS has one; add the location to PATH" >&2
  exit 1
fi

if test $# -ne 1; then
  printf '%s\n' "${SCRIPTNAME}: missing parameter; need file name of .db file" >&2
  exit 1
fi

DBFILENAME=$1
if test -r "${DBFILENAME}"; then
  printf '%s\n' "${SCRIPTNAME}: Found file ${DBFILENAME}"
else
  printf '%s\n' "${SCRIPTNAME}: file ${DBFILENAME} does not exist or is not readable" >&2
  exit 1
fi

# Derive the output name by replacing only the final extension of the file
# name itself.  Stripping from the first dot of the whole path would turn
# "./x.db" into ".csv" and would truncate directories that contain a dot.
case "${DBFILENAME##*/}" in
  ?*.*) CSVFILENAME="${DBFILENAME%.*}.csv" ;;
  *)    CSVFILENAME="${DBFILENAME}.csv" ;;
esac
printf '%s\n' "${SCRIPTNAME}: Writing output into ${CSVFILENAME}"

# The spec of the .db export format exists for different versions:
# http://unix4lyfe.org/gitweb/darkstat/blob/0a152e51f5d9c1771308caa7135d363a722aee18:/export-format.txt
# http://git.msquadrat.de/darkstat.git/blob_plain/master:/export-format.txt
# http://phil.lavin.me.uk/downloads/parse.phps
# Only file format version 1 is supported by us.
# Obviously, darkstat itself distinguishes 3 different host format versions.
# Only host format version 2 is supported by us.
# NOTE(review): the line structure of this part of the file appears to have
# been lost, and the AWK program below looks incomplete. For example,
# "for (c=0; c 0) print reason" and "for (pi=0; pi 1) {" are not valid AWK,
# and quit, read_number, read_text, readTCPsection, readUDPsection and
# readGraphsection are called without a visible definition. Presumably the
# text between a "<" and a later ">" was dropped somewhere -- TODO confirm
# and restore from the original script before relying on this code.
# The visible remainder feeds the od dump of ${DBFILENAME} into awk and
# redirects the awk output into ${CSVFILENAME}.
# The darkstat database file is converted from binary format # to ASCII by the standard Unix command od. # Some things don't work correctly yet. # Probably because there is no DNS server configured in our embedded device # that produces .db files within OpenWRT. # - host name contains nonsense at constant length 5 # - "last seen" timing information contains always 0 # - we read the graphics section of the file but ignore it # Let the od tool convert each binary byte into several textual formats. # The AWK script reads all variants and later picks the format it needs. od -Ad -v -tx1 -tu1 -ta -w1 < ${DBFILENAME} | awk ' NF==2 { addr = 0 + $1; hex[addr] = $2; next } NF==1 && addr in dec { ascii[addr]=$1; next } NF==1 && ! (addr in dec) { dec[addr]=$1; next } # Now all variants of the bytes are available in certain arrays. # The array indices cover the range 0 .. addr. function read_bytes(array, address, count, retval, c) { retval="" for (c=0; c 0) print reason if (terminate != 0) { # Any remaining bytes in the file shall be dumped. for (i=ai; i<=addr; i++) print i, hex[i], ascii[i] exit(retval) } } function readIPsection() { ip_protos_data=read_bytes(ascii, ai, 1) if (ip_protos_data != "P") quit("expected ip_protos_data P, found " ip_protos_data, 1, 1) ai += 1 ip_proto_count=read_number(ai, 1) ai += 1 for (pi=0; pi 1) { last_seen=read_number(ai, 4) # This value is always 0 in our files. ai += 4 } mac_address=hex[ai+0] ":" hex[ai+1] ":" hex[ai+2] ":" hex[ai+3] ":" hex[ai+4] ":" hex[ai+5] ai += 6 # Weird stuff: the host name should be read. # But there are only 5 bytes of nonsense. # The first byte should be the length counter, but it isnt. # The last byte is in fact a 0 byte. # Probably caused by the missing DNS server. 
# ignore 5 bytes with nonsense nonsense=read_text(ai, 5) ai += 5 host_bytes_in=read_number(ai, 8) ai += 8 host_bytes_out=read_number(ai, 8) ai += 8 readIPsection() readTCPsection() readUDPsection() } else { quit("host format supported only in version 02: " host_version, 1, 1) #address_familiy=read_bytes(hex, ai, 1) #print "address familiy = " address_familiy } printf("\"%s\";\"%s\";%d;%d;%s;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%s;%s\n", ip_address, mac_address, host_bytes_in, host_bytes_out, IPprotos, ip_proto_in, ip_proto_out, tcp_proto_count, tcp_proto_in, tcp_proto_out, udp_proto_count, udp_proto_in, udp_proto_out, ssh_in, ssh_out, rdp_in, rdp_out, TCPports, UDPports) } section_header=read_bytes(hex, ai, 3) if (section_header != "da4752") quit("section header da4752 expected: " section_header, 1, 1) ai += 3 db_version=read_bytes(hex, ai, 1) if (db_version != "01") quit("file format supported only in version 01", 1, 1) ai += 1 last_time=read_number(ai, 8) ai += 8 readGraphsection("60 seconds") readGraphsection("60 minutes") readGraphsection("24 hours") readGraphsection("31 days") # The complete file has been parsed, no bytes should be left over. # Terminate with return value 0 if the byte numbers match. quit("", (addr != ai+1) ?0:1, addr != ai+1) } ' > ${CSVFILENAME}