X-Git-Url: https://unix4lyfe.org/gitweb/darkstat-debian/blobdiff_plain/85d7d6e7b81dff84df8ddc87815ce77b8d82bc07..3c6b3682c70be84db86e70c191016913a9836c31:/contrib/darkstat_export diff --git a/contrib/darkstat_export b/contrib/darkstat_export new file mode 100644 index 0000000..f0dacec --- /dev/null +++ b/contrib/darkstat_export @@ -0,0 +1,257 @@ +#!/bin/sh + +# Copyright 2013 MediaMobil Communication GmbH +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# This script converts a binary .db file into a .csv file. +# The .db file was generated by darkstat with the --export option. +# The .csv file shall be read by any spreadsheet application. 
# ---------------------------------------------------------------------------
# Preamble: verify the required tools and the single command line argument,
# then derive the name of the .csv output file from the .db input file.
#
# Usage:   darkstat_export FILE.db
# Sets:    SCRIPTNAME, DBFILENAME, CSVFILENAME (used by the pipeline below)
# Exits 1: if awk or od cannot be found, if the argument count is not 1,
#          or if the input file is not readable.
# ---------------------------------------------------------------------------

# Name of this script; used as the prefix of every message.
SCRIPTNAME=$(basename "$0")

# "command -v" is the POSIX way to look up a command. The bash-only
# "type -P" prints a diagnostic on stdout under other /bin/sh
# implementations, which made the old "test -z" check always pass.
if ! command -v awk >/dev/null 2>&1; then
    echo "${SCRIPTNAME}: missing AWK interpreter, at least not found in PATH"
    echo "${SCRIPTNAME}: every POSIX compliant OS has one; add the location to PATH"
    exit 1
fi
if ! command -v od >/dev/null 2>&1; then
    echo "${SCRIPTNAME}: missing od file dump tool, at least not found in PATH"
    echo "${SCRIPTNAME}: every POSIX compliant OS has one; add the location to PATH"
    exit 1
fi

# Exactly one argument is expected: the darkstat export file.
if test $# -ne 1; then
    echo "${SCRIPTNAME}: missing parameter; need file name of .db file"
    exit 1
fi

# Quote the file name everywhere so that blanks or glob characters in it
# do not split or expand the argument.
DBFILENAME=$1
if test -r "${DBFILENAME}"; then
    echo "${SCRIPTNAME}: Found file ${DBFILENAME}"
else
    echo "${SCRIPTNAME}: file ${DBFILENAME} does not exist"
    exit 1
fi

# Build the output name: same directory, file name cut at its first dot,
# plus ".csv". The cut is applied to the last path component only;
# applying "%%.*" to the whole path turned "./data.db" into ".csv" and
# "/a/b.d/x.db" into "/a/b.csv".
DBBASENAME=${DBFILENAME##*/}
DBDIRPREFIX=${DBFILENAME%"${DBBASENAME}"}
DBSTEM=${DBBASENAME%%.*}
# A name starting with a dot (e.g. ".export.db") has an empty stem;
# keep the complete name in that case instead of producing ".csv".
if test -z "${DBSTEM}"; then
    DBSTEM=${DBBASENAME}
fi
CSVFILENAME="${DBDIRPREFIX}${DBSTEM}.csv"
echo "${SCRIPTNAME}: Writing output into ${CSVFILENAME}"

# The spec of the .db export format exists for different versions:
# http://unix4lyfe.org/gitweb/darkstat/blob/0a152e51f5d9c1771308caa7135d363a722aee18:/export-format.txt
# http://git.msquadrat.de/darkstat.git/blob_plain/master:/export-format.txt
# http://phil.lavin.me.uk/downloads/parse.phps
# Only file format version 1 is supported by us.
# Obviously, darkstat itself distinguishes 3 different host format versions.
# Only host format version 2 is supported by us.
# The darkstat database file is converted from binary format
# to ASCII by the standard Unix command od.

# Some things don't work correctly yet.
# Probably because there is no DNS server configured in our embedded device
# that produces .db files within OpenWRT.
# - the host name contains nonsense at a constant length of 5
# - the "last seen" timing information always contains 0
# - we read the graphics section of the file but ignore it

# Let the od tool convert each binary byte into several textual formats.
# The AWK script reads all variants and later picks the format it needs.
# Conversion pipeline: od dumps ${DBFILENAME} and the AWK program parses the
# dump; everything AWK prints is redirected into ${CSVFILENAME}.
# The first three AWK rules collect the od output per byte address:
#   - a line with two fields stores the address in addr and the hex text in hex[addr]
#   - the first one-field line after it is stored in dec[addr]
#   - the next one-field line is stored in ascii[addr]
# Later code reads those arrays through read_bytes/read_number/read_text while
# advancing the read position ai, and calls quit() to print a reason and,
# when asked to terminate, dump the remaining bytes and exit.
# NOTE(review): this copy of the program is damaged. Text between a "<" and a
# later ">" appears to have been dropped by the extraction, e.g.
# "for (c=0; c 0)" and "for (pi=0; pi 1)", so several function bodies are
# incomplete. Restore the text from the upstream contrib/darkstat_export
# before running or modifying it.
# NOTE(review): DBFILENAME and CSVFILENAME are expanded unquoted below, so
# file names containing blanks will break the pipeline - confirm and quote.
+od -Ad -v -tx1 -tu1 -ta -w1 < ${DBFILENAME} | +awk ' + NF==2 { addr = 0 + $1; hex[addr] = $2; next } + NF==1 && addr in dec { ascii[addr]=$1; next } + NF==1 && ! (addr in dec) { dec[addr]=$1; next } + # Now all variants of the bytes are available in certain arrays. + # The array indices cover the range 0 .. addr. + + function read_bytes(array, address, count, retval, c) { + retval="" + for (c=0; c 0) + print reason + if (terminate != 0) { + # Any remaining bytes in the file shall be dumped. + for (i=ai; i<=addr; i++) + print i, hex[i], ascii[i] + exit(retval) + } + } + function readIPsection() { + ip_protos_data=read_bytes(ascii, ai, 1) + if (ip_protos_data != "P") + quit("expected ip_protos_data P, found " ip_protos_data, 1, 1) + ai += 1 + ip_proto_count=read_number(ai, 1) + ai += 1 + for (pi=0; pi 1) { + last_seen=read_number(ai, 4) + # This value is always 0 in our files. + ai += 4 + } + mac_address=hex[ai+0] ":" hex[ai+1] ":" hex[ai+2] ":" hex[ai+3] ":" hex[ai+4] ":" hex[ai+5] + ai += 6 + # Weird stuff: the host name should be read. + # But there are only 5 bytes of nonsense. + # The first byte should be the length counter, but it isnt. + # The last byte is in fact a 0 byte. + # Probably caused by the missing DNS server. 
+ # ignore 5 bytes with nonsense + nonsense=read_text(ai, 5) + ai += 5 + host_bytes_in=read_number(ai, 8) + ai += 8 + host_bytes_out=read_number(ai, 8) + ai += 8 + readIPsection() + readTCPsection() + readUDPsection() + } else { + quit("host format supported only in version 02: " host_version, 1, 1) + #address_familiy=read_bytes(hex, ai, 1) + #print "address familiy = " address_familiy + } + printf("\"%s\";\"%s\";%d;%d;%s;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%s;%s\n", + ip_address, mac_address, host_bytes_in, host_bytes_out, + IPprotos, ip_proto_in, ip_proto_out, + tcp_proto_count, tcp_proto_in, tcp_proto_out, + udp_proto_count, udp_proto_in, udp_proto_out, + ssh_in, ssh_out, rdp_in, rdp_out, + TCPports, UDPports) + } + section_header=read_bytes(hex, ai, 3) + if (section_header != "da4752") + quit("section header da4752 expected: " section_header, 1, 1) + ai += 3 + db_version=read_bytes(hex, ai, 1) + if (db_version != "01") + quit("file format supported only in version 01", 1, 1) + ai += 1 + last_time=read_number(ai, 8) + ai += 8 + readGraphsection("60 seconds") + readGraphsection("60 minutes") + readGraphsection("24 hours") + readGraphsection("31 days") + # The complete file has been parsed, no bytes should be left over. + # Terminate with return value 0 if the byte numbers match. + quit("", (addr != ai+1) ?0:1, addr != ai+1) + } + ' > ${CSVFILENAME}