3 # Copyright 2013 MediaMobil Communication GmbH
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # This script converts a binary .db file into a .csv file.
16 # The .db file was generated by darkstat with the --export option.
17 # The .csv file can be read by any spreadsheet application.
# Startup checks: make sure awk and od can be found, require exactly one
# command line argument, check the input file and derive the .csv file name.
# NOTE(review): the statements that assign DBFILENAME and that leave or close
# the "if" constructs are not visible in this excerpt - confirm against the
# complete file.
18 SCRIPTNAME=$( basename $0)
# "type -P" prints the path of an executable found in PATH; empty output is
# treated as "tool missing".  NOTE(review): -P is a bash extension of "type".
19 if test -z "$( type -P awk )" ; then
20 echo "${SCRIPTNAME}: missing AWK interpreter, at least not found in PATH"
21 echo "${SCRIPTNAME}: every POSIX compliant OS has one; add the location to PATH"
24 if test -z "$( type -P od )" ; then
25 echo "${SCRIPTNAME}: missing od file dump tool, at least not found in PATH"
26 echo "${SCRIPTNAME}: every POSIX compliant OS has one; add the location to PATH"
# Exactly one positional parameter (the .db file name) is accepted.
29 if test $# -ne 1; then
30 echo "${SCRIPTNAME}: missing parameter; need file name of .db file"
# -r tests readability, so an existing but unreadable file is also reported
# as "does not exist".
# NOTE(review): ${DBFILENAME} is expanded unquoted here and below; a file
# name containing whitespace would be split into several words.
34 if test -r ${DBFILENAME}; then
35 echo ${SCRIPTNAME}: Found file ${DBFILENAME}
37 echo ${SCRIPTNAME}: file ${DBFILENAME} does not exist
# "%%.*" removes the longest suffix that starts with a dot, i.e. everything
# from the FIRST dot of the whole value.
# NOTE(review): for a path such as ./dir/file.db this leaves an empty string
# and the output name becomes ".csv"; "%.*" (shortest match) may be what is
# intended - confirm.
40 CSVFILENAME=${DBFILENAME%%.*}.csv
41 echo ${SCRIPTNAME}: Writing output into ${CSVFILENAME}
43 # The spec of the .db export format exists for different versions:
44 # http://unix4lyfe.org/gitweb/darkstat/blob/0a152e51f5d9c1771308caa7135d363a722aee18:/export-format.txt
45 # http://git.msquadrat.de/darkstat.git/blob_plain/master:/export-format.txt
46 # http://phil.lavin.me.uk/downloads/parse.phps
47 # Only file format version 1 is supported by us.
48 # Obviously, darkstat itself distinguishes 3 different host format versions.
49 # Only host format version 2 is supported by us.
50 # The darkstat database file is converted from binary format
51 # to ASCII by the standard Unix command od.
53 # Some things don't work correctly yet.
54 # Probably because there is no DNS server configured in our embedded device
55 # that produces .db files within OpenWRT.
56 # - the host name field contains nonsense with a constant length of 5 bytes
57 # - the "last seen" timing information always contains 0
58 # - we read the graphics section of the file but ignore it
60 # Let the od tool convert each binary byte into several textual formats.
61 # The AWK script reads all variants and later picks the format it needs.
# Dump the input with decimal offsets (-Ad), without collapsing repeated
# rows (-v), one byte per row (-w1), and in three renderings per byte:
# hex (-tx1), unsigned decimal (-tu1) and named character (-ta).
62 od -Ad -v -tx1 -tu1 -ta -w1 < ${DBFILENAME} |
# Input rules of the awk program.
# A row with two fields carries the offset and the hex value; it starts a new
# byte and sets addr to the numeric offset.
64 NF==2 { addr = 0 + $1; hex[addr] = $2; next }
# Rows with a single field belong to the current addr: the first one seen
# (addr not yet in dec) is stored as decimal value, the following one as the
# character rendering.
# NOTE(review): od ends its output with an offset-only row, which also has a
# single field and would be stored in ascii[] for the last byte - verify that
# this is harmless.
65 NF==1 && addr in dec { ascii[addr]=$1; next }
66 NF==1 && ! (addr in dec) { dec[addr]=$1; next }
67 # Now all variants of the bytes are available in certain arrays.
68 # The array indices cover the range 0 .. addr.
# read_bytes(array, address, count)
# Concatenates count consecutive entries of array, starting at index address,
# into one string without separators.
# Callers pass three arguments; retval and c are extra parameters that serve
# as local variables.
# NOTE(review): the initialisation of retval, the return statement and the
# closing brace are not visible in this excerpt.
70 function read_bytes(array, address, count, retval, c) {
72 for (c=0; c<count; c++)
73 retval = retval array[address+c]
# read_number(address, count)
# Combines count decimal byte values from dec[], starting at index address,
# into one number.  Each step multiplies the running value by 256 before
# adding the next byte, so the byte at the lowest index is the most
# significant one.
# retval and c are extra parameters that serve as local variables.
# NOTE(review): awk numbers are usually double precision floats, so 8-byte
# values above 2^53 would lose precision - confirm whether that matters.
# NOTE(review): the initialisation of retval, the return statement and the
# closing brace are not visible in this excerpt.
76 function read_number(address, count, retval, c) {
78 for (c=0; c<count; c++)
79 retval = retval*256 + dec[address+c]
# read_text(address, count)
# Concatenates count consecutive entries of ascii[], starting at index
# address, into one string without separators.
# retval and c are extra parameters that serve as local variables.
# NOTE(review): the initialisation of retval, the return statement and the
# closing brace are not visible in this excerpt.
82 function read_text(address, count, retval, c) {
84 for (c=0; c<count; c++)
85 retval = retval ascii[address+c]
# quit(reason, terminate, retval)
# Common exit path of the parser.  A non-empty reason is handled first, and
# the bytes from the current read position ai up to the last offset addr can
# be printed as offset, hex value and character.
# The loop index i is not part of the parameter list, so it is a global.
# NOTE(review): the statement guarded by the length test, and the way
# terminate and retval are used, are not visible in this excerpt.
88 function quit(reason, terminate, retval) {
89 if (length(reason) > 0)
92 # Any remaining bytes in the file shall be dumped.
93 for (i=ai; i<=addr; i++)
94 print i, hex[i], ascii[i]
# readIPsection()
# Parses the IP protocol section at the current read position ai.
# Layout as read here: marker character P, a 1-byte entry count, then per
# entry a 1-byte protocol type followed by two 8-byte counters.
# Results go into globals: IPprotos collects the protocol types separated by
# blanks, ip_proto_in and ip_proto_out accumulate the counters of all entries.
# A wrong marker ends the run through quit().
# NOTE(review): the statements that advance ai between the reads and the
# closing braces are not visible in this excerpt.
98 function readIPsection() {
99 ip_protos_data=read_bytes(ascii, ai, 1)
100 if (ip_protos_data != "P")
101 quit("expected ip_protos_data P, found " ip_protos_data, 1, 1)
103 ip_proto_count=read_number(ai, 1)
105 for (pi=0; pi<ip_proto_count; pi++) {
106 ip_proto_type=read_number(ai, 1)
108 IPprotos = IPprotos " " ip_proto_type
109 ip_proto_in += read_number(ai, 8)
111 ip_proto_out += read_number(ai, 8)
# readTCPsection()
# Parses the TCP section at the current read position ai.
# Layout as read here: marker character T, a 2-byte entry count, then per
# entry a 2-byte port number followed by three 8-byte counters (syn, in, out).
# Results go into globals: TCPports collects the port numbers separated by
# blanks, tcp_proto_in and tcp_proto_out accumulate the counters of all
# entries, and ports 22 and 3389 additionally feed the ssh and rdp counters.
# tcp_proto_syn is assigned but not used in the visible lines.
# A wrong marker ends the run through quit().
# NOTE(review): the statements that advance ai between the reads and the
# closing braces are not visible in this excerpt.
115 function readTCPsection() {
116 tcp_protos_data=read_bytes(ascii, ai, 1)
117 if (tcp_protos_data != "T")
118 quit("expected tcp_protos_data T, found " tcp_protos_data, 1, 1)
120 tcp_proto_count=read_number(ai, 2)
122 for (ti=0; ti<tcp_proto_count; ti++) {
123 tcp_proto_port=read_number(ai, 2)
125 TCPports = TCPports " " tcp_proto_port
126 tcp_proto_syn=read_number(ai, 8)
128 tcp_proto_in += read_number(ai, 8)
130 tcp_proto_out += read_number(ai, 8)
# NOTE(review): tcp_proto_in and tcp_proto_out are running totals over all
# ports read so far (they are accumulated with +=), so the ssh and rdp
# counters below receive the cumulative sum rather than the bytes of port 22
# or 3389 alone - confirm whether that is intended.
132 if (tcp_proto_port == 22) {
133 ssh_in += tcp_proto_in
134 ssh_out += tcp_proto_out
136 if (tcp_proto_port == 3389) {
137 rdp_in += tcp_proto_in
138 rdp_out += tcp_proto_out
# readUDPsection()
# Parses the UDP section at the current read position ai.
# Layout as read here: marker character U, a 2-byte entry count, then per
# entry a 2-byte port number followed by two 8-byte counters (in, out).
# Results go into globals: UDPports collects the port numbers separated by
# blanks, udp_proto_in and udp_proto_out accumulate the counters of all
# entries, and ports 22 and 3389 additionally feed the ssh and rdp counters.
# A wrong marker ends the run through quit().
# NOTE(review): the statements that advance ai between the reads and the
# closing braces are not visible in this excerpt.
142 function readUDPsection() {
143 udp_protos_data=read_bytes(ascii, ai, 1)
144 if (udp_protos_data != "U")
145 quit("expected udp_protos_data U, found " udp_protos_data, 1, 1)
147 udp_proto_count=read_number(ai, 2)
149 for (ui=0; ui<udp_proto_count; ui++) {
150 udp_proto_port=read_number(ai, 2)
152 UDPports = UDPports " " udp_proto_port
153 udp_proto_in += read_number(ai, 8)
155 udp_proto_out += read_number(ai, 8)
# NOTE(review): udp_proto_in and udp_proto_out are running totals over all
# ports read so far (they are accumulated with +=), so the ssh and rdp
# counters below receive the cumulative sum rather than the bytes of port 22
# or 3389 alone - confirm whether that is intended.
157 if (udp_proto_port == 22) {
158 ssh_in += udp_proto_in
159 ssh_out += udp_proto_out
161 if (udp_proto_port == 3389) {
162 rdp_in += udp_proto_in
163 rdp_out += udp_proto_out
# readGraphsection(interval)
# Reads one graph record at the current read position ai: two single bytes
# (n_bars and i_bars, each read with a post-increment of ai), then n_bars
# entries of two 8-byte values each.
# The values are stored in graph_bytes_in and graph_bytes_out and overwritten
# by every entry; interval and i_bars are not used in the visible lines.
# NOTE(review): the statements that advance ai inside the loop and the
# closing braces are not visible in this excerpt.
167 function readGraphsection(interval) {
168 n_bars=read_number(ai++, 1)
169 i_bars=read_number(ai++, 1)
170 for (bi=0; bi<n_bars; bi++) {
171 graph_bytes_in=read_number(ai, 8)
173 graph_bytes_out=read_number(ai, 8)
# Main parsing pass over the arrays filled from the od output.
# It validates the file header and the host section header, prints one CSV
# row per host and finally reads the graph section.
# NOTE(review): several statements are not visible in this excerpt, among
# them the ones that advance the read position ai, the calls into the
# per-host IP/TCP/UDP sections and the closing braces.
179 file_header=read_bytes(hex, 0, 4)
180 if (file_header != "da314159")
181 quit("input data is not an exported darkstat .db file, wrong header: " file_header, 1, 1)
182 section_header=read_bytes(hex, 4, 3)
183 if (section_header != "da4853")
184 quit("section header da4853 expected: " section_header, 1, 1)
185 db_version=read_bytes(hex, 7, 1)
186 if (db_version != "01")
187 quit("file format supported only in version 01", 1, 1)
# Number of host records, a 4-byte value at offset 8.
188 host_count=read_number(8, 4)
190 # Print a header into the .csv file.
191 printf("IP address;MAC address;host in bytes;host out bytes;IP protos;IP in bytes;IP out bytes;TCP port count;TCP in bytes;TCP out bytes;UDP port count;UDP in bytes;UDP out bytes;ssh in bytes;ssh out bytes;rdp in bytes;rdp out bytes;TCP ports;UDP ports\n")
192 for (hi=1; hi<=host_count; hi++) {
193 # Make sure all variables to be printed are initially empty.
194 ip_address=mac_address=""
195 host_bytes_in=host_bytes_out=ip_proto_in=ip_proto_out=tcp_proto_in=tcp_proto_out=udp_proto_in=udp_proto_out=ssh_in=ssh_out=rdp_in=rdp_out=0
196 IPprotos=TCPports=UDPports=""
197 tcp_proto_count=udp_proto_count=0
# Each host record starts with 3 header bytes followed by a 1-byte format
# version, both taken from the hex rendering.
198 host_header=read_bytes(hex, ai, 3)
199 host_version=read_bytes(hex, ai+3, 1)
201 if (host_version == "02") {
# Four single bytes joined with dots form the printed IP address.
202 ip_address=read_number(ai+0,1) "." read_number(ai+1,1) "." read_number(ai+2,1) "." read_number(ai+3,1)
# host_version holds a hex string; adding 0 converts it to a number for the
# comparison.
204 if ((host_version+0) > 1) {
205 last_seen=read_number(ai, 4)
206 # This value is always 0 in our files.
# Six hex bytes joined with colons form the printed MAC address.
209 mac_address=hex[ai+0] ":" hex[ai+1] ":" hex[ai+2] ":" hex[ai+3] ":" hex[ai+4] ":" hex[ai+5]
211 # Weird stuff: the host name should be read.
212 # But there are only 5 bytes of nonsense.
213 # The first byte should be the length counter, but it is not.
214 # The last byte is in fact a 0 byte.
215 # Probably caused by the missing DNS server.
216 # ignore 5 bytes with nonsense
217 nonsense=read_text(ai, 5)
# Per-host traffic totals, 8 bytes each.
219 host_bytes_in=read_number(ai, 8)
221 host_bytes_out=read_number(ai, 8)
227 quit("host format supported only in version 02: " host_version, 1, 1)
228 #address_familiy=read_bytes(hex, ai, 1)
229 #print "address familiy = " address_familiy
# One CSV row per host; the two address columns are quoted, the counters are
# printed as integers.
231 printf("\"%s\";\"%s\";%d;%d;%s;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%s;%s\n",
232 ip_address, mac_address, host_bytes_in, host_bytes_out,
233 IPprotos, ip_proto_in, ip_proto_out,
234 tcp_proto_count, tcp_proto_in, tcp_proto_out,
235 udp_proto_count, udp_proto_in, udp_proto_out,
236 ssh_in, ssh_out, rdp_in, rdp_out,
# Graph section: expects header bytes da4752 and format version 01.
239 section_header=read_bytes(hex, ai, 3)
240 if (section_header != "da4752")
241 quit("section header da4752 expected: " section_header, 1, 1)
243 db_version=read_bytes(hex, ai, 1)
244 if (db_version != "01")
245 quit("file format supported only in version 01", 1, 1)
# 8-byte value stored in last_time; it is not used in the visible lines.
247 last_time=read_number(ai, 8)
# Four graph records are read in sequence; the argument names the interval
# each record covers.
249 readGraphsection("60 seconds")
250 readGraphsection("60 minutes")
251 readGraphsection("24 hours")
252 readGraphsection("31 days")
253 # The complete file has been parsed, no bytes should be left over.
254 # Terminate with return value 0 if the byte numbers match.
# The third argument (retval) is 1 when addr differs from ai+1 and 0 when
# they are equal; the second argument is the inverse of that.
255 quit("", (addr != ai+1) ?0:1, addr != ai+1)