Give advice, default to two blocks.
[darkstat] / cap.c
1 /* darkstat 3
2 * copyright (c) 2001-2011 Emil Mikulic.
3 *
4 * cap.c: packet capture
5 *
6 * You may use, modify and redistribute this file under the terms of the
7 * GNU General Public License version 2. (see COPYING.GPL)
8 */
9
/*
 * This capture backend is built on the Linux-only PACKET_MMAP interface.
 * Test __linux__ as well as the legacy unprefixed `linux` macro: compilers
 * in strict ISO mode (e.g. -std=c99, -std=c11) predefine only the former.
 */
#if !defined(__linux__) && !defined(linux)
#error "PACKET_MMAP is only for linux"
#endif
13
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/user.h> // PAGE_SIZE
#include <sys/mman.h> // mmap()
#include <linux/if_ether.h> // ETH_P_ALL
#include <linux/if_packet.h> // struct tpacket_req
#include <net/if.h> // struct ifreq
#include <assert.h>
#include <string.h> // memset
#include <unistd.h> // close
#include <pcap.h>
24
25 #include "cap.h"
26 #include "decode.h"
27 #include "err.h"
28 #include "localip.h"
29 #include "opt.h"
30
31 static int cap_fd = -1;
32 static unsigned char *cap_buf;
33 static const struct linkhdr *linkhdr = NULL;
34 static struct tpacket_req req;
35
36 /* FIXME: twiddle these */
37 int cap_pages = 1, cap_blocks = 2;
38
39 /* Initialize capture, or exit on failure. */
40 void cap_init(const char *device, const char *filter, int promisc _unused_) {
41 int over_hdr, over_sll, over_mac, snaplen;
42 struct sockaddr_ll sll;
43 struct ifreq ifr;
44
45 verbosef("using experimental PACKET_MMAP capture");
46 if (filter != NULL)
47 warnx("BPF filters are not supported in PACKET_MMAP mode");
48
49 if ((cap_fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL))) == -1)
50 err(1, "cap socket");
51
52 over_hdr = TPACKET_ALIGN(sizeof(struct tpacket_hdr));
53 over_sll = TPACKET_ALIGN(sizeof(struct sockaddr_ll));
54 /* FIXME: assumes ethernet */
55 over_mac = TPACKET_ALIGN(6 + 6 + 2);
56 linkhdr = getlinkhdr(DLT_EN10MB);
57
58 snaplen = 96;
59 if (opt_want_snaplen > -1)
60 snaplen = opt_want_snaplen;
61 verbosef("using snaplen %d", snaplen);
62
63 req.tp_block_size = PAGE_SIZE * cap_pages;
64 req.tp_block_nr = cap_blocks;
65 req.tp_frame_size = over_hdr + over_sll + over_mac + TPACKET_ALIGN(snaplen);
66 req.tp_frame_nr = (req.tp_block_size / req.tp_frame_size) * req.tp_block_nr;
67
68 assert(req.tp_frame_size > TPACKET_HDRLEN);
69 assert(req.tp_block_nr <= 131072 / sizeof(void*));
70
71 verbosef("%d(hdr) + %d(sll) + %d(mac) + %d(snaplen) + %d(pad) = %d total",
72 over_hdr, over_sll, over_mac, snaplen,
73 TPACKET_ALIGN(snaplen) - snaplen,
74 req.tp_frame_size);
75
76 verbosef("tpacket_req: %d blocks x %d bytes, %d frames x %d bytes",
77 req.tp_block_nr, req.tp_block_size,
78 req.tp_frame_nr, req.tp_frame_size);
79
80 if (setsockopt(cap_fd, SOL_PACKET, PACKET_RX_RING,
81 (void *)&req, sizeof(req)) == -1)
82 err(1, "cap setsockopt");
83
84 if ((cap_buf = mmap(NULL, req.tp_block_size * req.tp_block_nr,
85 PROT_READ|PROT_WRITE, MAP_SHARED, cap_fd,
86 0)) == MAP_FAILED)
87 err(1, "cap mmap");
88
89 /* get interface index */
90 memset(&ifr, 0, sizeof(ifr));
91 strncpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
92 if (ioctl(cap_fd, SIOCGIFINDEX, &ifr) == -1)
93 err(1, "ifr ioctl");
94 verbosef("interface %s has index %d", device, (int)ifr.ifr_ifindex);
95
96 /* bind */
97 memset(&sll, 0, sizeof(sll));
98 sll.sll_family = AF_PACKET;
99 sll.sll_protocol = htons(ETH_P_ALL);
100 sll.sll_ifindex = ifr.ifr_ifindex;
101 if (bind(cap_fd, (struct sockaddr *)&sll, sizeof(sll)) == -1)
102 err(1, "cap bind");
103
104 verbosef("PACKET_MMAP initialized");
105 }
106
107 /* Set cap_fd in the given fd_set. */
108 void cap_fd_set(fd_set *read_set,
109 int *max_fd,
110 struct timeval *timeout _unused_,
111 int *need_timeout) {
112 assert(*need_timeout == 0); /* we're first to get a shot at this */
113 FD_SET(cap_fd, read_set);
114 *max_fd = MAX(*max_fd, cap_fd);
115 }
116
117 unsigned int cap_pkts_recv = 0, cap_pkts_drop = 0;
118
119 static void
120 cap_stats_update(void)
121 {
122 struct tpacket_stats tp;
123 socklen_t len = sizeof(tp);
124 if (getsockopt(cap_fd, SOL_PACKET, PACKET_STATISTICS, &tp, &len) == -1)
125 warn("getsockopt(PACKET_STATISTICS) failed");
126
127 /* These are cumulative. */
128 cap_pkts_recv += tp.tp_packets;
129 cap_pkts_drop += tp.tp_drops;
130
131 if (tp.tp_drops)
132 verbosef("WARNING: dropped %d frames, consider increasing cap_blocks",
133 tp.tp_drops);
134 }
135
136 /* FIXME: duplicated */
137 /* Print hexdump of received packet. */
138 static void
139 hexdump(const u_char *buf, const uint32_t len)
140 {
141 uint32_t i, col;
142
143 printf("packet of %u bytes:\n", len);
144 for (i=0, col=0; i<len; i++) {
145 if (col == 0) printf(" ");
146 printf("%02x", buf[i]);
147 if (i+1 == linkhdr->hdrlen)
148 printf("[");
149 else if (i+1 == linkhdr->hdrlen + IP_HDR_LEN)
150 printf("]");
151 else printf(" ");
152 col += 3;
153 if (col >= 72) {
154 printf("\n");
155 col = 0;
156 }
157 }
158 if (col != 0) printf("\n");
159 printf("\n");
160 }
161
162 /*
163 * Callback function for pcap_dispatch() which chains to the decoder specified
164 * in linkhdr struct.
165 */
166 static void
167 callback(u_char *user, const struct pcap_pkthdr *h, const u_char *bytes)
168 {
169 if (opt_want_hexdump) hexdump(bytes, h->caplen);
170 linkhdr->handler(user, h, bytes);
171 }
172
173 /* Process any packets currently in the capture buffer. */
174 void cap_poll(fd_set *read_set) {
175 unsigned int total, block, frame;
176
177 if (!FD_ISSET(cap_fd, read_set))
178 return; /* nothing to do */
179
180 /* Once per capture poll, check our IP address. */
181 localip_update(); /* FIXME: this might even be too often */
182
183 total = 0;
184 for (block = 0; block < req.tp_block_nr; block++)
185 for (frame = 0; frame < req.tp_block_size / req.tp_frame_size; frame++) {
186 struct tpacket_hdr *hdr = (struct tpacket_hdr *)(
187 cap_buf + block * req.tp_block_size + frame * req.tp_frame_size);
188 //struct sockaddr_ll *sll = (struct sockaddr_ll *)(
189 // (char*)hdr + TPACKET_ALIGN(sizeof(*hdr)));
190 unsigned char *data;
191 struct pcap_pkthdr pcap_hdr;
192
193 if (!(hdr->tp_status & TP_STATUS_USER))
194 continue; /* owned by kernel */
195
196 /* FIXME: ethernet specific? */
197 data = (unsigned char *)hdr + hdr->tp_mac;
198
199 pcap_hdr.caplen = hdr->tp_snaplen;
200 pcap_hdr.ts.tv_sec = hdr->tp_sec;
201
202 /* This is where the magic happens! */
203 callback(NULL, &pcap_hdr, data); /* <-- */
204
205 hdr->tp_status = TP_STATUS_KERNEL; /* done with this frame */
206 total++;
207 }
208
209 /*FIXME*/fprintf(stderr, "\r%d \r", total);
210 cap_stats_update();
211 }
212
213 void cap_stop(void) {
214 // FIXME: munmap(cap_buf)
215 close(cap_fd);
216 }
217
218 /* Run through entire capfile. */
219 void
220 cap_from_file(const char *capfile _unused_, const char *filter _unused_)
221 {
222 errx(1, "unimplemented");
223 }
224
225 #if 0
226 #include "cdefs.h"
227 #include "cap.h"
228 #include "config.h"
229 #include "conv.h"
230 #include "decode.h"
231 #include "hosts_db.h"
232 #include "localip.h"
233 #include "opt.h"
234
235 #include <sys/ioctl.h>
236 #include <sys/types.h>
237 #include <sys/socket.h>
238 #include <sys/wait.h>
239 #ifdef HAVE_SYS_FILIO_H
240 # include <sys/filio.h> /* Solaris' FIONBIO hides here */
241 #endif
242 #include <assert.h>
243 #include "err.h"
244 #include <stdio.h>
245 #include <stdlib.h>
246 #include <string.h>
247 #include <unistd.h>
248
249 /* The cap process life-cycle:
250 *
251 * Init - cap_init()
252 * Fill fd_set - cap_fd_set()
253 * Poll - cap_poll()
254 * Stop - cap_stop()
255 */
256
257 /* Globals - only useful within this module. */
258 static pcap_t *pcap = NULL;
259 static int pcap_fd = -1;
260 static const struct linkhdr *linkhdr = NULL;
261
262 #define CAP_TIMEOUT 500 /* granularity of capture buffer, in milliseconds */
263
264 /* ---------------------------------------------------------------------------
265 * Init pcap. Exits on failure.
266 */
267 void
268 cap_init(const char *device, const char *filter, int promisc)
269 {
270 char errbuf[PCAP_ERRBUF_SIZE], *tmp_device;
271 int linktype, snaplen, waited;
272
273 /* pcap doesn't like device being const */
274 tmp_device = xstrdup(device);
275
276 /* Open packet capture descriptor. */
277 waited = 0;
278 for (;;) {
279 errbuf[0] = '\0'; /* zero length string */
280 pcap = pcap_open_live(
281 tmp_device,
282 1, /* snaplen, irrelevant at this point */
283 0, /* promisc, also irrelevant */
284 CAP_TIMEOUT,
285 errbuf);
286 if (pcap != NULL) break; /* success! */
287
288 if ((opt_wait_secs != -1) && strstr(errbuf, "device is not up")) {
289 if ((opt_wait_secs > 0) && (waited >= opt_wait_secs))
290 errx(1, "waited %d secs, giving up: pcap_open_live(): %s",
291 waited, errbuf);
292
293 verbosef("waited %d secs, interface is not up", waited);
294 sleep(1);
295 waited++;
296 }
297 else errx(1, "pcap_open_live(): %s", errbuf);
298 }
299
300 /* Work out the linktype and what snaplen we need. */
301 linktype = pcap_datalink(pcap);
302 verbosef("linktype is %d", linktype);
303 if ((linktype == DLT_EN10MB) && opt_want_macs)
304 hosts_db_show_macs = 1;
305 linkhdr = getlinkhdr(linktype);
306 if (linkhdr == NULL)
307 errx(1, "unknown linktype %d", linktype);
308 if (linkhdr->handler == NULL)
309 errx(1, "no handler for linktype %d", linktype);
310 snaplen = getsnaplen(linkhdr);
311 if (opt_want_pppoe) {
312 snaplen += PPPOE_HDR_LEN;
313 if (linktype != DLT_EN10MB)
314 errx(1, "can't do PPPoE decoding on a non-Ethernet linktype");
315 }
316 verbosef("calculated snaplen minimum %d", snaplen);
317 #ifdef linux
318 /* Ubuntu 9.04 has a problem where requesting snaplen <= 60 will
319 * give us 42 bytes, and we need at least 54 for TCP headers.
320 *
321 * Hack to set minimum snaplen to tcpdump's default:
322 */
323 snaplen = MAX(snaplen, 96);
324 #endif
325 if (opt_want_snaplen > -1)
326 snaplen = opt_want_snaplen;
327 verbosef("using snaplen %d", snaplen);
328
329 /* Close and re-open pcap to use the new snaplen. */
330 pcap_close(pcap);
331 errbuf[0] = '\0'; /* zero length string */
332 pcap = pcap_open_live(
333 tmp_device,
334 snaplen,
335 promisc,
336 CAP_TIMEOUT,
337 errbuf);
338
339 if (pcap == NULL)
340 errx(1, "pcap_open_live(): %s", errbuf);
341
342 if (errbuf[0] != '\0') /* not zero length anymore -> warning */
343 warnx("pcap_open_live() warning: %s", errbuf);
344
345 free(tmp_device);
346
347 if (promisc)
348 verbosef("capturing in promiscuous mode");
349 else
350 verbosef("capturing in non-promiscuous mode");
351
352 /* Set filter expression, if any. */
353 if (filter != NULL)
354 {
355 struct bpf_program prog;
356 char *tmp_filter = xstrdup(filter);
357 if (pcap_compile(
358 pcap,
359 &prog,
360 tmp_filter,
361 1, /* optimize */
362 0) /* netmask */
363 == -1)
364 errx(1, "pcap_compile(): %s", pcap_geterr(pcap));
365
366 if (pcap_setfilter(pcap, &prog) == -1)
367 errx(1, "pcap_setfilter(): %s", pcap_geterr(pcap));
368
369 pcap_freecode(&prog);
370 free(tmp_filter);
371 }
372
373 pcap_fd = pcap_fileno(pcap);
374
375 /* set non-blocking */
376 #ifdef linux
377 if (pcap_setnonblock(pcap, 1, errbuf) == -1)
378 errx(1, "pcap_setnonblock(): %s", errbuf);
379 #else
380 { int one = 1;
381 if (ioctl(pcap_fd, FIONBIO, &one) == -1)
382 err(1, "ioctl(pcap_fd, FIONBIO)"); }
383 #endif
384
385 #ifdef BIOCSETWF
386 {
387 /* Deny all writes to the socket */
388 struct bpf_insn bpf_wfilter[] = { BPF_STMT(BPF_RET+BPF_K, 0) };
389 int wf_len = sizeof(bpf_wfilter) / sizeof(struct bpf_insn);
390 struct bpf_program pr;
391
392 pr.bf_len = wf_len;
393 pr.bf_insns = bpf_wfilter;
394
395 if (ioctl(pcap_fd, BIOCSETWF, &pr) == -1)
396 err(1, "ioctl(pcap_fd, BIOCSETFW)");
397 verbosef("filtered out BPF writes");
398 }
399 #endif
400
401 #ifdef BIOCLOCK
402 /* set "locked" flag (no reset) */
403 if (ioctl(pcap_fd, BIOCLOCK) == -1)
404 err(1, "ioctl(pcap_fd, BIOCLOCK)");
405 verbosef("locked down BPF for security");
406 #endif
407 }
408
409 /*
410 * Set pcap_fd in the given fd_set.
411 */
412 void
413 cap_fd_set(
414 #ifdef linux
415 fd_set *read_set _unused_,
416 int *max_fd _unused_,
417 struct timeval *timeout,
418 #else
419 fd_set *read_set,
420 int *max_fd,
421 struct timeval *timeout _unused_,
422 #endif
423 int *need_timeout)
424 {
425 assert(*need_timeout == 0); /* we're first to get a shot at this */
426 #ifdef linux
427 /*
428 * Linux's BPF is immediate, so don't select() as it will lead to horrible
429 * performance. Instead, use a timeout for buffering.
430 */
431 *need_timeout = 1;
432 timeout->tv_sec = 0;
433 timeout->tv_usec = CAP_TIMEOUT * 1000; /* msec->usec */
434 #else
435 /* We have a BSD-like BPF, we can select() on it. */
436 FD_SET(pcap_fd, read_set);
437 *max_fd = MAX(*max_fd, pcap_fd);
438 #endif
439 }
440
441 unsigned int cap_pkts_recv = 0, cap_pkts_drop = 0;
442
443 static void
444 cap_stats_update(void)
445 {
446 struct pcap_stat ps;
447
448 if (pcap_stats(pcap, &ps) != 0) {
449 warnx("pcap_stats(): %s", pcap_geterr(pcap));
450 return;
451 }
452
453 cap_pkts_recv = ps.ps_recv;
454 cap_pkts_drop = ps.ps_drop;
455 }
456
457 /*
458 * Print hexdump of received packet.
459 */
460 static void
461 hexdump(const u_char *buf, const uint32_t len)
462 {
463 uint32_t i, col;
464
465 printf("packet of %u bytes:\n", len);
466 for (i=0, col=0; i<len; i++) {
467 if (col == 0) printf(" ");
468 printf("%02x", buf[i]);
469 if (i+1 == linkhdr->hdrlen)
470 printf("[");
471 else if (i+1 == linkhdr->hdrlen + IP_HDR_LEN)
472 printf("]");
473 else printf(" ");
474 col += 3;
475 if (col >= 72) {
476 printf("\n");
477 col = 0;
478 }
479 }
480 if (col != 0) printf("\n");
481 printf("\n");
482 }
483
484 /*
485 * Callback function for pcap_dispatch() which chains to the decoder specified
486 * in linkhdr struct.
487 */
488 static void
489 callback(u_char *user, const struct pcap_pkthdr *h, const u_char *bytes)
490 {
491 if (opt_want_hexdump) hexdump(bytes, h->caplen);
492 linkhdr->handler(user, h, bytes);
493 }
494
495 /*
496 * Process any packets currently in the capture buffer.
497 */
498 void
499 cap_poll(fd_set *read_set
500 #ifdef linux
501 _unused_
502 #endif
503 )
504 {
505 int total, ret;
506
507 #ifndef linux /* We don't use select() on Linux. */
508 if (!FD_ISSET(pcap_fd, read_set)) {
509 verbosef("cap_poll premature");
510 return;
511 }
512 #endif
513
514 /*
515 * Once per capture poll, check our IP address. It's used in accounting
516 * for traffic graphs.
517 */
518 localip_update(); /* FIXME: this might even be too often */
519
520 total = 0;
521 for (;;) {
522 #ifndef NDEBUG
523 struct timeval t1;
524 gettimeofday(&t1, NULL);
525 #endif
526 ret = pcap_dispatch(
527 pcap,
528 -1, /* count, -1 = entire buffer */
529 callback,
530 NULL); /* user */
531
532 if (ret < 0) {
533 warnx("pcap_dispatch(): %s", pcap_geterr(pcap));
534 return;
535 }
536
537 #ifndef NDEBUG
538 {
539 struct timeval t2;
540 int td;
541
542 gettimeofday(&t2, NULL);
543 td = (t2.tv_sec - t1.tv_sec) * 1000000 + t2.tv_usec - t1.tv_usec;
544 if (td > CAP_TIMEOUT*1000)
545 warnx("pcap_dispatch blocked for %d usec! (expected <= %d usec)\n",
546 td, CAP_TIMEOUT*1000);
547 }
548 #endif
549
550 /* Despite count = -1, Linux will only dispatch one packet at a time. */
551 total += ret;
552
553 #ifdef linux
554 /* keep looping until we've dispatched all the outstanding packets */
555 if (ret == 0) break;
556 #else
557 /* we get them all on the first shot */
558 break;
559 #endif
560 }
561 cap_stats_update();
562 }
563
564 void
565 cap_stop(void)
566 {
567 pcap_close(pcap);
568 }
569
570 /* Run through entire capfile. */
571 void
572 cap_from_file(const char *capfile, const char *filter)
573 {
574 char errbuf[PCAP_ERRBUF_SIZE];
575 int linktype, ret;
576
577 /* Open packet capture descriptor. */
578 errbuf[0] = '\0'; /* zero length string */
579 pcap = pcap_open_offline(capfile, errbuf);
580
581 if (pcap == NULL)
582 errx(1, "pcap_open_offline(): %s", errbuf);
583
584 if (errbuf[0] != '\0') /* not zero length anymore -> warning */
585 warnx("pcap_open_offline() warning: %s", errbuf);
586
587 /* Work out the linktype. */
588 linktype = pcap_datalink(pcap);
589 linkhdr = getlinkhdr(linktype);
590 if (linkhdr == NULL)
591 errx(1, "unknown linktype %d", linktype);
592 if (linkhdr->handler == NULL)
593 errx(1, "no handler for linktype %d", linktype);
594 if (linktype == DLT_EN10MB) /* FIXME: impossible with capfile? */
595 hosts_db_show_macs = 1;
596
597 /* Set filter expression, if any. */ /* FIXME: factor! */
598 if (filter != NULL)
599 {
600 struct bpf_program prog;
601 char *tmp_filter = xstrdup(filter);
602 if (pcap_compile(
603 pcap,
604 &prog,
605 tmp_filter,
606 1, /* optimize */
607 0) /* netmask */
608 == -1)
609 errx(1, "pcap_compile(): %s", pcap_geterr(pcap));
610
611 if (pcap_setfilter(pcap, &prog) == -1)
612 errx(1, "pcap_setfilter(): %s", pcap_geterr(pcap));
613
614 pcap_freecode(&prog);
615 free(tmp_filter);
616 }
617
618 /* Process file. */
619 ret = pcap_dispatch(
620 pcap,
621 -1, /* count, -1 = entire buffer */
622 callback,
623 NULL); /* user */
624
625 if (ret < 0)
626 errx(1, "pcap_dispatch(): %s", pcap_geterr(pcap));
627 }
628
629 #endif
630 /* vim:set ts=3 sw=3 tw=78 expandtab: */