Source: http://www.annexia.org/tmp/ocaml-bitmatch-0.1.tar.gz
License: LGPLv2+ with OCaml linking exception
Erlang has a "byte-oriented" data type which can be treated as a
stream of bits, and provides rather elegant features for creating and
matching over such streams. This is a key feature of Erlang and was
developed because of its history in telecommunications. (More about
the feature in this paper:
http://user.it.uu.se/~kostis/Papers/padl07.pdf)
I have written a camlp4 syntax extension which does much the same in
OCaml. For example, you can now effortlessly parse IP packets:
let display pkt =
bitmatch pkt with
(* IPv4 packet header from RFC 791:
0 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|Version| IHL |Type of Service| Total Length |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Identification |Flags| Fragment Offset |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Time to Live | Protocol | Header Checksum |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Source Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Destination Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Options | Padding |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*)
| 4 : 4; hdrlen : 4; tos : 8; length : 16; (* same as above in OCaml *)
identification : 16; flags : 3; fragoffset : 13;
ttl : 8; protocol : 8; checksum : 16;
source : 32;
dest : 32;
options : (hdrlen-5)*32 : bitstring; (* NB computed length *)
payload : -1 : bitstring ->
printf "IPv4:\n";
printf " header length: %d * 32 bit words\n" hdrlen;
printf " type of service: %d\n" tos;
printf " packet length: %d bytes\n" length;
(* etc *)
(* IPv6 packet header *)
| 6 : 4; tclass : 8; flow : 20;
length : 16; nexthdr : 8; ttl : 8;
source : 128 : bitstring;
dest : 128 : bitstring;
payload : -1 : bitstring ->
printf "IPv6:\n";
printf " traffic class: %d\n" tclass;
printf " flow label: %d\n" flow;
printf " packet (payload) length: %d bytes\n" length;
printf " next header: %d\n" nexthdr;
printf " ttl: %d\n" ttl;
(* etc *)
| version : 4 ->
eprintf "unknown IP version %d\n" version;
exit 1
| _ as pkt ->
eprintf "data is smaller than one nibble:\n";
Bitmatch.hexdump_bitstring stderr pkt;
exit 1
Or filesystems, as in this parser for Linux EXT3 superblocks:
let bits = Bitmatch.bitstring_of_file "tests/ext3_sb"
let () =
bitmatch bits with
| s_inodes_count : 32 : littleendian; (* Inodes count *)
s_blocks_count : 32 : littleendian; (* Blocks count *)
s_r_blocks_count : 32 : littleendian; (* Reserved blocks count *)
s_free_blocks_count : 32 : littleendian; (* Free blocks count *)
s_free_inodes_count : 32 : littleendian; (* Free inodes count *)
s_first_data_block : 32 : littleendian; (* First Data Block *)
s_log_block_size : 32 : littleendian; (* Block size *)
s_log_frag_size : 32 : littleendian; (* Fragment size *)
s_blocks_per_group : 32 : littleendian; (* # Blocks per group *)
s_frags_per_group : 32 : littleendian; (* # Fragments per group *)
s_inodes_per_group : 32 : littleendian; (* # Inodes per group *)
s_mtime : 32 : littleendian; (* Mount time *)
s_wtime : 32 : littleendian; (* Write time *)
s_mnt_count : 16 : littleendian; (* Mount count *)
s_max_mnt_count : 16 : littleendian; (* Maximal mount count *)
0xef53 : 16 : littleendian -> (* Magic signature *)
printf "ext3 superblock:\n";
printf " s_inodes_count = %ld\n" s_inodes_count;
printf " s_blocks_count = %ld\n" s_blocks_count;
printf " s_free_inodes_count = %ld\n" s_free_inodes_count;
printf " s_free_blocks_count = %ld\n" s_free_blocks_count
| _ ->
eprintf "not an ext3 superblock!\n%!";
exit 2
There is also a similar syntax for contructing bitstrings.
Please let me know if you are interested in using this. I may change
the syntax a little before the next release.
Thanks to several people on #ocaml for answering my questions when I
was writing this.
Rich.
--
Richard Jones
Red Hat
_______________________________________________
Caml-list mailing list. Subscription management:
http://yquem.inria.fr/cgi-bin/mailman/listinfo/caml-list
Archives: http://caml.inria.fr
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners
Bug reports: http://caml.inria.fr/bin/caml-bugs
At the condition this is not a joke ;-)
I have two questions:
* do you think you can get something efficient (part of the paper you
are linking, talked about performance)
* is there a way to retain part of the data structure... E.g TCP/UDP
packet has the same IP part (the one you describe) + a TCP/UDP
header...
Example
type ip_header_begin =
<<
version: 4;
hdrlen : 4;
tos : 8;
length : 16;
identification : 16;
flags : 3;
fragoffset : 13;
ttl : 8;
>>
type ip_header_end =
<<
checksum : 16;
source : 32;
dest : 32;
>>
;;
type udp_header =
<<
source_port: 16;
destination_port: 16;
length: 16;
checksum: 16;
>>
;;
| ip_beg: ip_header_begin; 17: 8; ip_end: ip_header_end; udp: udp_header ->
Printf.printf "Found an UDP packet (TTL: %d)" ip_beg.ttl
| ip_beg: ip_header_begin; 6: 8; ip_end: ip_header_end; ->
print_string "Found a TCP packet"
(N.B.: there is other way to handle this problem... but this is just a
question).
Anyway, i found this interesting and worth looking at.
Regards,
Sylvain Le Gall
i keep reading it out of the corner of my eye as "ocaml beeyatch"
There's lots of room for optimization in the current code. It mainly
aims to be working.
> * is there a way to retain part of the data structure... E.g TCP/UDP
> packet has the same IP part (the one you describe) + a TCP/UDP
> header...
At the moment you cannot define and reuse patterns, but it's certainly
something which could be added (as in micmatch).
However it is easy to modify the examples so they match over TCP and
UDP packets, either by adding a second whole case to the top level
bitmatch, or by adding a sub-bitmatch matching the payload.
Rich.
--
Richard Jones
Red Hat
_______________________________________________
http://et.redhat.com/~rjones/bitmatch/
http://et.redhat.com/~rjones/bitmatch/html/Bitmatch.html