(* Copyright (C) 2008 Mauricio Fernandez http://eigenclass.org *)

(* Web-server log statistics: scans a log file line by line, tallies hits,
   bytes, 404s, client addresses and referrers, then prints the top 10 of
   each.

   Usage: PROGRAM LOGFILE [WORKERS [CHUNKS]]

   NOTE(review): the field positions used below (0 = client, 5 = request
   method, 6 = URI, 8 = status, 9 = byte count, 10 = referrer) look like the
   Apache combined log format split on whitespace -- confirm against
   [find_line_words]. *)

open Framework
open Str_util

module C = Counter

(* Global tallies, named after the report each one feeds:
   - [u_hits]:  hits per article URI
   - [u_bytes]: bytes served per URI (updated with [C.addL], i.e. Int64)
   - [s404s]:   URIs answered with a 404
   - [clients]: client addresses that fetched an article
   - [refs]:    external referrers of article fetches *)
let u_hits, u_bytes, s404s, clients, refs =
  C.make (), C.make (), C.make (), C.make (), C.make ()

(** [report lbl fmt c] prints a ["Top <lbl>:"] header followed by the
    entries returned by [top_n 10 c], each one written with [fmt]. *)
let report lbl fmt c =
  Printf.printf "Top %s:\n%a\n" lbl
    (fun ch -> List.iter (fmt ch))
    (top_n 10 c)

(* Shared word-boundary scratch structure, refilled for every line by
   [find_line_words]. *)
let b = make_word_info 1024

(* URIs that count as article fetches. *)
let re =
  Pcre.regexp "^/ongoing/When/\\d\\d\\dx/\\d\\d\\d\\d/\\d\\d/\\d\\d/[^ .]+$"

(* Referrers (still carrying their opening quote) that point back at the
   site itself; these are not counted. *)
let self_ref_re = Pcre.regexp "^\"http://www.tbray.org/ongoing/"

(** [record str b u bytes] accounts for one successful fetch of URI [u].

    A non-zero [bytes] is always added to [u_bytes]. When [u] is an article
    URI (matches [re]) the hit and the client (word 0 of the line) are
    counted too, and so is the referrer (word 10) unless it is the
    placeholder ["-"] (quoted) or a self-reference. *)
let record str b u bytes =
  if bytes <> 0 then C.addL u_bytes u (Int64.of_int bytes);
  if Pcre.pmatch ~rex:re u then begin
    let client = word b str 0 in
    (* Named [referrer] rather than [ref] so the Stdlib [ref] function is
       not shadowed. *)
    let referrer = word b str 10 in
    C.incr u_hits u;
    C.incr clients client;
    if referrer <> "\"-\"" && not (Pcre.pmatch ~rex:self_ref_re referrer)
    then
      (* Drop the first and last characters (the surrounding quotes);
         [max 0] keeps the length non-negative for a one-character word. *)
      C.incr refs
        (String.sub referrer 1 (max 0 (String.length referrer - 2)))
  end

(** [process_line str offset len] splits the line found at [offset] in [str]
    into words (stored in the global [b]) and updates the counters.

    Only lines with at least 11 words whose word 5 is ["GET] (with the
    leading quote) are considered:
    - status ["200"]: recorded with the byte count from word 9, or 0 when
      that word is not an integer;
    - status ["304"]: recorded with a byte count of 0;
    - status ["404"]: the URI is counted in [s404s];
    - any other status is ignored.

    Returns [line_end_offset b]. *)
let process_line str offset len =
  find_line_words ~offset b str len;
  if num_words b >= 11 && word b str 5 = "\"GET" then begin
    match word b str 8 with
    | "200" ->
        (* Only a malformed number is expected here; anything else should
           propagate instead of being silently turned into 0. *)
        let bytes = try int_of_string (word b str 9) with Failure _ -> 0 in
        record str b (word b str 6) bytes
    | "304" -> record str b (word b str 6) 0
    | "404" -> C.incr s404s (word b str 6)
    | _ -> ()
  end;
  line_end_offset b

(** [merge ((h, s, c, r), b)] folds one partial result -- the shape produced
    by the [~result] callback below -- into the global counters. *)
let merge ((h, s, c, r), b) =
  List.iter (uncurry2 C.merge) [u_hits, h; s404s, s; clients, c; refs, r];
  C.mergeL u_bytes b

let fmtc = Formatting.fprintf_counter
and fmtbc = Formatting.fprintf_bytecounter

(* [int_of_argv idx ~default] reads command-line argument [idx] as an
   integer, falling back to [default] when the argument is absent
   ([Invalid_argument] from [Array.get]) or not a number ([Failure] from
   [int_of_string]). *)
let int_of_argv idx ~default =
  try int_of_string (Array.get Sys.argv idx)
  with Failure _ | Invalid_argument _ -> default

(* Number of workers: second command-line argument, default 1. *)
let workers = int_of_argv 2 ~default:1

let () =
  (* Fail with a readable message instead of an uncaught
     [Invalid_argument "index out of bounds"]; exit status 2 matches what an
     uncaught exception produces. *)
  if Array.length Sys.argv < 2 then begin
    Printf.eprintf "usage: %s LOGFILE [WORKERS [CHUNKS]]\n" Sys.argv.(0);
    exit 2
  end;
  (* NOTE(review): presumably [~init] resets the counters in each worker,
     [~result] exports them and [~merge] folds them back into this
     process's counters -- confirm against [File_processor.process_lines]. *)
  File_processor.process_lines
    ~workers
    ~chunks:(int_of_argv 3 ~default:workers)
    ~init:(fun () ->
      List.iter C.clear [u_hits; s404s; clients; refs];
      C.clear u_bytes)
    ~result:(fun () ->
      (apply4 C.to_l (u_hits, s404s, clients, refs), C.to_l u_bytes))
    ~process_line
    ~merge
    (Array.get Sys.argv 1);
  report "URIs by hit" fmtc u_hits;
  report "URIs by bytes" fmtbc u_bytes;
  report "404s" fmtc s404s;
  report "client addresses" fmtc clients;
  report "referrers" fmtc refs