%% @copyright 2013 scalaris project http://code.google.com/p/scalaris/

%   Licensed under the Apache License, Version 2.0 (the "License");
%   you may not use this file except in compliance with the License.
%   You may obtain a copy of the License at
%
%       http://www.apache.org/licenses/LICENSE-2.0
%
%   Unless required by applicable law or agreed to in writing, software
%   distributed under the License is distributed on an "AS IS" BASIS,
%   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%   See the License for the specific language governing permissions and
%   limitations under the License.

%% @doc DB back-end using HanoiDB.
%%      HanoiDB is a memory-cached disk backend.
%%      As disks are large (TB) HanoiDB can hold data much larger than RAM (GB).
%%      As disks persist data HanoiDB can be stopped and restarted without data loss.
%%      It is a pure Erlang implementation of Google's LevelDB disk-backed K/V store.
%%      See http://code.google.com/p/leveldb/ for background about storage levels.
%%
%%      How to use scalaris with this hanoidb backend:
%%       -download https://github.com/krestenkrab/hanoidb and compile HanoiDB
%%       -make sure this db_hanoidb.erl file is in src/ (right with db_ets.erl)
%%       -rerun scalaris' configure with --enable-hanoidb
%%          ./configure --enable-hanoidb=/path/to/hanoidb
%%       -rerun make to rebuild scalaris and tests
%%          ./make && ./make test
%%       -enjoy
%%
%%      FIXME choose one ;-)
%%      Two keys K and L are considered equal if K == L yields true.
%%      Two keys K and L are considered equal if they match, i.e. K =:= L
%%
%%      Made after v0.6.1 svn rev 5666.
%% @end
-module(db_hanoidb).

-include("scalaris.hrl").

-behaviour(db_backend_beh).

%% ?IN serialises a term into the binary handed to hanoidb (keys and values),
%% ?OUT turns such a binary back into the term.
-define(IN(E), erlang:term_to_binary(E, [{minor_version, 1}])).
-define(OUT(E), erlang:binary_to_term(E)).

%% primitives
-export([new/1, open/1,
         put/2, get/2, delete/2,
         close/1, close_and_delete/1]).
%% db info
-export([get_name/1, get_load/1]).
%% iteration
-export([foldl/3, foldl/4, foldl/5]).
-export([foldr/3, foldr/4, foldr/5]).

%% Handle used by all functions of this module: the process returned by
%% hanoidb:open/2 together with the directory name that was opened.
-type db() :: {DB::pid(), FileName::nonempty_string()}.
%% '$end_of_table' is not allowed as key() or else iterations won't work!
-type key() :: db_backend_beh:key().
-type entry() :: db_backend_beh:entry().

-ifdef(with_export_type_support).
-export_type([db/0]).
-endif.

-type hanoidb_config_option() ::
          {compress, none | gzip | snappy | lz4}
        | {page_size, pos_integer()}
        | {read_buffer_size, pos_integer()}
        | {write_buffer_size, pos_integer()}
        | {merge_strategy, fast | predictable }
        | {sync_strategy, none | sync | {seconds, pos_integer()}}
        | {expiry_secs, non_neg_integer()}
        | {spawn_opt, list()}.

%% @doc Creates a new DB and returns its handle. The provided DBName is
%%      ignored (like db_toke ;-)): the store is opened in a freshly named,
%%      timestamped directory below the host directory of this node.
%%      Exits with {db_hanoidb, 'cannot create dir', HostDir, Reason} if the
%%      host directory can neither be created nor already exists.
-spec new(DBName::nonempty_string()) -> db().
new(_DBName) ->
    %% a disk backend lives in some host directory named after the node
    NodeDir = util:make_filename(atom_to_list(node())), % like nonode@nohost
    Prefix = case config:read(db_directory) of
                 Configured when not is_atom(Configured) -> Configured;
                 _NotConfigured -> "../data" % undefined, failed, not_found, ok...
             end,
    HostDir = lists:flatten([Prefix, "/", NodeDir]),
    case file:make_dir(HostDir) of
        {error, eexist} -> ok; % one host directory can host several db directories
        ok -> ok;
        {error, Reason} ->
            exit({db_hanoidb, 'cannot create dir', HostDir, Reason})
    end,
    %% inside the host directory every database gets a directory of its own
    %% (hanoidb puts many level files below it); no trailing ".hdb"
    DbDirName = util:make_filename(timestamp_dirname(erlang:now())),
    %% init the new DB (actually a subtree) with hanoidb's default options,
    %% which may need tuning
    new_db(lists:flatten([HostDir, "/", DbDirName]), []).

%% @private Formats a timestamp as "db_YYYY-MM-DD_hh-mm-ss_uuuuuu" using the
%%          local time of Now and its microsecond part.
-spec timestamp_dirname(Now::{non_neg_integer(), non_neg_integer(), non_neg_integer()})
        -> io_lib:chars().
timestamp_dirname({_MegaSecs, _Secs, MicroSecs} = Now) ->
    {{Year, Month, Day}, {Hour, Minute, Second}} = calendar:now_to_local_time(Now),
    io_lib:format("db_~B-~2..0B-~2..0B_~2..0B-~2..0B-~2..0B_~6..0B",
                  [Year, Month, Day, Hour, Minute, Second, MicroSecs]).

%% @doc Re-opens an existing-on-disk database.
%%      BEWARE: use with caution in order to preserve consistency!
-spec open(DBName::nonempty_string()) -> db().
open(FileName) ->
    new_db(FileName, []). % hanoidb's default options. May need tuning.

%% @private Opens (or creates) the hanoidb store located in directory DirName
%%          and returns the handle {Tree, DirName}. Any result other than
%%          {ok, Tree} is logged and raised as error {hanoidb_failed, Reason}.
-spec new_db(DirName::string(), HanoiOptions::[hanoidb_config_option()]) -> db().
new_db(DirName, HanoiOptions) ->
    case hanoidb:open(DirName, HanoiOptions) of % not a file but a dir store
        {ok, Tree} ->
            {Tree, DirName};
        ignore ->
            log:log(error, "[ Node ~w:db_hanoidb ] ~.0p", [self(), ignore]),
            erlang:error({hanoidb_failed, ignore});
        {error, Error2} ->
            log:log(error, "[ Node ~w:db_hanoidb ] ~.0p", [self(), Error2]),
            erlang:error({hanoidb_failed, Error2})
    end.

%% @doc Closes the DB keeping its data on disk.
%%      Returns true as promised by the spec (hanoidb:close/1 itself yields ok).
-spec close(DB::db()) -> true.
close({DB, _FileName}) ->
    ok = hanoidb:close(DB),
    % hanoidb:stop(). Not needed.
    true.

%% @doc Closes the DB and deletes its directory from disk.
%%      The directory name stored in the handle is the very path that was
%%      passed to hanoidb:open/2 (see new_db/2), so it is used as is; it must
%%      not be prefixed with the host directory a second time.
%%      Deletion is best effort: every file or directory that cannot be
%%      removed is logged, the function still returns true.
-spec close_and_delete(DB::db()) -> true.
close_and_delete({_DB, DirName} = State) ->
    true = close(State),
    case file:list_dir(DirName) of
        {ok, Files} ->
            %% list_dir/1 yields bare file names, so they have to be joined
            %% with the directory before they can be deleted
            lists:foreach(
              fun(File) ->
                      delete_logged(fun file:delete/1, filename:join(DirName, File))
              end, Files),
            %% the directory itself can only be removed once it is empty
            delete_logged(fun file:del_dir/1, DirName);
        {error, Reason} ->
            log_delete_failure(DirName, Reason)
    end,
    true.

%% @private Applies DeleteFun (file:delete/1 or file:del_dir/1) to Path and
%%          logs a failure instead of crashing.
-spec delete_logged(DeleteFun::fun((file:name_all()) -> ok | {error, term()}),
                    Path::file:name_all()) -> ok.
delete_logged(DeleteFun, Path) ->
    case DeleteFun(Path) of
        ok -> ok;
        {error, Reason} -> log_delete_failure(Path, Reason)
    end.

%% @private Logs that Path could not be deleted because of Reason.
-spec log_delete_failure(Path::file:name_all(), Reason::term()) -> ok.
log_delete_failure(Path, Reason) ->
    log:log(error, "[ Node ~w:db_hanoidb ] deleting ~.0p failed: ~.0p",
            [self(), Path, Reason]),
    ok.

%% @doc Saves arbitrary tuple Entry in DB DBName and returns the new DB.
%%      The key is expected to be the first element of Entry.
-spec put(DB::db(), Entry::entry()) -> db().
put({DB, _FileName} = State, Entry) ->
    ok = hanoidb:put(DB, ?IN(element(1, Entry)), ?IN(Entry)),
    State.

%% @doc Returns the entry that corresponds to Key or {} if no such tuple exists.
-spec get(DB::db(), Key::key()) -> entry() | {}.
get({Tree, _DirName}, Key) ->
    EncodedKey = ?IN(Key),
    case hanoidb:get(Tree, EncodedKey) of
        {ok, EncodedEntry} -> ?OUT(EncodedEntry);
        not_found -> {}
    end.

%% @doc Removes the tuple stored under Key and hands back the DB handle.
%%      Nothing is changed if no such tuple exists.
-spec delete(DB::db(), Key::key()) -> db().
delete(State = {Tree, _DirName}, Key) ->
    EncodedKey = ?IN(Key),
    ok = hanoidb:delete(Tree, EncodedKey),
    State.

%% @doc Returns the name stored in the DB handle, i.e. the one set by new/1
%%      or open/1.
-spec get_name(DB::db()) -> nonempty_string().
get_name({_Tree, DirName}) ->
    DirName.

%% @doc Returns the number of stored keys by counting them in a full fold.
-spec get_load(DB::db()) -> non_neg_integer().
get_load({Tree, _DirName}) ->
    %% TODO: not really efficient (maybe store the load in the DB?)
    CountOne = fun(_EncodedKey, _EncodedEntry, Count) -> Count + 1 end,
    hanoidb:fold(Tree, CountOne, 0).

%% @doc Folds Fun over all keys of the DB (interval 'all', no limit).
%%      Returns a potentially larger-than-memory dataset. Use with care.
%%      TODO: wouldn't hanoidb:fold(DB, Fun, Acc0) be better than collecting
%%      all keys first and folding over that list?
-spec foldl(DB::db(), Fun::fun((Key::key(), AccIn::A) -> AccOut::A), Acc0::A) -> Acc1::A.
foldl(State, Fun, Acc) ->
    foldl(State, Fun, Acc, all).

%% @equiv foldl(DB, Fun, Acc0, Interval, get_load(DB))
%% @doc   Returns a potentially larger-than-memory dataset. Use with care.
%%        TODO: check whether hanoidb:fold_range/4 with a key range could be
%%        used here instead.
-spec foldl(DB::db(), Fun::fun((Key::key(), AccIn::A) -> AccOut::A), Acc0::A,
            Interval::db_backend_beh:interval()) -> Acc1::A.
foldl(State, Fun, Acc, Interval) ->
    %% -1 tells the helper not to limit the number of keys
    foldl_helper(State, Fun, Acc, Interval, -1).

%% @doc foldl iterates over DB and applies Fun(Key, AccIn) to every key
%%      encountered in Interval. On the first call AccIn == Acc0. The iteration
%%      stops as soon as MaxNum elements have been encountered.
%%      Returns a potentially larger-than-memory dataset. Use with care.
-spec foldl(DB::db(), Fun::fun((Key::key(), AccIn::A) -> AccOut::A), Acc0::A,
            Intervall::db_backend_beh:interval(), MaxNum::non_neg_integer()) -> Acc1::A.
foldl(State, Fun, Acc, Interval, MaxNum) ->
    %% HINT
    %% Fun is only applied in a second pass, after the keys were collected.
    %% It could do a delete (or another write op), but CAN HanoiDB handle
    %% writes while folding? (TODO check YES?)
    %% The keys are accumulated in reversed order, which is undone by folding
    %% the list "from the other side". TODO check this for HanoiDB
    %% TODO check whether hanoidb:fold_range/4 with a limited key range
    %%      could be used instead.
    foldl_helper(State, Fun, Acc, Interval, MaxNum).

%% @private Does the work for foldl/3,4,5. In contrast to foldl/5 it accepts
%%          MaxNum == -1, which signals that all data is to be retrieved.
-spec foldl_helper(DB::db(), Fun::fun((Key::key(), AccIn::A) -> AccOut::A), Acc0::A,
                   Intervall::db_backend_beh:interval(), MaxNum::integer()) -> Acc1::A.
foldl_helper({Tree, _DirName}, Fun, Acc, Interval, MaxNum) ->
    %% hopefully MaxNum caps the number of keys held in memory
    %% TODO: hanoidb:fold_range may be less RAM intensive: no need to keep all
    %%       keys in RAM at once, but continuous folding instead.
    %% db:foldL calls lists:foldR (see HINT in foldl/5)
    lists:foldr(Fun, Acc, get_all_keys(Tree, Interval, MaxNum)).

%% @doc Folds Fun over all keys of the DB (interval 'all', no limit), walking
%%      in the opposite direction of foldl/3.
%%      Returns a potentially larger-than-memory dataset. Use with care.
-spec foldr(DB::db(), Fun::fun((Key::key(), AccIn::A) -> AccOut::A), Acc0::A) -> Acc1::A.
foldr(State, Fun, Acc) ->
    foldr(State, Fun, Acc, all).

%% @equiv foldr(DB, Fun, Acc0, Interval, get_load(DB))
%% @doc   Returns a potentially larger-than-memory dataset. Use with care.
-spec foldr(DB::db(), Fun::fun((Key::key(), AccIn::A) -> AccOut::A), Acc0::A,
            Interval::db_backend_beh:interval()) -> Acc1::A.
foldr(State, Fun, Acc, Interval) ->
    %% -1 tells the helper not to limit the number of keys
    foldr_helper(State, Fun, Acc, Interval, -1).

%% @doc foldr iterates over DB and applies Fun(Entry, AccIn) to every element
%%      encountered in Interval. On the first call AccIn == Acc0.
%%      The iteration stops as soon as MaxNum elements have been encountered.
%%      Returns a potentially larger-than-memory dataset. Use with care.
-spec foldr(DB::db(), Fun::fun((Key::key(), AccIn::A) -> AccOut::A), Acc0::A,
            Intervall::db_backend_beh:interval(), MaxNum::non_neg_integer()) -> Acc1::A.
foldr(State, Fun, Acc, Interval, MaxNum) ->
    foldr_helper(State, Fun, Acc, Interval, MaxNum).

%% @private Does the work for foldr/3,4,5. In contrast to foldr/5 it accepts
%%          MaxNum == -1, which signals that all data is to be retrieved.
-spec foldr_helper(DB::db(), Fun::fun((Key::key(), AccIn::A) -> AccOut::A), Acc0::A,
                   Intervall::db_backend_beh:interval(), MaxNum::integer()) -> Acc1::A.
foldr_helper({Tree, _DirName}, Fun, Acc, Interval, MaxNum) ->
    %% TODO evaluate hanoidb:fold_range/4 with a limited key range
    %% Only the keys are collected here; all keys of the interval are fetched
    %% and the list is cut to MaxNum afterwards.
    AllKeys = get_all_keys(Tree, Interval, -1),
    %% see HINT in foldl/5
    lists:foldl(Fun, Acc, limit_keys(AllKeys, MaxNum)).

%% @private Keeps the first MaxNum elements of Keys; a negative MaxNum keeps
%%          the whole list.
-spec limit_keys(Keys::[key()], MaxNum::integer()) -> [key()].
limit_keys(Keys, MaxNum) when MaxNum < 0 -> Keys;
limit_keys(Keys, MaxNum) -> lists:sublist(Keys, MaxNum).

%% @private get_all_keys/3 retrieves the keys in DB that fall into Interval,
%%          but not more than MaxNum of them, taken in the order in which
%%          hanoidb:fold/3 visits them. MaxNum == -1 retrieves all keys.
%%          The result lists the most recently visited key first.
-spec get_all_keys(pid(), db_backend_beh:interval(), -1 | non_neg_integer()) -> [key()].
get_all_keys(DB, Interval, MaxNum) ->
    %% TODO evaluate converting scalaris:Intervals to hanoidb:ranges
    %%      in order to leverage hanoidb:fold rather than get_all_keys+lists:fold.
    Collect = fun(EncodedKey, _EncodedEntry, AccIn) ->
                      collect_key(Interval, EncodedKey, AccIn)
              end,
    {_Rest, Keys} = hanoidb:fold(DB, Collect, {MaxNum, []}),
    Keys.

%% @private One fold step of get_all_keys/3. The accumulator pairs the number
%%          of keys that may still be taken with the keys found so far; once
%%          the counter is 0 nothing is added any more.
-spec collect_key(Interval::db_backend_beh:interval(), EncodedKey::binary(),
                  AccIn::{integer(), [key()]}) -> AccOut::{integer(), [key()]}.
collect_key(_Interval, _EncodedKey, {0, _} = Full) ->
    Full;
collect_key(Interval, EncodedKey, {Remaining, Found} = Unchanged) ->
    Key = ?OUT(EncodedKey),
    case is_in(Interval, Key) of
        true -> {Remaining - 1, [Key | Found]};
        _ -> Unchanged
    end.
%% @private Checks whether Key lies in the given interval. An interval is
%%          either 'all', a single element {Elem}, or a 4-tuple
%%          {LeftBracket, L, R, RightBracket} where '(' / ')' exclude and
%%          '[' / ']' include the respective bound. Bounds are compared with
%%          the term order, included bounds have to match exactly (=:=).
-spec is_in(db_backend_beh:interval(), term()) -> boolean().
is_in(all, _Key) ->
    true;
is_in({Elem}, Key) ->
    Elem =:= Key;
is_in({'[', Lo, Hi, ']'}, Key) ->
    at_or_above(Key, Lo) andalso at_or_below(Key, Hi);
is_in({'[', Lo, Hi, ')'}, Key) ->
    at_or_above(Key, Lo) andalso Hi > Key;
is_in({'(', Lo, Hi, ']'}, Key) ->
    Lo < Key andalso at_or_below(Key, Hi);
is_in({'(', Lo, Hi, ')'}, Key) ->
    Lo < Key andalso Hi > Key.

%% @private true iff Key is greater than Bound or exactly matches it.
-spec at_or_above(Key::term(), Bound::term()) -> boolean().
at_or_above(Key, Bound) ->
    Bound < Key orelse Bound =:= Key.

%% @private true iff Key is smaller than Bound or exactly matches it.
-spec at_or_below(Key::term(), Bound::term()) -> boolean().
at_or_below(Key, Bound) ->
    Bound > Key orelse Bound =:= Key.

% End of file.