type tree =
| Leaf of char
| Node of tree * tree
type path =
string
let rec find (path : path) (i : int) (tree : tree) : char * int =
assert (0 <= i && i <= String.length path);
match tree with
| Leaf c ->
c, i
| Node (tree0, tree1) ->
assert (i < String.length path);
assert (path.[i] = '0' || path.[i] = '1');
find path (i + 1) (if path.[i] = '0' then tree0 else tree1)
module Q =
BinomialQueue.Make(struct
type t =
tree * int
let compare (_, freq1) (_, freq2) =
freq1 - freq2
end)
type alphabet =
(char, int) Hashtbl.t
let build_tree (alphabet : alphabet) : tree =
assert (Hashtbl.length alphabet >= 2);
let queue : Q.t =
Hashtbl.fold (fun symbol freq queue ->
Q.insert (Leaf symbol, freq) queue
) alphabet Q.empty
in
let rec process (queue : Q.t) : tree =
assert (not (Q.is_empty queue));
let (tree0, freq0), queue = Q.extract queue in
if Q.is_empty queue then
tree0
else
let (tree1, freq1), queue = Q.extract queue in
let tree = Node (tree0, tree1) in
let freq = freq0 + freq1 in
let queue = Q.insert (tree, freq) queue in
process queue
in
process queue
type cipher_text =
string
type encoding_dictionary =
(char, cipher_text) Hashtbl.t
let build_dictionary (tree : tree) : encoding_dictionary =
let dictionary = Hashtbl.create 256 in
let rec traverse (path : string) (tree : tree) : unit =
match tree with
| Leaf c ->
Hashtbl.add dictionary c path
| Node (tree0, tree1) ->
traverse (path ^ "0") tree0;
traverse (path ^ "1") tree1
in
traverse "" tree;
dictionary
let encode_char (dictionary : encoding_dictionary) (c : char) : cipher_text =
try
Hashtbl.find dictionary c
with Not_found ->
assert false
type plain_text =
string
let encode (dictionary : encoding_dictionary) (text : plain_text) : cipher_text =
let buffer = Buffer.create 1024 in
String.iter (fun c ->
Buffer.add_string buffer (encode_char dictionary c)
) text;
Buffer.contents buffer
let decode (tree : tree) (text : cipher_text) : plain_text =
let buffer = Buffer.create 1024 in
let rec loop i =
if i = String.length text then
Buffer.contents buffer
else begin
let c, i = find text i tree in
Buffer.add_char buffer c;
loop i
end
in
loop 0
type decoding_dictionary =
tree
let build_dictionaries alphabet =
let tree = build_tree alphabet in
let dictionary = build_dictionary tree in
dictionary, tree
let write (tree : tree) : string =
let buffer = Buffer.create 1024 in
let rec dump (tree : tree) =
match tree with
| Leaf c ->
Buffer.add_char buffer 'L';
Buffer.add_char buffer c
| Node (tree0, tree1) ->
Buffer.add_char buffer 'N';
dump tree0;
dump tree1
in
dump tree;
Buffer.contents buffer
let read (s : string) : tree =
let i = ref 0 in
let get() =
assert (!i < String.length s);
let c = s.[!i] in
incr i;
c
in
let rec read () : tree =
match get() with
| 'L' ->
let c = get() in
Leaf c
| 'N' ->
let tree0 = read() in
let tree1 = read() in
Node (tree0, tree1)
| _ ->
assert false
in
let tree = read() in
assert (!i = String.length s);
tree
let build_alphabet (text : plain_text) : alphabet =
let table = Hashtbl.create 256 in
String.iter (fun symbol ->
let freq =
try
Hashtbl.find table symbol
with Not_found ->
0
in
Hashtbl.replace table symbol (freq + 1)
) text;
table