From 9a2ccc39e099846e0aaf8a7b37f101b01cbaaafb Mon Sep 17 00:00:00 2001
From: John Whitington <john@coherentgraphics.co.uk>
Date: Wed, 8 Oct 2014 16:10:27 +0100
Subject: [PATCH] Added xmlm in, for use reading and writing XMP metadata

---
 Makefile |    2 +-
 xmlm.ml  | 1232 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 xmlm.mli |  833 ++++++++++++++++++++++++++++++++++++
 3 files changed, 2066 insertions(+), 1 deletion(-)
 create mode 100755 xmlm.ml
 create mode 100755 xmlm.mli

diff --git a/Makefile b/Makefile
index 96aec69..44b800e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 # Build the cpdf command line tools and top level
-MODS = cpdfstrftime cpdf cpdfcommand
+MODS = xmlm cpdfstrftime cpdf cpdfcommand
 
 SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml
 
diff --git a/xmlm.ml b/xmlm.ml
new file mode 100755
index 0000000..9f78b4e
--- /dev/null
+++ b/xmlm.ml
@@ -0,0 +1,1232 @@
+(*---------------------------------------------------------------------------
+   Copyright 2007 Daniel C. Bünzli. All rights reserved.
+   Distributed under a BSD3 license, see license at the end of the file.
+   %%NAME%% release %%VERSION%%
+  ---------------------------------------------------------------------------*)
+
+module Std_string = String
+module Std_buffer = Buffer
+
+type std_string = string
+type std_buffer = Buffer.t
+
+module type String = sig
+  type t
+  val empty : t
+  val length : t -> int
+  val append : t -> t -> t
+  val lowercase : t -> t
+  val iter : (int -> unit) -> t -> unit
+  val of_string : std_string -> t
+  val to_utf_8 : ('a -> std_string -> 'a) -> 'a -> t -> 'a
+  val compare : t -> t -> int
+end
+
+module type Buffer = sig
+  type string
+  type t
+  exception Full
+  val create : int -> t
+  val add_uchar : t -> int -> unit
+  val clear : t -> unit
+  val contents : t -> string
+  val length : t -> int
+end
+
+module type S = sig
+  type string
+  type encoding = [
+    | `UTF_8 | `UTF_16 | `UTF_16BE | `UTF_16LE | `ISO_8859_1 | `US_ASCII ]
+  type dtd = string option
+  type name = string * string
+  type attribute = name * string
+  type tag = name * attribute list
+  type signal = [ `Dtd of dtd | `El_start of tag | `El_end | `Data of string ]
+
+  val ns_xml : string
+  val ns_xmlns : string
+
+  type pos = int * int
+  type error = [
+    | `Max_buffer_size
+    | `Unexpected_eoi
+    | `Malformed_char_stream
+    | `Unknown_encoding of string
+    | `Unknown_entity_ref of string
+    | `Unknown_ns_prefix of string
+    | `Illegal_char_ref of string
+    | `Illegal_char_seq of string
+    | `Expected_char_seqs of string list * string
+    | `Expected_root_element ]
+
+  exception Error of pos * error
+  val error_message : error -> string
+
+  type source = [
+    | `Channel of in_channel
+    | `String of int * std_string
+    | `Fun of (unit -> int) ]
+
+  type input
+
+  val make_input : ?enc:encoding option -> ?strip:bool ->
+    ?ns:(string -> string option) ->
+    ?entity: (string -> string option) -> source -> input
+
+  val input : input -> signal
+
+  val input_tree : el:(tag -> 'a list -> 'a) -> data:(string -> 'a)  ->
+    input -> 'a
+
+  val input_doc_tree : el:(tag -> 'a list -> 'a) -> data:(string -> 'a) ->
+    input -> (dtd * 'a)
+
+  val peek : input -> signal
+  val eoi : input -> bool
+  val pos : input -> pos
+
+  type 'a frag = [ `El of tag * 'a list | `Data of string ]
+  type dest = [
+    | `Channel of out_channel | `Buffer of std_buffer | `Fun of (int -> unit) ]
+
+  type output
+  val make_output : ?decl:bool -> ?nl:bool -> ?indent:int option ->
+    ?ns_prefix:(string -> string option) -> dest -> output
+
+  val output_depth : output -> int
+  val output : output -> signal -> unit
+  val output_tree : ('a -> 'a frag) -> output -> 'a -> unit
+  val output_doc_tree : ('a -> 'a frag) -> output -> (dtd * 'a) -> unit
+end
+
+
+(* Unicode character lexers *)
+
+exception Malformed                 (* for character stream, internal only. *)
+
+let utf8_len = [|        (* Char byte length according to first UTF-8 byte. *)
+  1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1;
+  1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1;
+  1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1;
+  1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1;
+  1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1;
+  1; 1; 1; 1; 1; 1; 1; 1; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
+  0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
+  0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
+  0; 0; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2;
+  2; 2; 2; 2; 2; 2; 2; 2; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3;
+  4; 4; 4; 4; 4; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0 |]
+
+let uchar_utf8 i =
+  let b0 = i () in
+  begin match utf8_len.(b0) with
+  | 0 -> raise Malformed
+  | 1 -> b0
+  | 2 ->
+      let b1 = i () in
+      if b1 lsr 6 != 0b10 then raise Malformed else
+      ((b0 land 0x1F) lsl 6) lor (b1 land 0x3F)
+  | 3 ->
+      let b1 = i () in
+      let b2 = i () in
+      if b2 lsr 6 != 0b10 then raise Malformed else
+      begin match b0 with
+      | 0xE0 -> if b1 < 0xA0 || 0xBF < b1 then raise Malformed else ()
+      | 0xED -> if b1 < 0x80 || 0x9F < b1 then raise Malformed else ()
+      | _ -> if b1 lsr 6 != 0b10 then raise Malformed else ()
+      end;
+      ((b0 land 0x0F) lsl 12) lor ((b1 land 0x3F) lsl 6) lor (b2 land 0x3F)
+  | 4 ->
+      let b1 = i () in
+      let b2 = i () in
+      let b3 = i () in
+      if  b3 lsr 6 != 0b10 || b2 lsr 6 != 0b10 then raise Malformed else
+      begin match b0 with
+      | 0xF0 -> if b1 < 0x90 || 0xBF < b1 then raise Malformed else ()
+      | 0xF4 -> if b1 < 0x80 || 0x8F < b1 then raise Malformed else ()
+      | _ -> if b1 lsr 6 != 0b10 then raise Malformed else ()
+      end;
+      ((b0 land 0x07) lsl 18) lor ((b1 land 0x3F) lsl 12) lor
+      ((b2 land 0x3F) lsl 6) lor (b3 land 0x3F)
+  | _ -> assert false
+  end
+
+let int16_be i =
+  let b0 = i () in
+  let b1 = i () in
+  (b0 lsl 8) lor b1
+
+let int16_le i =
+  let b0 = i () in
+  let b1 = i () in
+  (b1 lsl 8) lor b0
+
+let uchar_utf16 int16 i =
+  let c0 = int16 i in
+  if c0 < 0xD800 || c0 > 0xDFFF then c0 else
+  if c0 > 0xDBFF then raise Malformed else
+  let c1 = int16 i in
+  (((c0 land 0x3FF) lsl 10) lor (c1 land 0x3FF)) + 0x10000
+
+let uchar_utf16be = uchar_utf16 int16_be
+let uchar_utf16le = uchar_utf16 int16_le
+let uchar_byte i = i ()
+let uchar_iso_8859_1 i = i ()
+let uchar_ascii i = let b = i () in if b > 127 then raise Malformed else b
+
+(* Functorized streaming XML IO *)
+
+module Make (String : String) (Buffer : Buffer with type string = String.t) =
+struct
+  type string = String.t
+
+  let str = String.of_string
+  let str_eq s s' = (compare s s') = 0
+  let str_empty s = (compare s String.empty) = 0
+  let cat = String.append
+  let str_of_char u =
+    let b = Buffer.create 4 in
+    Buffer.add_uchar b u;
+    Buffer.contents b
+
+  module Ht = Hashtbl.Make (struct type t = string
+      let equal = str_eq
+      let hash = Hashtbl.hash end)
+
+  let u_nl = 0x000A     (* newline *)
+  let u_cr = 0x000D     (* carriage return *)
+  let u_space = 0x0020  (* space *)
+  let u_quot = 0x0022   (* quote *)
+  let u_sharp = 0x0023  (* # *)
+  let u_amp = 0x0026    (* & *)
+  let u_apos = 0x0027   (* ' *)
+  let u_minus = 0x002D  (* - *)
+  let u_slash = 0x002F  (* / *)
+  let u_colon = 0x003A  (* : *)
+  let u_scolon = 0x003B (* ; *)
+  let u_lt = 0x003C     (* < *)
+  let u_eq = 0x003D     (* = *)
+  let u_gt = 0x003E     (* > *)
+  let u_qmark = 0x003F  (* ? *)
+  let u_emark = 0x0021  (* ! *)
+  let u_lbrack = 0x005B (* [ *)
+  let u_rbrack = 0x005D (* ] *)
+  let u_x = 0x0078      (* x *)
+  let u_bom = 0xFEFF    (* BOM *)
+  let u_9 = 0x0039      (* 9 *)
+  let u_F = 0x0046      (* F *)
+  let u_D = 0X0044      (* D *)
+
+  let s_cdata = str "CDATA["
+  let ns_xml = str "http://www.w3.org/XML/1998/namespace"
+  let ns_xmlns = str "http://www.w3.org/2000/xmlns/"
+  let n_xml = str "xml"
+  let n_xmlns = str "xmlns"
+  let n_space = str "space"
+  let n_version = str "version"
+  let n_encoding = str "encoding"
+  let n_standalone = str "standalone"
+  let v_yes = str "yes"
+  let v_no = str "no"
+  let v_preserve = str "preserve"
+  let v_default = str "default"
+  let v_version_1_0 = str "1.0"
+  let v_version_1_1 = str "1.1"
+  let v_utf_8 = str "utf-8"
+  let v_utf_16 = str "utf-16"
+  let v_utf_16be = str "utf-16be"
+  let v_utf_16le = str "utf-16le"
+  let v_iso_8859_1 = str "iso-8859-1"
+  let v_us_ascii = str "us-ascii"
+  let v_ascii = str "ascii"
+
+  let name_str (p,l) = if str_empty p then l else cat p (cat (str ":") l)
+
+  (* Basic types and values *)
+
+  type encoding = [
+    | `UTF_8 | `UTF_16 | `UTF_16BE | `UTF_16LE | `ISO_8859_1 | `US_ASCII ]
+  type dtd = string option
+  type name = string * string
+  type attribute = name * string
+  type tag = name * attribute list
+  type signal = [ `Dtd of dtd | `El_start of tag | `El_end | `Data of string ]
+
+  (* Input *)
+
+  type pos = int * int
+  type error = [
+    | `Max_buffer_size
+    | `Unexpected_eoi
+    | `Malformed_char_stream
+    | `Unknown_encoding of string
+    | `Unknown_entity_ref of string
+    | `Unknown_ns_prefix of string
+    | `Illegal_char_ref of string
+    | `Illegal_char_seq of string
+    | `Expected_char_seqs of string list * string
+    | `Expected_root_element ]
+
+  exception Error of pos * error
+
+  let error_message e =
+    let bracket l v r = cat (str l) (cat v (str r)) in
+    match e with
+    | `Expected_root_element -> str "expected root element"
+    | `Max_buffer_size -> str "maximal buffer size exceeded"
+    | `Unexpected_eoi -> str "unexpected end of input"
+    | `Malformed_char_stream -> str "malformed character stream"
+    | `Unknown_encoding e -> bracket "unknown encoding (" e ")"
+    | `Unknown_entity_ref e -> bracket "unknown entity reference (" e ")"
+    | `Unknown_ns_prefix e -> bracket "unknown namespace prefix (" e ")"
+    | `Illegal_char_ref s -> bracket "illegal character reference (#" s ")"
+    | `Illegal_char_seq s ->
+        bracket "character sequence illegal here (\"" s "\")"
+    | `Expected_char_seqs (exps, fnd) ->
+        let exps =
+          let exp acc v = cat acc (bracket "\"" v "\", ") in
+          List.fold_left exp String.empty exps
+        in
+        cat (str "expected one of these character sequence: ")
+          (cat exps (bracket "found \"" fnd "\""))
+
+  type limit =                                       (* XML is odd to parse. *)
+    | Stag of name   (* '<' qname *)
+    | Etag of name   (* '</' qname whitespace* *)
+    | Pi of name     (* '<?' qname *)
+    | Comment        (* '<!--' *)
+    | Cdata          (* '<![CDATA[' *)
+    | Dtd            (* '<!' *)
+    | Text           (* other character *)
+    | Eoi            (* End of input *)
+
+  type source = [
+    | `Channel of in_channel
+    | `String of int * std_string
+    | `Fun of (unit -> int) ]
+
+  type input =
+    { enc : encoding option;                           (* Expected encoding. *)
+      strip : bool;               (* Whitespace stripping default behaviour. *)
+      fun_ns : string -> string option;               (* Namespace callback. *)
+      fun_entity : string -> string option;    (* Entity reference callback. *)
+      i : unit -> int;                                  (* Byte level input. *)
+      mutable uchar : (unit -> int) -> int;      (* Unicode character lexer. *)
+      mutable c : int;                               (* Character lookahead. *)
+      mutable cr : bool;                         (* True if last u was '\r'. *)
+      mutable line : int;                            (* Current line number. *)
+      mutable col : int;                           (* Current column number. *)
+      mutable limit : limit;                           (* Last parsed limit. *)
+      mutable peek : signal;                            (* Signal lookahead. *)
+      mutable stripping : bool;             (* True if stripping whitespace. *)
+      mutable last_white : bool;             (* True if last char was white. *)
+      mutable scopes : (name * string list * bool) list;
+         (* Stack of qualified el. name, bound prefixes and strip behaviour. *)
+      ns : string Ht.t;                           (* prefix -> uri bindings. *)
+      ident : Buffer.t;                 (* Buffer for names and entity refs. *)
+      data : Buffer.t; }         (* Buffer for character and attribute data. *)
+
+  let err_input_tree = "input signal not `El_start or `Data"
+  let err_input_doc_tree = "input signal not `Dtd"
+  let err i e = raise (Error ((i.line, i.col), e))
+  let err_illegal_char i u = err i (`Illegal_char_seq (str_of_char u))
+  let err_expected_seqs i exps s = err i (`Expected_char_seqs (exps, s))
+  let err_expected_chars i exps =
+    err i (`Expected_char_seqs (List.map str_of_char exps, str_of_char i.c))
+
+  let u_eoi = max_int
+  let u_start_doc = u_eoi - 1
+  let u_end_doc = u_start_doc - 1
+  let signal_start_stream = `Data String.empty
+
+  let make_input ?(enc = None) ?(strip = false) ?(ns = fun _ -> None)
+      ?(entity = fun _ -> None) src =
+    let i = match src with
+    | `Fun f -> f
+    | `Channel ic -> fun () -> input_byte ic
+    | `String (pos, s) ->
+        let len = Std_string.length s in
+        let pos = ref (pos - 1) in
+        fun () ->
+          incr pos;
+          if !pos = len then raise End_of_file else
+          Char.code (Std_string.get s !pos)
+    in
+    let bindings =
+      let h = Ht.create 15 in
+      Ht.add h String.empty String.empty;
+      Ht.add h n_xml ns_xml;
+      Ht.add h n_xmlns ns_xmlns;
+      h
+    in
+    { enc = enc; strip = strip; fun_ns  = ns; fun_entity = entity;
+      i = i; uchar = uchar_byte; c = u_start_doc; cr = false;
+      line = 1; col = 0; limit = Text; peek = signal_start_stream;
+      stripping = strip; last_white = true; scopes = []; ns = bindings;
+      ident = Buffer.create 64; data = Buffer.create 1024; }
+
+  (* Bracketed non-terminals in comments refer to XML 1.0 non terminals *)
+
+  let r : int -> int -> int -> bool = fun u a b -> a <= u && u <= b
+  let is_white = function 0x0020 | 0x0009 | 0x000D | 0x000A -> true | _ -> false
+
+  let is_char = function                                           (* {Char} *)
+  | u when r u 0x0020 0xD7FF -> true
+  | 0x0009 | 0x000A | 0x000D -> true
+  | u when r u 0xE000 0xFFFD || r u 0x10000 0x10FFFF -> true
+  | _ -> false
+
+  let is_digit u = r u 0x0030 0x0039
+  let is_hex_digit u =
+    r u 0x0030 0x0039 || r u 0x0041 0x0046 || r u 0x0061 0x0066
+
+  let comm_range u =                            (* common to functions below *)
+    r u 0x00C0 0x00D6 || r u 0x00D8 0x00F6 || r u 0x00F8 0x02FF ||
+    r u 0x0370 0x037D || r u 0x037F 0x1FFF || r u 0x200C 0x200D ||
+    r u 0x2070 0x218F || r u 0x2C00 0x2FEF || r u 0x3001 0xD7FF ||
+    r u 0xF900 0xFDCF || r u 0xFDF0 0xFFFD || r u 0x10000 0xEFFFF
+
+  let is_name_start_char = function       (* {NameStartChar} - ':' (XML 1.1) *)
+  | u when r u 0x0061 0x007A || r u 0x0041 0x005A -> true   (* [a-z] | [A-Z] *)
+  | u when is_white u -> false
+  | 0x005F -> true                                                    (* '_' *)
+  | u when comm_range u -> true
+  | _ -> false
+
+  let is_name_char = function                  (* {NameChar} - ':' (XML 1.1) *)
+  | u when r u 0x0061 0x007A || r u 0x0041 0x005A -> true   (* [a-z] | [A-Z] *)
+  | u when is_white u -> false
+  | u when  r u 0x0030 0x0039 -> true                               (* [0-9] *)
+  | 0x005F | 0x002D | 0x002E | 0x00B7 -> true                 (* '_' '-' '.' *)
+  | u when comm_range u || r u 0x0300 0x036F || r u 0x203F 0x2040 -> true
+  | _ -> false
+
+  let rec nextc i =
+    if i.c = u_eoi then err i `Unexpected_eoi;
+    if i.c = u_nl then (i.line <- i.line + 1; i.col <- 1)
+    else i.col <- i.col + 1;
+    i.c <- i.uchar i.i;
+    if not (is_char i.c) then raise Malformed;
+    if i.cr && i.c = u_nl then i.c <- i.uchar i.i;       (* cr nl business *)
+    if i.c = u_cr then (i.cr <- true; i.c <- u_nl) else i.cr <- false
+
+  let nextc_eof i = try nextc i with End_of_file -> i.c <- u_eoi
+  let skip_white i = while (is_white i.c) do nextc i done
+  let skip_white_eof i = while (is_white i.c) do nextc_eof i done
+  let accept i c = if i.c = c then nextc i else err_expected_chars i [ c ]
+
+  let clear_ident i = Buffer.clear i.ident
+  let clear_data i = Buffer.clear i.data
+  let addc_ident i c = Buffer.add_uchar i.ident c
+  let addc_data i c = Buffer.add_uchar i.data c
+
+  let addc_data_strip i c =
+    if is_white c then i.last_white <- true else
+    begin
+      if i.last_white && Buffer.length i.data <> 0 then addc_data i u_space;
+      i.last_white <- false;
+      addc_data i c
+    end
+
+  let expand_name i (prefix, local) =
+    let external_ prefix = match i.fun_ns prefix with
+    | None -> err i (`Unknown_ns_prefix prefix)
+    | Some uri -> uri
+    in
+    try
+      let uri = Ht.find i.ns prefix in
+      if not (str_empty uri) then (uri, local) else
+      if str_empty prefix then String.empty, local else
+      (external_ prefix), local              (* unbound with xmlns:prefix="" *)
+    with Not_found -> external_ prefix, local
+
+  let find_encoding i =                                    (* Encoding mess. *)
+    let reset uchar i = i.uchar <- uchar; i.col <- 0; nextc i in
+    match i.enc with
+    | None ->                                 (* User doesn't know encoding. *)
+        begin match nextc i; i.c with
+        | 0xFE ->                                           (* UTF-16BE BOM. *)
+            nextc i; if i.c <> 0xFF then err i `Malformed_char_stream;
+            reset uchar_utf16be i;
+            true
+        | 0xFF ->                                           (* UTF-16LE BOM. *)
+            nextc i; if i.c <> 0xFE then err i `Malformed_char_stream;
+            reset uchar_utf16le i;
+            true
+        | 0xEF ->                                              (* UTF-8 BOM. *)
+            nextc i; if i.c <> 0xBB then err i `Malformed_char_stream;
+            nextc i; if i.c <> 0xBF then err i `Malformed_char_stream;
+            reset uchar_utf8 i;
+            true
+        | 0x3C | _ ->                    (* UTF-8 or other, try declaration. *)
+            i.uchar <- uchar_utf8;
+            false
+        end
+    | Some e ->                                      (* User knows encoding. *)
+        begin match e with
+        | `US_ASCII -> reset uchar_ascii i
+        | `ISO_8859_1 -> reset uchar_iso_8859_1 i
+        | `UTF_8 ->                                  (* Skip BOM if present. *)
+            reset uchar_utf8 i; if i.c = u_bom then (i.col <- 0; nextc i)
+        | `UTF_16 ->                             (* Which UTF-16 ? look BOM. *)
+            let b0 = nextc i; i.c in
+            let b1 = nextc i; i.c in
+            begin match b0, b1 with
+            | 0xFE, 0xFF -> reset uchar_utf16be i
+            | 0xFF, 0xFE -> reset uchar_utf16le i
+            | _ -> err i `Malformed_char_stream;
+            end
+        | `UTF_16BE ->                               (* Skip BOM if present. *)
+            reset uchar_utf16be i; if i.c = u_bom then (i.col <- 0; nextc i)
+        | `UTF_16LE ->
+            reset uchar_utf16le i; if i.c = u_bom then (i.col <- 0; nextc i)
+        end;
+        true                                      (* Ignore xml declaration. *)
+
+
+  let p_ncname i =                               (* {NCName} (Namespace 1.1) *)
+    clear_ident i;
+    if not (is_name_start_char i.c) then err_illegal_char i i.c else
+    begin
+      addc_ident i i.c; nextc i;
+      while is_name_char i.c do addc_ident i i.c; nextc i done;
+      Buffer.contents i.ident
+    end
+
+  let p_qname i =                                 (* {QName} (Namespace 1.1) *)
+    let n = p_ncname i in
+    if i.c <> u_colon then (String.empty, n) else (nextc i; (n, p_ncname i))
+
+  let p_charref i =                             (* {CharRef}, '&' was eaten. *)
+    let c = ref 0 in
+    clear_ident i;
+    nextc i;
+    if i.c = u_scolon then err i (`Illegal_char_ref String.empty) else
+    begin
+      try
+        if i.c = u_x then
+          begin
+            addc_ident i i.c;
+            nextc i;
+            while (i.c <> u_scolon) do
+              addc_ident i i.c;
+              if not (is_hex_digit i.c) then raise Exit else
+              c := !c * 16 + (if i.c <= u_9 then i.c - 48 else
+                              if i.c <= u_F then i.c - 55 else
+                              i.c - 87);
+              nextc i;
+            done
+          end
+        else
+        while (i.c <> u_scolon) do
+          addc_ident i i.c;
+          if not (is_digit i.c) then raise Exit else
+          c := !c * 10 + (i.c - 48);
+          nextc i
+        done
+      with Exit ->
+        c := -1; while i.c <> u_scolon do addc_ident i i.c; nextc i done
+    end;
+    nextc i;
+    if is_char !c then (clear_ident i; addc_ident i !c; Buffer.contents i.ident)
+    else err i (`Illegal_char_ref (Buffer.contents i.ident))
+
+  let predefined_entities =
+    let h = Ht.create 5 in
+    let e k v = Ht.add h (str k) (str v) in
+    e "lt" "<"; e "gt" ">"; e "amp" "&"; e "apos" "'"; e "quot" "\"";
+    h
+
+  let p_entity_ref i =                        (* {EntityRef}, '&' was eaten. *)
+    let ent = p_ncname i in
+    accept i u_scolon;
+    try Ht.find predefined_entities ent with Not_found ->
+      match i.fun_entity ent with
+      | Some s -> s
+      | None -> err i (`Unknown_entity_ref ent)
+
+  let p_reference i =                                         (* {Reference} *)
+    nextc i; if i.c = u_sharp then p_charref i else p_entity_ref i
+
+  let p_attr_value i =                                    (* {S}? {AttValue} *)
+    skip_white i;
+    let delim =
+      if i.c = u_quot || i.c = u_apos then i.c else
+      err_expected_chars i [ u_quot; u_apos]
+    in
+    nextc i;
+    skip_white i;
+    clear_data i;
+    i.last_white <- true;
+    while (i.c <> delim) do
+      if i.c = u_lt then err_illegal_char i u_lt else
+      if i.c = u_amp then String.iter (addc_data_strip i) (p_reference i)
+      else (addc_data_strip i i.c; nextc i)
+    done;
+    nextc i;
+    Buffer.contents i.data
+
+  let p_attributes i =                            (* ({S} {Attribute})* {S}? *)
+    let rec aux i pre_acc acc =
+      if not (is_white i.c) then pre_acc, acc else
+      begin
+        skip_white i;
+        if i.c = u_slash || i.c = u_gt then pre_acc, acc else
+        begin
+          let (prefix, local) as n = p_qname i in
+          let v = skip_white i; accept i u_eq; p_attr_value i in
+          let att = n, v in
+          if str_empty prefix && str_eq local n_xmlns then
+            begin  (* xmlns *)
+              Ht.add i.ns String.empty v;
+              aux i (String.empty :: pre_acc) (att :: acc)
+            end
+          else if str_eq prefix n_xmlns then
+            begin  (* xmlns:local *)
+              Ht.add i.ns local v;
+              aux i (local :: pre_acc) (att :: acc)
+            end
+          else if str_eq prefix n_xml && str_eq local n_space then
+            begin  (* xml:space *)
+              if str_eq v v_preserve then i.stripping <- false else
+              if str_eq v v_default then i.stripping <- i.strip else ();
+              aux i pre_acc (att :: acc)
+            end
+          else
+          aux i pre_acc (att :: acc)
+        end
+      end
+    in
+    aux i [] []           (* Returns a list of bound prefixes and attributes *)
+
+  let p_limit i =                                   (* Parses a markup limit *)
+    i.limit <-
+      if i.c = u_eoi then Eoi else
+      if i.c <> u_lt then Text else
+      begin
+        nextc i;
+        if i.c = u_qmark then (nextc i; Pi (p_qname i)) else
+        if i.c = u_slash then
+          begin
+            nextc i;
+            let n = p_qname i in
+            skip_white i;
+            Etag n
+          end
+        else if i.c = u_emark then
+          begin
+            nextc i;
+            if i.c = u_minus then (nextc i; accept i u_minus; Comment) else
+            if i.c = u_D then Dtd else
+            if i.c = u_lbrack then
+              begin
+                nextc i;
+                clear_ident i;
+                for k = 1 to 6 do (addc_ident i i.c; nextc i) done;
+                let cdata = Buffer.contents i.ident in
+                if str_eq cdata s_cdata then Cdata else
+                err_expected_seqs i [ s_cdata ] cdata
+              end
+            else
+            err i (`Illegal_char_seq (cat (str "<!") (str_of_char i.c)))
+          end
+        else
+        Stag (p_qname i)
+      end
+
+  let rec skip_comment i =                    (* {Comment}, '<!--' was eaten *)
+    while (i.c <> u_minus) do nextc i done;
+    nextc i;
+    if i.c <> u_minus then skip_comment i else
+    begin
+      nextc i;
+      if i.c <> u_gt then err_expected_chars i [ u_gt ];
+      nextc_eof i
+    end
+
+  let rec skip_pi i =                          (* {PI}, '<?' qname was eaten *)
+    while (i.c <> u_qmark) do nextc i done;
+    nextc i;
+    if i.c <> u_gt then skip_pi i else nextc_eof i
+
+  let rec skip_misc i ~allow_xmlpi = match i.limit with          (* {Misc}* *)
+  | Pi (p,l) when (str_empty p && str_eq n_xml (String.lowercase l)) ->
+      if allow_xmlpi then () else err i (`Illegal_char_seq l)
+  | Pi _ -> skip_pi i; p_limit i; skip_misc i ~allow_xmlpi
+  | Comment -> skip_comment i; p_limit i; skip_misc i ~allow_xmlpi
+  | Text when is_white i.c ->
+      skip_white_eof i; p_limit i; skip_misc i ~allow_xmlpi
+  | _ -> ()
+
+  let p_chardata addc i =           (* {CharData}* ({Reference}{Chardata})* *)
+    while (i.c <> u_lt) do
+      if i.c = u_amp then String.iter (addc i) (p_reference i)
+      else if i.c = u_rbrack then
+        begin
+          addc i i.c;
+          nextc i;
+          if i.c = u_rbrack then begin
+            addc i i.c;
+            nextc i;                                   (* detects ']'*']]>' *)
+            while (i.c = u_rbrack) do addc i i.c; nextc i done;
+            if i.c = u_gt then err i (`Illegal_char_seq (str "]]>"));
+          end
+        end
+      else
+      (addc i i.c; nextc i)
+    done
+
+  let rec p_cdata addc i =                               (* {CData} {CDEnd} *)
+    try while (true) do
+        if i.c = u_rbrack then begin
+          nextc i;
+          while i.c = u_rbrack do
+            nextc i;
+            if i.c = u_gt then (nextc i; raise Exit);
+            addc i u_rbrack
+          done;
+          addc i u_rbrack;
+        end;
+        addc i i.c;
+        nextc i;
+      done with Exit -> ()
+
+  let p_xml_decl i ~ignore_enc ~ignore_utf16 =                (* {XMLDecl}? *)
+    let yes_no = [v_yes; v_no] in
+    let p_val i = skip_white i; accept i u_eq; skip_white i; p_attr_value i in
+    let p_val_exp i exp =
+      let v = p_val i in
+      if not (List.exists (str_eq v) exp) then err_expected_seqs i exp v
+    in
+    match i.limit with
+    | Pi (p, l) when (str_empty p && str_eq l n_xml) ->
+        let v = skip_white i; p_ncname i in
+        if not (str_eq v n_version) then err_expected_seqs i [ n_version ] v;
+        p_val_exp i [v_version_1_0; v_version_1_1];
+        skip_white i;
+        if i.c <> u_qmark then begin
+          let n = p_ncname i in
+          if str_eq n n_encoding then begin
+            let enc = String.lowercase (p_val i) in
+            if not ignore_enc then begin
+              if str_eq enc v_utf_8 then i.uchar <- uchar_utf8 else
+              if str_eq enc v_utf_16be then i.uchar <- uchar_utf16be else
+              if str_eq enc v_utf_16le then i.uchar <- uchar_utf16le else
+              if str_eq enc v_iso_8859_1 then i.uchar <- uchar_iso_8859_1 else
+              if str_eq enc v_us_ascii then i.uchar <- uchar_ascii else
+              if str_eq enc v_ascii then i.uchar <- uchar_ascii else
+              if str_eq enc v_utf_16 then
+                if ignore_utf16 then () else (err i `Malformed_char_stream)
+                (* A BOM should have been found. *)
+              else
+              err i (`Unknown_encoding enc)
+            end;
+            skip_white i;
+            if i.c <> u_qmark then begin
+              let n = p_ncname i in
+              if str_eq n n_standalone then p_val_exp i yes_no else
+              err_expected_seqs i [ n_standalone; str "?>" ] n
+            end
+          end
+          else if str_eq n n_standalone then
+            p_val_exp i yes_no
+          else
+          err_expected_seqs i [ n_encoding; n_standalone; str "?>" ] n
+        end;
+        skip_white i;
+        accept i u_qmark;
+        accept i u_gt;
+        p_limit i
+    | _ -> ()
+
+  let p_dtd_signal i =(* {Misc}* {doctypedecl} {Misc}* *)
+    skip_misc i ~allow_xmlpi:false;
+    if i.limit <> Dtd then `Dtd None else
+    begin
+      let buf = addc_data i in
+      let nest = ref 1 in
+      clear_data i;
+      buf u_lt; buf u_emark;                             (* add eaten "<!" *)
+      while (!nest > 0) do
+        if i.c = u_lt then
+          begin
+            nextc i;
+            if i.c <> u_emark then
+              (buf u_lt; incr nest)
+            else
+            begin
+              nextc i;
+              if i.c <> u_minus then         (* Carefull with comments ! *)
+                (buf u_lt; buf u_emark; incr nest)
+              else
+              begin
+                nextc i;
+                if i.c <> u_minus then
+                  (buf u_lt; buf u_emark; buf u_minus; incr nest)
+                else
+                (nextc i; skip_comment i)
+              end
+            end
+          end
+        else if i.c = u_quot || i.c = u_apos then
+          begin
+            let c = i.c in
+            buf c; nextc i;
+            while (i.c <> c) do (buf i.c; nextc i) done;
+            buf c; nextc i
+          end
+        else if i.c = u_gt then (buf u_gt; nextc i; decr nest)
+        else (buf i.c; nextc i)
+      done;
+      let dtd = Buffer.contents i.data in
+      p_limit i;
+      skip_misc i ~allow_xmlpi:false;
+      `Dtd (Some dtd);
+    end
+
+  let p_data i =
+    let rec bufferize addc i = match i.limit with
+    | Text -> p_chardata addc i; p_limit i; bufferize addc i
+    | Cdata -> p_cdata addc i; p_limit i; bufferize addc i
+    | (Stag _ | Etag _) -> ()
+    | Pi _ -> skip_pi i; p_limit i; bufferize addc i
+    | Comment -> skip_comment i; p_limit i; bufferize addc i
+    | Dtd -> err i (`Illegal_char_seq (str "<!D"))
+    | Eoi -> err i `Unexpected_eoi
+    in
+    clear_data i;
+    i.last_white <- true;
+    bufferize (if i.stripping then addc_data_strip else addc_data) i;
+    let d = Buffer.contents i.data in
+    d
+
+  let p_el_start_signal i n =
+    let expand_att (((prefix, local) as n, v) as att) =
+      if not (str_eq prefix String.empty) then expand_name i n, v else
+      if str_eq local n_xmlns then (ns_xmlns, n_xmlns), v else
+      att (* default namespaces do not influence attributes. *)
+    in
+    let strip = i.stripping in  (* save it here, p_attributes may change it. *)
+    let prefixes, atts = p_attributes i in
+    i.scopes <- (n, prefixes, strip) :: i.scopes;
+    `El_start ((expand_name i n), List.rev_map expand_att atts)
+
+  let p_el_end_signal i n = match i.scopes with
+  | (n', prefixes, strip) :: scopes ->
+      if i.c <> u_gt then err_expected_chars i [ u_gt ];
+      if not (str_eq n n') then err_expected_seqs i [name_str n'] (name_str n);
+      i.scopes <- scopes;
+      i.stripping <- strip;
+      List.iter (Ht.remove i.ns) prefixes;
+      if scopes = [] then i.c <- u_end_doc else (nextc i; p_limit i);
+      `El_end
+  | _ -> assert false
+
+  let p_signal i =
+    if i.scopes = [] then
+      match i.limit with
+      | Stag n -> p_el_start_signal i n
+      | _ -> err i `Expected_root_element
+    else
+    let rec find i = match i.limit with
+    | Stag n -> p_el_start_signal i n
+    | Etag n -> p_el_end_signal i n
+    | Text | Cdata ->
+        let d = p_data i in
+        if str_empty d then find i else `Data d
+    | Pi _ -> skip_pi i; p_limit i; find i
+    | Comment -> skip_comment i; p_limit i; find i
+    | Dtd -> err i (`Illegal_char_seq (str "<!D"))
+    | Eoi -> err i `Unexpected_eoi
+    in
+    begin match i.peek with
+    | `El_start (n, _) ->                   (* finish to input start el. *)
+        skip_white i;
+        if i.c = u_gt then (accept i u_gt; p_limit i) else
+        if i.c = u_slash then
+          begin
+            let tag = match i.scopes with
+            | (tag, _, _) :: _ -> tag | _ -> assert false
+            in
+            (nextc i; i.limit <- Etag tag)
+          end
+        else
+        err_expected_chars i [ u_slash; u_gt ]
+    | _ -> ()
+    end;
+    find i
+
+  let eoi i =
+    try
+      if i.c = u_eoi then true else
+      if i.c <> u_start_doc then false else                (* In a document. *)
+      if i.peek <> `El_end then               (* Start of document sequence. *)
+        begin
+          let ignore_enc = find_encoding i in
+          p_limit i;
+          p_xml_decl i ~ignore_enc ~ignore_utf16:false;
+          i.peek <- p_dtd_signal i;
+          false
+        end
+      else                                          (* Subsequent documents. *)
+      begin
+        nextc_eof i;
+        p_limit i;
+        if i.c = u_eoi then true else
+        begin
+          skip_misc i ~allow_xmlpi:true;
+          if i.c = u_eoi then true else
+          begin
+            p_xml_decl i ~ignore_enc:false ~ignore_utf16:true;
+            i.peek <- p_dtd_signal i;
+            false
+          end
+        end
+      end
+    with
+    | Buffer.Full -> err i `Max_buffer_size
+    | Malformed -> err i `Malformed_char_stream
+    | End_of_file -> err i `Unexpected_eoi
+
+  let peek i = if eoi i then err i `Unexpected_eoi else i.peek
+
+  let input i =
+    try
+      if i.c = u_end_doc then (i.c <- u_start_doc; i.peek) else
+      let s = peek i in
+      i.peek <- p_signal i;
+      s
+    with
+    | Buffer.Full -> err i `Max_buffer_size
+    | Malformed -> err i `Malformed_char_stream
+    | End_of_file -> err i `Unexpected_eoi
+
+  let input_tree ~el ~data i = match input i with
+  | `Data d -> data d
+  | `El_start tag ->
+      let rec aux i tags context = match input i with
+      | `El_start tag -> aux i (tag :: tags) ([] :: context)
+      | `El_end ->
+          begin match tags, context with
+          | tag :: tags', childs :: context' ->
+              let el = el tag (List.rev childs) in
+              begin match context' with
+              | parent :: context'' -> aux i tags' ((el :: parent) :: context'')
+              | [] -> el
+              end
+          | _ -> assert false
+          end
+      | `Data d ->
+          begin match context with
+          | childs :: context' -> aux i tags (((data d) :: childs) :: context')
+          | [] -> assert false
+          end
+      | `Dtd _ -> assert false
+      in
+      aux i (tag :: []) ([] :: [])
+  | _ -> invalid_arg err_input_tree
+
+
+  let input_doc_tree ~el ~data i = match input i with
+  | `Dtd d -> d, input_tree ~el ~data i
+  | _ -> invalid_arg err_input_doc_tree
+
+  let pos i = i.line, i.col
+
+  (* Output *)
+
+  type 'a frag = [ `El of tag * 'a list | `Data of string ]
+  type dest = [
+    | `Channel of out_channel | `Buffer of std_buffer | `Fun of (int -> unit) ]
+
+  type output =
+    { decl : bool;          (* True if the XML declaration should be output. *)
+      nl : bool;                  (* True if a newline is output at the end. *)
+      indent : int option;                          (* Optional indentation. *)
+      fun_prefix : string -> string option;              (* Prefix callback. *)
+      prefixes : string Ht.t;                     (* uri -> prefix bindings. *)
+      outs : std_string -> int -> int -> unit;             (* String output. *)
+      outc : char -> unit;                              (* character output. *)
+      mutable last_el_start : bool;     (* True if last signal was `El_start *)
+      mutable scopes : (name * (string list)) list;
+        (* Qualified el. name and bound uris. *)
+      mutable depth : int; }                                 (* Scope depth. *)
+
+  let err_prefix uri = "unbound namespace (" ^ uri ^ ")"
+  let err_dtd = "dtd signal not allowed here"
+  let err_el_start = "start signal not allowed here"
+  let err_el_end = "end signal without matching start signal"
+  let err_data = "data signal not allowed here"
+
+  let make_output ?(decl = true) ?(nl = false) ?(indent = None)
+      ?(ns_prefix = fun _ ->None) d =
+    let outs, outc = match d with
+    | `Channel c -> (output c), (output_char c)
+    | `Buffer b -> (Std_buffer.add_substring b), (Std_buffer.add_char b)
+    | `Fun f ->
+        let os s p l =
+          for i = p to p + l - 1 do f (Char.code (Std_string.get s i)) done
+        in
+        let oc c = f (Char.code c) in
+        os, oc
+    in
+    let prefixes =
+      let h = Ht.create 10 in
+      Ht.add h String.empty String.empty;
+      Ht.add h ns_xml n_xml;
+      Ht.add h ns_xmlns n_xmlns;
+      h
+    in
+    { decl = decl; outs = outs; outc = outc; nl = nl; indent = indent;
+      last_el_start = false; prefixes = prefixes; scopes = []; depth = -1;
+      fun_prefix = ns_prefix; }
+
+  let output_depth o = o.depth
+  let outs o s = o.outs s 0 (Std_string.length s)
+  let str_utf_8 s = String.to_utf_8 (fun _ s -> s) "" s
+  let out_utf_8 o s = ignore (String.to_utf_8 (fun o s -> outs o s; o) o s)
+
+  let prefix_name o (ns, local) =
+    try
+      if str_eq ns ns_xmlns && str_eq local n_xmlns then (String.empty, n_xmlns)
+      else (Ht.find o.prefixes ns, local)
+    with Not_found ->
+      match o.fun_prefix ns with
+      | None -> invalid_arg (err_prefix (str_utf_8 ns))
+      | Some prefix -> prefix, local
+
+  let bind_prefixes o atts =
+    let add acc ((ns, local), uri) =
+      if not (str_eq ns ns_xmlns) then acc else
+      begin
+        let prefix = if str_eq local n_xmlns then String.empty else local in
+        Ht.add o.prefixes uri prefix;
+        uri :: acc
+      end
+    in
+    List.fold_left add [] atts
+
+  let out_data o s =
+    let out () s =
+      let len = Std_string.length s in
+      let start = ref 0 in
+      let last = ref 0 in
+      let escape e =
+        o.outs s !start (!last - !start);
+        outs o e;
+        incr last;
+        start := !last
+      in
+      while (!last < len) do match Std_string.get s !last with
+      | '<' -> escape "&lt;"         (* Escape markup delimiters. *)
+      | '>' -> escape "&gt;"
+      | '&' -> escape "&amp;"
+   (* | '\'' -> escape "&apos;" *) (* Not needed we use \x22 for attributes. *)
+      | '\x22' -> escape "&quot;"
+      | '\n' | '\t' | '\r' -> incr last
+      | c when c < ' ' -> escape "\xEF\xBF\xBD" (* illegal, subst. by U+FFFD *)
+      | _ -> incr last
+      done;
+      o.outs s !start (!last - !start)
+    in
+    String.to_utf_8 out () s
+
+  let out_qname o (p, l) =
+    if not (str_empty p) then (out_utf_8 o p; o.outc ':');
+    out_utf_8 o l
+
+  let out_attribute o (n, v) =
+    o.outc ' '; out_qname o (prefix_name o n); outs o "=\x22";
+    out_data o v;
+    o.outc '\x22'
+
+  let output o s =
+    let indent o = match o.indent with
+    | None -> ()
+    | Some c -> for i = 1 to (o.depth * c) do o.outc ' ' done
+    in
+    let unindent o = match o.indent with None -> () | Some _ -> o.outc '\n' in
+    if o.depth = -1 then
+      begin match s with
+      | `Dtd d ->
+          if o.decl then outs o "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
+          begin match d with
+          | Some dtd -> out_utf_8 o dtd; o.outc '\n'
+          | None -> ()
+          end;
+          o.depth <- 0
+      | `Data _ -> invalid_arg err_data
+      | `El_start _ -> invalid_arg err_el_start
+      | `El_end -> invalid_arg err_el_end
+      end
+    else
+    begin match s with
+    | `El_start (n, atts) ->
+        if o.last_el_start then (outs o ">"; unindent o);
+        indent o;
+        let uris = bind_prefixes o atts in
+        let qn = prefix_name o n in
+        o.outc '<'; out_qname o qn; List.iter (out_attribute o) atts;
+        o.scopes <- (qn, uris) :: o.scopes;
+        o.depth <- o.depth + 1;
+        o.last_el_start <- true
+    | `El_end ->
+        begin match o.scopes with
+        | (n, uris) :: scopes' ->
+            o.depth <- o.depth - 1;
+            if o.last_el_start then outs o "/>" else
+            begin
+              indent o;
+              outs o "</"; out_qname o n; o.outc '>';
+            end;
+            o.scopes <- scopes';
+            List.iter (Ht.remove o.prefixes) uris;
+            o.last_el_start <- false;
+            if o.depth = 0 then (if o.nl then o.outc '\n'; o.depth <- -1;)
+            else unindent o
+        | [] -> invalid_arg err_el_end
+        end
+    | `Data d ->
+        if o.last_el_start then (outs o ">"; unindent o);
+        indent o;
+        out_data o d;
+        unindent o;
+        o.last_el_start <- false
+    | `Dtd _ -> failwith err_dtd
+    end
+
+  let output_tree frag o v =
+    let rec aux o = function
+    | (v :: rest) :: context ->
+        begin match frag v with
+        | `El (tag, childs) ->
+            output o (`El_start tag);
+            aux o (childs :: rest :: context)
+        | (`Data d) as signal ->
+            output o signal;
+            aux o (rest :: context)
+        end
+    | [] :: [] -> ()
+    | [] :: context -> output o `El_end; aux o context
+    | [] -> assert false
+    in
+    aux o ([v] :: [])
+
+  let output_doc_tree frag o (dtd, v) =
+    output o (`Dtd dtd);
+    output_tree frag o v
+
+end
+
+(* Default streaming XML IO *)
+
+module String = struct
+  type t = string
+  let empty = ""
+  let length = String.length
+  let append = ( ^ )
+  let lowercase = String.lowercase
+  let iter f s =
+    let len = Std_string.length s in
+    let pos = ref ~-1 in
+    let i () =
+      incr pos;
+      if !pos = len then raise Exit else
+      Char.code (Std_string.get s !pos)
+    in
+    try while true do f (uchar_utf8 i) done with Exit -> ()
+
+  let of_string s = s
+  let to_utf_8 f v x = f v x
+  let compare = String.compare
+end
+
+module Buffer = struct
+  type string = String.t
+  type t = Buffer.t
+  exception Full
+  let create = Buffer.create
+  let add_uchar b u =
+    try
+      (* UTF-8 encodes an uchar in the buffer, assumes u is valid code point. *)
+      let buf c = Buffer.add_char b (Char.chr c) in
+      if u <= 0x007F then
+        (buf u)
+      else if u <= 0x07FF then
+        (buf (0xC0 lor (u lsr 6));
+         buf (0x80 lor (u land 0x3F)))
+      else if u <= 0xFFFF then
+        (buf (0xE0 lor (u lsr 12));
+         buf (0x80 lor ((u lsr 6) land 0x3F));
+         buf (0x80 lor (u land 0x3F)))
+      else
+      (buf (0xF0 lor (u lsr 18));
+       buf (0x80 lor ((u lsr 12) land 0x3F));
+       buf (0x80 lor ((u lsr 6) land 0x3F));
+       buf (0x80 lor (u land 0x3F)))
+    with Failure _ -> raise Full
+
+  let clear b = Buffer.clear b
+  let contents = Buffer.contents
+  let length = Buffer.length
+end
+
+include Make(String) (Buffer)
+
+(* Pretty printers *)
+
+let pp = Format.fprintf
+let rec pp_list ?(pp_sep = Format.pp_print_cut) pp_v ppf = function
+| [] -> ()
+| v :: vs ->
+    pp_v ppf v; if vs <> [] then (pp_sep ppf (); pp_list ~pp_sep pp_v ppf vs)
+
+let pp_name ppf (p, l) = if p <> "" then pp ppf "%s:%s" p l else pp ppf "%s" l
+let pp_attribute ppf (n, v) = pp ppf "@[<1>(%a,@,%S)@]" pp_name n v
+let pp_tag ppf (name, atts) =
+  let pp_sep ppf () = pp ppf ";@ " in
+  pp ppf "@[<1>(%a,@,@[<1>[%a]@])@]"
+    pp_name name (pp_list ~pp_sep pp_attribute) atts
+
+let pp_dtd ppf = function
+| None -> pp ppf "None"
+| Some dtd -> pp ppf "@[<1>(Some@ %S)@]" dtd
+
+let pp_signal ppf = function
+| `Data s -> pp ppf "@[`Data %S@]" s
+| `El_end -> pp ppf "`El_end"
+| `El_start tag -> pp ppf "@[`El_start %a@]" pp_tag tag
+| `Dtd dtd -> pp ppf "@[`Dtd %a@]" pp_dtd dtd
+
+(*----------------------------------------------------------------------------
+  Copyright 2007 Daniel C. Bünzli
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are
+  met:
+
+  1. Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+
+  2. Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the
+     distribution.
+
+  3. Neither the name of Daniel C. Bünzli nor the names of
+     contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+  ---------------------------------------------------------------------------*)
diff --git a/xmlm.mli b/xmlm.mli
new file mode 100755
index 0000000..c38ee8d
--- /dev/null
+++ b/xmlm.mli
@@ -0,0 +1,833 @@
+(*---------------------------------------------------------------------------
+   Copyright 2007 Daniel C. Bünzli. All rights reserved.
+   Distributed under a BSD3 license, see license at the end of the file.
+   %%NAME%% release %%VERSION%%
+  ---------------------------------------------------------------------------*)
+
+(** Streaming XML codec.
+
+    A well-formed sequence of {{!signal}signals} represents an
+    {{:http://www.w3.org/TR/REC-xml}XML} document tree traversal in
+    depth first order (this has nothing to do with XML
+    well-formedness). Input pulls a well-formed sequence of signals
+    from a data source and output pushes a well-formed sequence of
+    signals to a data destination. Functions are provided to easily
+    transform sequences of signals to/from arborescent data structures.
+
+    Consult the {{!io}features and limitations} and {{!ex}examples}
+    of use.
+
+    {e Release %%VERSION%% — %%MAINTAINER%% }
+
+    {3 References}
+    {ul
+    {- Tim Bray.
+    {e {{:http://www.xml.com/axml/axml.html}The annotated XML Specification}},
+    1998.}
+    {- Tim Bray et al.
+    {e {{:http://www.w3.org/TR/xml-names11}Namespaces in XML 1.1 (2nd ed.)}},
+    2006.}} *)
+
+(**  {1 Basic types and values} *)
+
+(** The type for character encodings. For [`UTF_16], endianness is
+    determined from the
+    {{:http://www.unicode.org/unicode/faq/utf_bom.html#BOM}BOM}. *)
+type encoding = [
+  | `UTF_8
+  | `UTF_16
+  (** Endianness determined from the
+      {{:http://www.unicode.org/unicode/faq/utf_bom.html#BOM}BOM}. *)
+  | `UTF_16BE
+  | `UTF_16LE
+  | `ISO_8859_1
+  | `US_ASCII ]
+
+type dtd = string option
+(** The type for the optional
+    {{:http://www.w3.org/TR/REC-xml/#dt-doctype}DTD}. *)
+
+type name = string * string
+(** The type for attribute and element's
+    {{:http://www.w3.org/TR/xml-names11/#dt-expname}expanded names}
+    [(uri,local)]. An empty [uri] represents a name without a
+    namespace name, i.e. an unprefixed name
+    that is not under the scope of a default namespace. *)
+
+type attribute = name * string
+(** The type for attributes. Name and attribute data. *)
+
+type tag = name * attribute list
+(** The type for an element tag. Tag name and attribute list. *)
+
+type signal = [ `Dtd of dtd | `El_start of tag | `El_end | `Data of string ]
+(** The type for signals. A {e well-formed} sequence of signals belongs
+    to the language of the [doc] grammar :
+    {[doc ::= `Dtd tree
+tree ::= `El_start child `El_end
+child ::= `Data | tree | epsilon ]}
+    Input and output deal only with well-formed sequences or
+    exceptions are raised.
+*)
+
+val ns_xml : string
+(** Namespace name {{:http://www.w3.org/XML/1998/namespace}value} bound to the
+    reserved ["xml"] prefix. *)
+
+val ns_xmlns : string
+(** Namespace name {{:http://www.w3.org/2000/xmlns/}value} bound to the
+    reserved ["xmlns"] prefix. *)
+
+val pp_dtd : Format.formatter -> dtd -> unit
+(** [pp_dtd ppf dtd] prints an unspecified representation of [dtd] on [ppf]. *)
+
+val pp_name : Format.formatter -> name -> unit
+(** [pp_name ppf name] prints an unspecified representation of [name] on
+    [ppf]. *)
+
+val pp_attribute : Format.formatter -> attribute -> unit
+(** [pp_attribute ppf att] prints an unspecified representation of [att] on
+    [ppf]. *)
+
+val pp_tag : Format.formatter -> tag -> unit
+(** [pp_tag ppf tag] prints an unspecified representation of [tag] on
+    [ppf]. *)
+
+val pp_signal : Format.formatter -> signal -> unit
+(** [pp_signal ppf s] prints an unspecified representation of [s] on
+    [ppf]. *)
+
+(** {1 Input} *)
+
+type pos = int * int
+(** The type for input positions. Line and column number, both start
+    with 1. *)
+
+(** The type for input errors. *)
+type error = [
+  | `Max_buffer_size
+  (** Maximal buffer size exceeded ([Sys.max_string_length]). *)
+  | `Unexpected_eoi
+  (** Unexpected end of input. *)
+  | `Malformed_char_stream
+  (** Malformed underlying character stream. *)
+  | `Unknown_encoding of string
+  (** Unknown encoding. *)
+  | `Unknown_entity_ref of string
+  (** Unknown entity reference, {{!inentity} details}. *)
+  | `Unknown_ns_prefix of string
+  (** Unknown namespace prefix {{!inns} details} *)
+  | `Illegal_char_ref of string
+  (** Illegal character reference. *)
+  | `Illegal_char_seq of string
+  (** Illegal character sequence. *)
+  | `Expected_char_seqs of string list * string
+  (** Expected one of the character sequences in the list but found another. *)
+  | `Expected_root_element
+  (** Expected the document's root element. *) ]
+
+val error_message : error -> string
+(** Converts the error to an english error message. *)
+
+exception Error of pos * error
+(** Raised on input errors. *)
+
+type source = [
+  | `Channel of in_channel | `String of int * string | `Fun of (unit -> int) ]
+(** The type for input sources. For [`String] starts reading at the
+    given integer position. For [`Fun] the function must return the
+    next {e byte} as an [int] and raise [End_of_file] if there is no
+    such byte. *)
+
+type input
+(** The type for input abstractions. *)
+
+val make_input : ?enc:encoding option -> ?strip:bool ->
+  ?ns:(string -> string option) ->
+  ?entity: (string -> string option) -> source -> input
+(** Returns a new input abstraction reading from the given source.
+    {ul
+    {- [enc], character encoding of the document, {{!inenc} details}.
+       Defaults to [None].}
+    {- [strip], strips whitespace in character data, {{!inwspace} details}.
+       Defaults to [false].}
+    {- [ns] is called to bind undeclared namespace prefixes,
+       {{!inns} details}. Default returns always [None].}
+    {- [entity] is called to resolve non predefined entity references,
+       {{!inentity} details}. Default returns always [None].}} *)
+
+val input : input -> signal
+(** Inputs a signal. Repeated invocation of the function with the same
+    input abstraction will generate a {{!signal}well-formed} sequence
+    of signals or an {!Error} is raised. Furthermore there will be no
+    two consecutive [`Data] signals in the sequence and their string
+    is always non empty.
+
+    {b Deprecated} After a well-formed sequence was input another may
+    be input, see {!eoi} and {{!iseq}details}.
+
+    {b Raises} {!Error} on input errors. *)
+
+val input_tree : el:(tag -> 'a list -> 'a) -> data:(string -> 'a)  ->
+  input -> 'a
+(** If the next signal is a :
+    {ul
+    {- [`Data] signal, inputs it and invokes [data] with the character data.}
+    {- [`El_start] signal, inputs the sequence of signals until its
+       matching [`El_end] and invokes [el] and [data] as follows
+    {ul
+    {- [el], is called on each [`El_end] signals with the corresponding
+      [`El_start] tag and the result of the callback invocation for the
+      element's children.}
+    {- [data], is called on each [`Data] signals with the character data.
+      This function won't be called twice consecutively or with the empty
+      string.}}}
+    {- Other signals, raises [Invalid_argument].}}
+
+    {b Raises} {!Error} on input errors and [Invalid_argument]
+      if the next signal is not [`El_start] or [`Data]. *)
+
+val input_doc_tree : el:(tag -> 'a list -> 'a) -> data:(string -> 'a) ->
+  input -> (dtd * 'a)
+(** Same as {!input_tree} but reads a complete {{!signal}well-formed}
+    sequence of signals.
+
+    {b Raises} {!Error} on input errors and [Invalid_argument]
+     if the next signal is not [`Dtd]. *)
+
+val peek : input -> signal
+(** Same as {!input} but doesn't remove the signal from the sequence.
+
+    {b Raises} {!Error} on input errors. *)
+
+val eoi : input -> bool
+(** Returns [true] if the end of input is reached. See {{!iseq}details}.
+
+    {b Raises} {!Error} on input errors. *)
+
+val pos : input -> pos
+(** Current position in the input abstraction. *)
+
+(** {1 Output} *)
+
+type 'a frag = [ `El of tag * 'a list | `Data of string ]
+(** The type for deconstructing data structures of type ['a]. *)
+
+type dest = [ `Channel of out_channel | `Buffer of Buffer.t
+            | `Fun of (int -> unit) ]
+(** The type for output destinations. For [`Buffer], the buffer won't
+    be cleared. For [`Fun] the function is called with the output {e
+    bytes} as [int]s. *)
+
+type output
+(** The type for output abstractions. *)
+
+val make_output : ?decl:bool -> ?nl:bool -> ?indent:int option ->
+  ?ns_prefix:(string -> string option) -> dest -> output
+(** Returns a new output abstraction writing to the given destination.
+    {ul
+    {- [decl], if [true] the {{:http://www.w3.org/TR/REC-xml/#NT-XMLDecl} XML
+     declaration} is output (defaults to [true]).}
+    {- [nl], if [true] a newline is output when the root's element [`El_end]
+     signal is output.
+    Defaults to [false].}
+    {- [indent], identation behaviour, see {{!outindent} details}. Defaults to
+      [None].}
+    {- [ns_prefix], undeclared namespace prefix bindings,
+       see {{!outns}details}. Default returns always [None].}} *)
+
+val output : output -> signal -> unit
+(** Outputs a signal.
+
+    {b Deprecated.} After a well-formed sequence of signals was output
+    a new well-formed sequence can be output.
+
+    {b Raises} [Invalid_argument] if the resulting signal sequence on
+    the output abstraction is not {{!signal}well-formed} or if a
+    namespace name could not be bound to a prefix. *)
+
+val output_depth : output -> int
+(** [output_depth o] is [o]'s current element nesting level (undefined
+    before the first [`El_start] and after the last [`El_end]). *)
+
+val output_tree : ('a -> 'a frag) -> output -> 'a -> unit
+(** Outputs signals corresponding to a value by recursively
+    applying the given value deconstructor.
+
+    {b Raises} see {!output}. *)
+
+val output_doc_tree : ('a -> 'a frag) -> output -> (dtd * 'a) -> unit
+(** Same as {!output_tree} but outputs a complete {{!signal}well-formed}
+    sequence of signals.
+
+    {b Raises} see {!output}. *)
+
+(** {1:sto Functorial interface (deprecated)}
+
+    {b WARNING.} The functioral interface is deprecated and will be
+    removed.
+
+    {!Make} allows client to specify types for strings and internal
+    buffers. Among other things this can be used to perform
+    hash-consing or to process the character stream, e.g. to normalize
+    unicode characters or to convert to a custom encoding. *)
+
+type std_string = string
+type std_buffer = Buffer.t
+
+(** Input signature for strings. *)
+module type String = sig
+
+  type t
+  (** The type for strings. *)
+
+  val empty : t
+  (** The empty string. *)
+
+  val length : t -> int
+  (** Returns the length of the string. *)
+
+  val append : t -> t -> t
+  (** Concatenates two strings. *)
+
+  val lowercase : t -> t
+  (** New string with uppercase letter translated
+      to lowercase (correctness is only needed for ASCII
+      {{:http://www.unicode.org/glossary/#code_point}code point}). *)
+
+  val iter : (int -> unit) -> t -> unit
+  (** Iterates over the unicode
+      {{:http://www.unicode.org/glossary/#code_point}code point}
+      of the given string. *)
+
+  val of_string : std_string -> t
+  (** String from an OCaml string. *)
+
+  val to_utf_8 : ('a -> std_string -> 'a) -> 'a -> t -> 'a
+  (** [to_utf_8 f v s], is [f (... (f (f v s1) s2) ...) sn]. Where the
+      concatenation of [s1], [s2], ... [sn] is [s] as an UTF-8 stream. *)
+
+  val compare : t -> t -> int
+  (** String comparison. Binary comparison is sufficent. *)
+end
+
+(** Input signature for internal buffers. *)
+module type Buffer = sig
+
+  type string
+  (** The type for strings. *)
+
+  type t
+  (** The type for buffers. *)
+
+  exception Full
+  (** Raised if the buffer cannot be grown. *)
+
+  val create : int -> t
+  (** Creates a buffer of the given size. *)
+
+  val add_uchar : t -> int -> unit
+  (** Adds the given (guaranteed valid) unicode
+      {{:http://www.unicode.org/glossary/#code_point}code point} to a
+      buffer.
+
+      {b Raises} {!Full} if the buffer cannot be grown. *)
+
+  val clear : t -> unit
+  (** Clears the buffer. *)
+
+  val contents : t -> string
+  (** Returns the buffer contents. *)
+
+  val length : t -> int
+  (** Returns the number of characters contained in the buffer. *)
+end
+
+(** Output signature of {!Make}. *)
+module type S = sig
+
+  (** {1 Basic types and values} *)
+
+  type string
+
+  type encoding = [
+    | `UTF_8 | `UTF_16 | `UTF_16BE | `UTF_16LE | `ISO_8859_1| `US_ASCII ]
+  type dtd = string option
+  type name = string * string
+  type attribute = name * string
+  type tag = name * attribute list
+  type signal = [ `Dtd of dtd | `El_start of tag | `El_end | `Data of string ]
+
+  val ns_xml : string
+  val ns_xmlns : string
+
+  (** {1 Input} *)
+
+  type pos = int * int
+  type error = [
+    | `Max_buffer_size
+    | `Unexpected_eoi
+    | `Malformed_char_stream
+    | `Unknown_encoding of string
+    | `Unknown_entity_ref of string
+    | `Unknown_ns_prefix of string
+    | `Illegal_char_ref of string
+    | `Illegal_char_seq of string
+    | `Expected_char_seqs of string list * string
+    | `Expected_root_element ]
+
+  exception Error of pos * error
+  val error_message : error -> string
+
+  type source = [
+  | `Channel of in_channel
+  | `String of int * std_string
+  | `Fun of (unit -> int) ]
+
+  type input
+
+  val make_input : ?enc:encoding option -> ?strip:bool ->
+    ?ns:(string -> string option) ->
+    ?entity: (string -> string option) -> source -> input
+
+  val input : input -> signal
+
+  val input_tree : el:(tag -> 'a list -> 'a) -> data:(string -> 'a)  ->
+    input -> 'a
+
+  val input_doc_tree : el:(tag -> 'a list -> 'a) -> data:(string -> 'a) ->
+    input -> (dtd * 'a)
+
+  val peek : input -> signal
+  val eoi : input -> bool
+  val pos : input -> pos
+
+  (** {1 Output} *)
+
+  type 'a frag = [ `El of tag * 'a list | `Data of string ]
+  type dest = [
+    | `Channel of out_channel | `Buffer of std_buffer | `Fun of (int -> unit) ]
+
+  type output
+  val make_output : ?decl:bool -> ?nl:bool -> ?indent:int option ->
+    ?ns_prefix:(string -> string option) -> dest -> output
+
+  val output_depth : output -> int
+  val output : output -> signal -> unit
+  val output_tree : ('a -> 'a frag) -> output -> 'a -> unit
+  val output_doc_tree : ('a -> 'a frag) -> output -> (dtd * 'a) -> unit
+end
+
+(** Functor building streaming XML IO with the given strings and buffers. *)
+module Make (String : String) (Buffer : Buffer with type string = String.t) : S
+  with type string = String.t
+
+(** {1:io Features and limitations}
+
+    The module assumes strings are immutable, thus strings
+    the client gives or receives {e during} the input and output process
+    must not be modified.
+    {2:input Input}
+    {3:inenc Encoding}
+
+    The parser supports ASCII, US-ASCII,
+    {{:http://www.faqs.org/rfcs/rfc3629.html} UTF-8},
+    {{:http://www.faqs.org/rfcs/rfc2781.html} UTF-16},
+    {{:http://www.faqs.org/rfcs/rfc2781.html} UTF-16LE},
+    {{:http://www.faqs.org/rfcs/rfc2781.html} UTF-16BE} and
+    {{:http://anubis.dkuug.dk/JTC1/SC2/WG3/docs/n411.pdf}ISO-8559-1}
+    (Latin-1) encoded documents. But strings returned by
+    the library are {b always} UTF-8 encoded.
+
+    The encoding can be specified explicitly using the optional
+    argument [enc]. Otherwise the parser uses UTF-16 or UTF-8 if there is a
+    {{:http://www.unicode.org/unicode/faq/utf_bom.html#BOM}BOM} at the
+    beginning of the document. If there is no BOM it uses the encoding
+    specified in the {{:http://www.w3.org/TR/REC-xml/#NT-XMLDecl} XML
+    declaration}. Finally, if there is no XML declaration UTF-8 is assumed.
+    {3:inwspace White space handling}
+
+    The parser performs
+    {{:http://www.w3.org/TR/REC-xml/#AVNormalize}attribute data
+    normalization} on {e every} attribute data.  This means that
+    attribute data does not have leading and trailling white space and that
+    any white space is collapsed and transformed to a single space
+    character ([U+0020]).
+
+    White space handling of character data depends on the [strip]
+    argument. If [strip] is [true], character data is treated like
+    attribute data, white space before and after elements is removed
+    and any white space is collapsed and transformed to a single
+    space character ([U+0020]), except if the data is under the scope of a {e
+    xml:space} attribute whose value is {e preserve}.  If [strip] is
+    [false] all white space data is preserved as present in the
+    document (however all kinds of
+    {{:http://www.w3.org/TR/REC-xml/#sec-line-ends}line ends} are
+    translated to the newline character ([U+000A]).  {3:inns Namespaces}
+
+    Xmlm's {{!name}names} are
+    {{:http://www.w3.org/TR/xml-names11/#dt-expname}expanded names}.
+    The parser automatically handles the document's namespace
+    declarations.  Undeclared namespace prefixes can be bound via the
+    callback [ns], which must return a namespace name. If [ns] returns
+    [None] an [`Unknown_ns_prefix] error is raised.
+
+    Attributes used for namespace declarations are preserved by the
+    parser. They are in the {!ns_xmlns} namespace. Default namespace
+    declarations made with {i xmlns} have the attribute name
+    [(Xmlm.ns_xmlns, "xmlns")]. Prefix declarations have the prefix as
+    the local name, for example {i xmlns:ex} results in the attribute name
+    [(Xmlm.ns_xmlns, "ex")].
+
+    Regarding constraints on the usage of the {i xml} and {i xmlns}
+    prefixes by documents, the parser does not report errors on violations
+    of the {i must} constraints listed in
+    {{:http://www.w3.org/TR/xml-names11/#xmlReserved}this paragraph}.
+
+    {3:inentity Character and entity references}
+
+    {{:http://www.w3.org/TR/REC-xml/#dt-charref}Character references}
+    and {{:http://www.w3.org/TR/REC-xml/#sec-predefined-ent}predefined
+    entities} are automatically resolved. Other entity references can
+    be resolved by the callback [entity], which must return an UTF-8
+    string corresponding to the
+    replacement character data.  The replacement data is {e not}
+    analysed for further references, it is added to the data as such
+    modulo white space stripping. If [entity] returns [None] the error
+    [`Unknown_entity_ref] is returned.
+
+    {3:iseq Sequences of documents (deprecated)}
+
+    {b WARNING.} This feature is deprecated and will be removed.
+
+    When a well-formed sequence of signals is input, no data is consumed beyond
+    the closing ['>'] of the document's root element.
+
+    If you want to parse a document as
+    {{:http://www.w3.org/TR/REC-xml/#NT-document}defined} in the XML
+    specification, call {!eoi} after a well-formed sequence of
+    signals, it must return [true]. If you expect another document on
+    the same input abstraction a new well-formed sequence of signals
+    can be {!input}. Use {!eoi} to check if a document follows (this
+    may consume data).
+
+    Invoking {!eoi} after a well-formed sequence of signals skips
+    whitespaces, comments and processing instructions until it gets to
+    either an {{:http://www.w3.org/TR/REC-xml/#NT-XMLDecl} XML
+    declaration} or a {{:http://www.w3.org/TR/REC-xml/#dt-doctype}DTD}
+    or the start of a new element or the end of input (in which case
+    {!eoi} returns [true]).  If there is a new document but there is no
+    XML declaration or the declaration specifies UTF-16, the same
+    encoding as for the previous document is used.
+
+    {3:inmisc Miscellaneous}
+    {ul
+    {- Parses the more liberal and simpler XML 1.1
+    {{:http://www.w3.org/TR/xml11/#NT-Name}Name} definition (minus [':'] because
+    of namespaces).}
+    {- The {{:http://www.w3.org/TR/REC-xml/#dt-doctype}DTD} is parsed
+      roughly (no guarantee it is well formed) and its information is ignored.}
+    {- The parser drops
+    {{:http://www.w3.org/TR/REC-xml/#dt-comment}comments},
+    {{:http://www.w3.org/TR/REC-xml/#dt-pi}processing instructions}, and
+    {{:http://www.w3.org/TR/REC-xml/#sec-rmd}standalone declaration}.}
+    {- Element attributes are not checked for uniqueness.}
+    {- Attribute and character data chunks are limited by
+       [Sys.max_string_length].
+       The error [`Max_buffer_size] is raised if the limit is hit.}
+    {- Tail recursive.}
+    {- Non validating.}
+    }
+
+    {2:output Output}
+    {3:outenc Encoding}
+
+    Outputs only {{:http://www.faqs.org/rfcs/rfc3629.html} UTF-8}
+    encoded documents.  Strings given to
+    output functions {b must be} UTF-8 encoded, no checks are
+    performed. Unicode characters that are not legal XML
+    {{:http://www.w3.org/TR/REC-xml/#NT-Char}characters} are replaced
+    by the {{:http://unicode.org/glossary/#replacement_character}Unicode
+    replacement character}.
+
+    {3:outns Namespaces}
+
+    Xmlm's {{:#TYPEname}names} are
+    {{:http://www.w3.org/TR/xml-names11/#dt-expname}expanded names}.
+    Expanded names are automatically converted to
+    {{:http://www.w3.org/TR/xml-names11/#dt-qualname}qualified
+    names} by the output abstraction. There is no particular api to specify
+    prefixes and default namespaces,
+    the actual result depends solely on the output
+    of attributes belonging to the {!ns_xmlns} namespace. For example to set
+    the default namespace of an element to {i http://example.org/myns},
+    use the following attribute :
+    {[(* xmlns='http://example.org/myns' *)
+let default_ns = (Xmlm.ns_xmlns, "xmlns"), "http://example.org/myns"]}
+    To bind the prefix ["ex"] to {i http://example.org/ex}, use the
+    following attribute :
+    {[(* xmlns:ex='http://example.org/ex' *)
+let ex_ns = (Xmlm.ns_xmlns, "ex"), "http://example.org/ex"]}
+    Note that outputing input signals without
+    touching namespace declaration attributes will preserve existing
+    prefixes and bindings provided the same namespace name is not
+    bound to different prefixes in a given context.
+
+    The callback [ns_prefix] of an output abstraction can be used to
+    give a prefix to a namespace name lacking a prefix binding in the
+    current output scope. Given a namespace name the function must return
+    the prefix to use. Note that this
+    will {b not} add any namespace declaration attribute to the
+    output.  If the function returns [None], {!output} will raise
+    [Invalid_argument].  The default function returns always [None].
+    {3:outindent Indentation}
+
+    Output can be indented by specifying the [indent] argument when an
+       output abstraction is created. If [indent] is [None] (default)
+       signal output does not introduce any extra white space.  If
+       [ident] is [Some c], each {!signal} is output on its own line
+       (for empty elements [`El_start] and [`El_end] are collapsed on a single
+       line) and nested elements are indented with [c] space
+       characters.
+
+    {3:oseq Sequences of documents (deprecated)}
+
+    {b WARNING.} This feature is deprecated and will be removed.
+
+    After a well-formed sequence of signals was output, the output
+    abstraction can be reused to output a new well-formed sequence of
+    signals.
+
+    {3:outmisc Miscellaneous}
+    {ul
+    {- Output on a channel does not flush it.}
+    {- In attribute and character data you provide, markup
+       delimiters ['<'],['>'],['&'], and ['\"'] are
+        automatically escaped to
+        {{:http://www.w3.org/TR/REC-xml/#sec-predefined-ent}predefined
+        entities}.}
+    {- No checks are peformed on the prefix and local part of output
+      names to verify they are
+      {{:http://www.w3.org/TR/xml-names11/#NT-NCName}NCName}s.
+      For example using the tag name [("","dip d")] will produce
+      a non well-formed document because of the space character.}
+    {- Tail recursive.}}
+
+    {2 Tips}
+    {ul
+    {- The best options to do an input/output round trip
+       and preserve as much information as possible is to
+       input with [strip = false] and output with [indent = None].}
+    {- Complete whitespace control on output is achieved
+       with [indent = None] and suitable [`Data] signals}}
+
+    {1:ex Examples}
+
+    {2:exseq Sequential processing}
+
+    Sequential processing has the advantage that you don't need to get
+    the whole document tree in memory to process it.
+
+    The following function reads a {e single} document on an
+    input channel and outputs it.
+{[let id ic oc =
+  let i = Xmlm.make_input (`Channel ic) in
+  let o = Xmlm.make_output (`Channel oc) in
+  let rec pull i o depth =
+    Xmlm.output o (Xmlm.peek i);
+    match Xmlm.input i with
+    | `El_start _ -> pull i o (depth + 1)
+    | `El_end -> if depth = 1 then () else pull i o (depth - 1)
+    | `Data _ -> pull i o depth
+    | `Dtd _ -> assert false
+  in
+  Xmlm.output o (Xmlm.input i); (* `Dtd *)
+  pull i o 0;
+  if not (Xmlm.eoi i) then invalid_arg "document not well-formed"]}
+
+    The following function reads a {e sequence} of documents on an
+    input channel and outputs it.
+{[let id_seq ic oc =
+  let i = Xmlm.make_input (`Channel ic) in
+  let o = Xmlm.make_output ~nl:true (`Channel oc) in
+  while not (Xmlm.eoi i) do Xmlm.output o (Xmlm.input i) done]}
+    The following function reads a {e sequence} of documents on the
+    input channel. In each document's tree it prunes non root elements
+    whose name belongs to [prune_list].
+{[let prune_docs prune_list ic oc =
+  let i = Xmlm.make_input (`Channel ic) in
+  let o = Xmlm.make_output ~nl:true (`Channel oc) in
+  let copy i o = Xmlm.output o (Xmlm.input i) in
+  let prune (name, _) = List.mem name prune_list in
+  let rec process i o d =
+    let rec skip i d = match Xmlm.input i with
+    | `El_start _ -> skip i (d + 1)
+    | `El_end -> if d = 1 then () else skip i (d - 1)
+    | s -> skip i d
+    in
+    match Xmlm.peek i with
+    | `El_start tag when prune tag -> skip i 0; process i o d
+    | `El_start _ -> copy i o; process i o (d + 1)
+    | `El_end -> copy i o; if d = 0 then () else process i o (d - 1)
+    | `Data _ -> copy i o; process i o d
+    | `Dtd _ -> assert false
+  in
+  let rec docs i o =
+    copy i o; (* `Dtd *)
+    copy i o; (* root start *)
+    process i o 0;
+    if Xmlm.eoi i then () else docs i o
+  in
+  docs i o]}
+
+    {2:extree Tree processing}
+
+    A document's sequence of signals can be easily converted
+    to an arborescent data structure. Assume your trees are defined by :
+    {[type tree = E of Xmlm.tag * tree list | D of string]}
+    The following functions input/output xml documents from/to abstractions
+    as value of type [tree].
+{[let in_tree i =
+  let el tag childs = E (tag, childs)  in
+  let data d = D d in
+  Xmlm.input_doc_tree ~el ~data i
+
+let out_tree o t =
+  let frag = function
+  | E (tag, childs) -> `El (tag, childs)
+  | D d -> `Data d
+  in
+  Xmlm.output_doc_tree frag o t]}
+
+    {2:exrow Tabular data processing}
+
+    We show how to process XML data that represents tabular data (some
+    people like do that).
+
+    The file we need to deal with represents nominal data about
+    {{:http://www.w3.org/}W3C bureaucrats}. There are no namespaces
+    and attributes are ignored. The element structure of the document
+    is :
+    {ul {- <list>
+     {ul {- <bureaucrat> represents a W3C bureaucrat
+           (zero or more).
+
+        A bureaucrat contains the following elements, in order.
+        {ul {- <name> its name (mandatory, string).}
+            {- <surname> its surname (mandatory, string).}
+            {- <honest> present iff he implemented one of its spec
+               (optional, empty).}
+            {- <obfuscation_level> its grade on the
+               open scale of obfuscation (mandatory, float).}
+            {- <tr> (zero or more, string), technical reports he
+               worked on.}}}}}}
+
+    In OCaml we represent a W3C bureaucrat by this type :
+{[type w3c_bureaucrat = {
+    name : string;
+    surname : string;
+    honest : bool;
+    obfuscation_level : float;
+    trs : string list; }]}
+    The following functions input and output W3C bureaucrats as lists
+    of values of type [w3c_bureaucrat].
+{[let in_w3c_bureaucrats src =
+  let i = Xmlm.make_input ~strip:true src in
+  let tag n = ("", n), [] in
+  let error () = invalid_arg "parse error" in
+  let accept s i = if Xmlm.input i = s then () else error () in
+  let rec i_seq el acc i = match Xmlm.peek i with
+  | `El_start _ -> i_seq el ((el i) :: acc) i
+  | `El_end -> List.rev acc
+  | _ -> error ()
+  in
+  let i_el n i =
+    accept (`El_start (tag n)) i;
+    let d = match Xmlm.peek i with
+    | `Data d -> ignore (Xmlm.input i); d
+    | `El_end -> ""
+    | _ -> error ()
+    in
+    accept (`El_end) i;
+    d
+  in
+  let i_bureaucrat i =
+    try
+      accept (`El_start (tag "bureaucrat")) i;
+      let name = i_el "name" i in
+      let surname = i_el "surname" i in
+      let honest = match Xmlm.peek i with
+      | `El_start (("", "honest"), []) -> ignore (i_el "honest" i); true
+      | _ -> false
+      in
+      let obf = float_of_string (i_el "obfuscation_level" i) in
+      let trs = i_seq (i_el "tr") [] i in
+      accept (`El_end) i;
+      { name = name; surname = surname; honest = honest;
+        obfuscation_level = obf; trs = trs }
+    with
+    | Failure _ -> error () (* float_of_string *)
+  in
+  accept (`Dtd None) i;
+  accept (`El_start (tag "list")) i;
+  let bl = i_seq i_bureaucrat [] i in
+  accept (`El_end) i;
+  if not (Xmlm.eoi i) then invalid_arg "more than one document";
+  bl
+
+let out_w3c_bureaucrats dst bl =
+  let tag n = ("", n), [] in
+  let o = Xmlm.make_output ~nl:true ~indent:(Some 2) dst in
+  let out = Xmlm.output o in
+  let o_el n d =
+    out (`El_start (tag n));
+    if d <> "" then out (`Data d);
+    out `El_end
+  in
+  let o_bureaucrat b =
+    out (`El_start (tag "bureaucrat"));
+    o_el "name" b.name;
+    o_el "surname" b.surname;
+    if b.honest then o_el "honest" "";
+    o_el "obfuscation_level" (string_of_float b.obfuscation_level);
+    List.iter (o_el "tr") b.trs;
+    out `El_end
+  in
+  out (`Dtd None);
+  out (`El_start (tag "list"));
+  List.iter o_bureaucrat bl;
+  out (`El_end)]}
+*)
+
+(*---------------------------------------------------------------------------
+  Copyright 2007 Daniel C. Bünzli
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are
+  met:
+
+  1. Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+
+  2. Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the
+     distribution.
+
+  3. Neither the name of Daniel C. Bünzli nor the names of
+     contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+  ---------------------------------------------------------------------------*)