diff --git a/cpdf.ml b/cpdf.ml index e37d3e4..0f2abde 100644 --- a/cpdf.ml +++ b/cpdf.ml @@ -3227,8 +3227,8 @@ let rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" (* For OCaml < 4.00 *) let string_trim s = implode - (dropwhile - Pdf.is_whitespace (rev (dropwhile Pdf.is_whitespace (explode s)))) + (rev (dropwhile + Pdf.is_whitespace (rev (dropwhile Pdf.is_whitespace (explode s))))) let combine_with_spaces strs = string_trim diff --git a/xmlm.ml b/xmlm.ml index 1bb8bbc..3430e91 100755 --- a/xmlm.ml +++ b/xmlm.ml @@ -1,7 +1,7 @@ (*--------------------------------------------------------------------------- - Copyright 2007 Daniel C. Bünzli. All rights reserved. - Distributed under a BSD3 license, see license at the end of the file. - %%NAME%% release %%VERSION%% + Copyright (c) 2007 Daniel C. Bünzli. All rights reserved. + Distributed under the ISC license, see terms at the end of the file. + %%NAME%% %%VERSION%% ---------------------------------------------------------------------------*) module Std_string = String @@ -24,7 +24,7 @@ end module type Buffer = sig type string - type t + type t exception Full val create : int -> t val add_uchar : t -> int -> unit @@ -33,88 +33,88 @@ module type Buffer = sig val length : t -> int end -module type S = sig - type string - type encoding = [ +module type S = sig + type string + type encoding = [ | `UTF_8 | `UTF_16 | `UTF_16BE | `UTF_16LE | `ISO_8859_1 | `US_ASCII ] type dtd = string option - type name = string * string + type name = string * string type attribute = name * string type tag = name * attribute list type signal = [ `Dtd of dtd | `El_start of tag | `El_end | `Data of string ] - - val ns_xml : string + + val ns_xml : string val ns_xmlns : string - - type pos = int * int + + type pos = int * int type error = [ - | `Max_buffer_size + | `Max_buffer_size | `Unexpected_eoi | `Malformed_char_stream | `Unknown_encoding of string - | `Unknown_entity_ref of string - | `Unknown_ns_prefix of string - | `Illegal_char_ref of string - | `Illegal_char_seq of string + | `Unknown_entity_ref of string + | `Unknown_ns_prefix of string + | `Illegal_char_ref of string + | `Illegal_char_seq of string | `Expected_char_seqs of string list * string - | `Expected_root_element ] - + | `Expected_root_element ] + exception Error of pos * error - val error_message : error -> string - - type source = [ - | `Channel of in_channel - | `String of int * std_string + val error_message : error -> string + + type source = [ + | `Channel of in_channel + | `String of int * std_string | `Fun of (unit -> int) ] - - type input - - val make_input : ?enc:encoding option -> ?strip:bool -> - ?ns:(string -> string option) -> + + type input + + val make_input : ?enc:encoding option -> ?strip:bool -> + ?ns:(string -> string option) -> ?entity: (string -> string option) -> source -> input - + val input : input -> signal - - val input_tree : el:(tag -> 'a list -> 'a) -> data:(string -> 'a) -> + + val input_tree : el:(tag -> 'a list -> 'a) -> data:(string -> 'a) -> input -> 'a - - val input_doc_tree : el:(tag -> 'a list -> 'a) -> data:(string -> 'a) -> + + val input_doc_tree : el:(tag -> 'a list -> 'a) -> data:(string -> 'a) -> input -> (dtd * 'a) - + val peek : input -> signal val eoi : input -> bool - val pos : input -> pos - + val pos : input -> pos + type 'a frag = [ `El of tag * 'a list | `Data of string ] - type dest = [ + type dest = [ | `Channel of out_channel | `Buffer of std_buffer | `Fun of (int -> unit) ] - + type output - val make_output : ?decl:bool -> ?nl:bool -> ?indent:int option -> + val make_output : ?decl:bool -> ?nl:bool -> ?indent:int option -> ?ns_prefix:(string -> string option) -> dest -> output - + val output_depth : output -> int val output : output -> signal -> unit val output_tree : ('a -> 'a frag) -> output -> 'a -> unit - val output_doc_tree : ('a -> 'a frag) -> output -> (dtd * 'a) -> unit + val output_doc_tree : ('a -> 'a frag) -> output -> (dtd * 'a) -> unit end (* Unicode character lexers *) exception Malformed (* for character stream, internal only. *) - + let utf8_len = [| (* Char byte length according to first UTF-8 byte. *) - 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; - 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; - 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; - 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; - 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; - 1; 1; 1; 1; 1; 1; 1; 1; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; - 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; - 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; - 0; 0; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; - 2; 2; 2; 2; 2; 2; 2; 2; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; + 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; + 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; + 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; + 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; + 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; + 1; 1; 1; 1; 1; 1; 1; 1; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; + 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; + 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; + 0; 0; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; + 2; 2; 2; 2; 2; 2; 2; 2; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 4; 4; 4; 4; 4; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0 |] let uchar_utf8 i = @@ -136,7 +136,7 @@ let uchar_utf8 i = | _ -> if b1 lsr 6 != 0b10 then raise Malformed else () end; ((b0 land 0x0F) lsl 12) lor ((b1 land 0x3F) lsl 6) lor (b2 land 0x3F) - | 4 -> + | 4 -> let b1 = i () in let b2 = i () in let b3 = i () in @@ -146,53 +146,53 @@ let uchar_utf8 i = | 0xF4 -> if b1 < 0x80 || 0x8F < b1 then raise Malformed else () | _ -> if b1 lsr 6 != 0b10 then raise Malformed else () end; - ((b0 land 0x07) lsl 18) lor ((b1 land 0x3F) lsl 12) lor + ((b0 land 0x07) lsl 18) lor ((b1 land 0x3F) lsl 12) lor ((b2 land 0x3F) lsl 6) lor (b3 land 0x3F) - | _ -> assert false + | _ -> assert false end - -let int16_be i = + +let int16_be i = let b0 = i () in let b1 = i () in (b0 lsl 8) lor b1 - -let int16_le i = + +let int16_le i = let b0 = i () in let b1 = i () in - (b1 lsl 8) lor b0 - -let uchar_utf16 int16 i = + (b1 lsl 8) lor b0 + +let uchar_utf16 int16 i = let c0 = int16 i in if c0 < 0xD800 || c0 > 0xDFFF then c0 else if c0 > 0xDBFF then raise Malformed else let c1 = int16 i in (((c0 land 0x3FF) lsl 10) lor (c1 land 0x3FF)) + 0x10000 - + let uchar_utf16be = uchar_utf16 int16_be -let uchar_utf16le = uchar_utf16 int16_le +let uchar_utf16le = uchar_utf16 int16_le let uchar_byte i = i () let uchar_iso_8859_1 i = i () let uchar_ascii i = let b = i () in if b > 127 then raise Malformed else b - + (* Functorized streaming XML IO *) -module Make (String : String) (Buffer : Buffer with type string = String.t) = +module Make (String : String) (Buffer : Buffer with type string = String.t) = struct type string = String.t - + let str = String.of_string let str_eq s s' = (compare s s') = 0 let str_empty s = (compare s String.empty) = 0 - let cat = String.append - let str_of_char u = - let b = Buffer.create 4 in + let cat = String.append + let str_of_char u = + let b = Buffer.create 4 in Buffer.add_uchar b u; Buffer.contents b - - module Ht = Hashtbl.Make (struct type t = string + + module Ht = Hashtbl.Make (struct type t = string let equal = str_eq let hash = Hashtbl.hash end) - + let u_nl = 0x000A (* newline *) let u_cr = 0x000D (* carriage return *) let u_space = 0x0020 (* space *) @@ -216,10 +216,10 @@ struct let u_9 = 0x0039 (* 9 *) let u_F = 0x0046 (* F *) let u_D = 0X0044 (* D *) - - let s_cdata = str "CDATA[" + + let s_cdata = str "CDATA[" let ns_xml = str "http://www.w3.org/XML/1998/namespace" - let ns_xmlns = str "http://www.w3.org/2000/xmlns/" + let ns_xmlns = str "http://www.w3.org/2000/xmlns/" let n_xml = str "xml" let n_xmlns = str "xmlns" let n_space = str "space" @@ -237,39 +237,39 @@ struct let v_utf_16be = str "utf-16be" let v_utf_16le = str "utf-16le" let v_iso_8859_1 = str "iso-8859-1" - let v_us_ascii = str "us-ascii" + let v_us_ascii = str "us-ascii" let v_ascii = str "ascii" let name_str (p,l) = if str_empty p then l else cat p (cat (str ":") l) - + (* Basic types and values *) - - type encoding = [ + + type encoding = [ | `UTF_8 | `UTF_16 | `UTF_16BE | `UTF_16LE | `ISO_8859_1 | `US_ASCII ] type dtd = string option - type name = string * string + type name = string * string type attribute = name * string type tag = name * attribute list type signal = [ `Dtd of dtd | `El_start of tag | `El_end | `Data of string ] - + (* Input *) - - type pos = int * int + + type pos = int * int type error = [ - | `Max_buffer_size + | `Max_buffer_size | `Unexpected_eoi | `Malformed_char_stream | `Unknown_encoding of string - | `Unknown_entity_ref of string - | `Unknown_ns_prefix of string - | `Illegal_char_ref of string - | `Illegal_char_seq of string + | `Unknown_entity_ref of string + | `Unknown_ns_prefix of string + | `Illegal_char_ref of string + | `Illegal_char_seq of string | `Expected_char_seqs of string list * string | `Expected_root_element ] - + exception Error of pos * error - - let error_message e = + + let error_message e = let bracket l v r = cat (str l) (cat v (str r)) in match e with | `Expected_root_element -> str "expected root element" @@ -282,30 +282,30 @@ struct | `Illegal_char_ref s -> bracket "illegal character reference (#" s ")" | `Illegal_char_seq s -> bracket "character sequence illegal here (\"" s "\")" - | `Expected_char_seqs (exps, fnd) -> - let exps = + | `Expected_char_seqs (exps, fnd) -> + let exps = let exp acc v = cat acc (bracket "\"" v "\", ") in List.fold_left exp String.empty exps in - cat (str "expected one of these character sequence: ") + cat (str "expected one of these character sequence: ") (cat exps (bracket "found \"" fnd "\"")) - + type limit = (* XML is odd to parse. *) - | Stag of name (* '<' qname *) - | Etag of name (* ' int) ] - - type input = + + type input = { enc : encoding option; (* Expected encoding. *) strip : bool; (* Whitespace stripping default behaviour. *) fun_ns : string -> string option; (* Namespace callback. *) @@ -325,35 +325,35 @@ struct ns : string Ht.t; (* prefix -> uri bindings. *) ident : Buffer.t; (* Buffer for names and entity refs. *) data : Buffer.t; } (* Buffer for character and attribute data. *) - + let err_input_tree = "input signal not `El_start or `Data" let err_input_doc_tree = "input signal not `Dtd" let err i e = raise (Error ((i.line, i.col), e)) let err_illegal_char i u = err i (`Illegal_char_seq (str_of_char u)) let err_expected_seqs i exps s = err i (`Expected_char_seqs (exps, s)) - let err_expected_chars i exps = + let err_expected_chars i exps = err i (`Expected_char_seqs (List.map str_of_char exps, str_of_char i.c)) - + let u_eoi = max_int let u_start_doc = u_eoi - 1 let u_end_doc = u_start_doc - 1 let signal_start_stream = `Data String.empty - - let make_input ?(enc = None) ?(strip = false) ?(ns = fun _ -> None) - ?(entity = fun _ -> None) src = + + let make_input ?(enc = None) ?(strip = false) ?(ns = fun _ -> None) + ?(entity = fun _ -> None) src = let i = match src with - | `Fun f -> f - | `Channel ic -> fun () -> input_byte ic - | `String (pos, s) -> + | `Fun f -> f + | `Channel ic -> fun () -> input_byte ic + | `String (pos, s) -> let len = Std_string.length s in let pos = ref (pos - 1) in - fun () -> + fun () -> incr pos; - if !pos = len then raise End_of_file else + if !pos = len then raise End_of_file else Char.code (Std_string.get s !pos) in - let bindings = - let h = Ht.create 15 in + let bindings = + let h = Ht.create 15 in Ht.add h String.empty String.empty; Ht.add h n_xml ns_xml; Ht.add h n_xmlns ns_xmlns; @@ -361,38 +361,38 @@ struct in { enc = enc; strip = strip; fun_ns = ns; fun_entity = entity; i = i; uchar = uchar_byte; c = u_start_doc; cr = false; - line = 1; col = 0; limit = Text; peek = signal_start_stream; - stripping = strip; last_white = true; scopes = []; ns = bindings; + line = 1; col = 0; limit = Text; peek = signal_start_stream; + stripping = strip; last_white = true; scopes = []; ns = bindings; ident = Buffer.create 64; data = Buffer.create 1024; } - + (* Bracketed non-terminals in comments refer to XML 1.0 non terminals *) - + let r : int -> int -> int -> bool = fun u a b -> a <= u && u <= b let is_white = function 0x0020 | 0x0009 | 0x000D | 0x000A -> true | _ -> false - + let is_char = function (* {Char} *) | u when r u 0x0020 0xD7FF -> true | 0x0009 | 0x000A | 0x000D -> true | u when r u 0xE000 0xFFFD || r u 0x10000 0x10FFFF -> true | _ -> false - + let is_digit u = r u 0x0030 0x0039 - let is_hex_digit u = + let is_hex_digit u = r u 0x0030 0x0039 || r u 0x0041 0x0046 || r u 0x0061 0x0066 - + let comm_range u = (* common to functions below *) - r u 0x00C0 0x00D6 || r u 0x00D8 0x00F6 || r u 0x00F8 0x02FF || - r u 0x0370 0x037D || r u 0x037F 0x1FFF || r u 0x200C 0x200D || - r u 0x2070 0x218F || r u 0x2C00 0x2FEF || r u 0x3001 0xD7FF || + r u 0x00C0 0x00D6 || r u 0x00D8 0x00F6 || r u 0x00F8 0x02FF || + r u 0x0370 0x037D || r u 0x037F 0x1FFF || r u 0x200C 0x200D || + r u 0x2070 0x218F || r u 0x2C00 0x2FEF || r u 0x3001 0xD7FF || r u 0xF900 0xFDCF || r u 0xFDF0 0xFFFD || r u 0x10000 0xEFFFF - + let is_name_start_char = function (* {NameStartChar} - ':' (XML 1.1) *) | u when r u 0x0061 0x007A || r u 0x0041 0x005A -> true (* [a-z] | [A-Z] *) | u when is_white u -> false | 0x005F -> true (* '_' *) - | u when comm_range u -> true + | u when comm_range u -> true | _ -> false - + let is_name_char = function (* {NameChar} - ':' (XML 1.1) *) | u when r u 0x0061 0x007A || r u 0x0041 0x005A -> true (* [a-z] | [A-Z] *) | u when is_white u -> false @@ -400,70 +400,70 @@ struct | 0x005F | 0x002D | 0x002E | 0x00B7 -> true (* '_' '-' '.' *) | u when comm_range u || r u 0x0300 0x036F || r u 0x203F 0x2040 -> true | _ -> false - - let rec nextc i = + + let rec nextc i = if i.c = u_eoi then err i `Unexpected_eoi; - if i.c = u_nl then (i.line <- i.line + 1; i.col <- 1) + if i.c = u_nl then (i.line <- i.line + 1; i.col <- 1) else i.col <- i.col + 1; i.c <- i.uchar i.i; if not (is_char i.c) then raise Malformed; if i.cr && i.c = u_nl then i.c <- i.uchar i.i; (* cr nl business *) if i.c = u_cr then (i.cr <- true; i.c <- u_nl) else i.cr <- false - + let nextc_eof i = try nextc i with End_of_file -> i.c <- u_eoi - let skip_white i = while (is_white i.c) do nextc i done + let skip_white i = while (is_white i.c) do nextc i done let skip_white_eof i = while (is_white i.c) do nextc_eof i done let accept i c = if i.c = c then nextc i else err_expected_chars i [ c ] - + let clear_ident i = Buffer.clear i.ident let clear_data i = Buffer.clear i.data let addc_ident i c = Buffer.add_uchar i.ident c let addc_data i c = Buffer.add_uchar i.data c - - let addc_data_strip i c = + + let addc_data_strip i c = if is_white c then i.last_white <- true else begin if i.last_white && Buffer.length i.data <> 0 then addc_data i u_space; i.last_white <- false; addc_data i c end - - let expand_name i (prefix, local) = + + let expand_name i (prefix, local) = let external_ prefix = match i.fun_ns prefix with | None -> err i (`Unknown_ns_prefix prefix) | Some uri -> uri in try - let uri = Ht.find i.ns prefix in + let uri = Ht.find i.ns prefix in if not (str_empty uri) then (uri, local) else - if str_empty prefix then String.empty, local else + if str_empty prefix then String.empty, local else (external_ prefix), local (* unbound with xmlns:prefix="" *) with Not_found -> external_ prefix, local - + let find_encoding i = (* Encoding mess. *) - let reset uchar i = i.uchar <- uchar; i.col <- 0; nextc i in + let reset uchar i = i.uchar <- uchar; i.col <- 0; nextc i in match i.enc with | None -> (* User doesn't know encoding. *) - begin match nextc i; i.c with + begin match nextc i; i.c with | 0xFE -> (* UTF-16BE BOM. *) nextc i; if i.c <> 0xFF then err i `Malformed_char_stream; reset uchar_utf16be i; - true + true | 0xFF -> (* UTF-16LE BOM. *) nextc i; if i.c <> 0xFE then err i `Malformed_char_stream; reset uchar_utf16le i; - true + true | 0xEF -> (* UTF-8 BOM. *) nextc i; if i.c <> 0xBB then err i `Malformed_char_stream; nextc i; if i.c <> 0xBF then err i `Malformed_char_stream; reset uchar_utf8 i; true | 0x3C | _ -> (* UTF-8 or other, try declaration. *) - i.uchar <- uchar_utf8; - false + i.uchar <- uchar_utf8; + false end | Some e -> (* User knows encoding. *) - begin match e with + begin match e with | `US_ASCII -> reset uchar_ascii i | `ISO_8859_1 -> reset uchar_iso_8859_1 i | `UTF_8 -> (* Skip BOM if present. *) @@ -471,7 +471,7 @@ struct | `UTF_16 -> (* Which UTF-16 ? look BOM. *) let b0 = nextc i; i.c in let b1 = nextc i; i.c in - begin match b0, b1 with + begin match b0, b1 with | 0xFE, 0xFF -> reset uchar_utf16be i | 0xFF, 0xFE -> reset uchar_utf16le i | _ -> err i `Malformed_char_stream; @@ -482,76 +482,76 @@ struct reset uchar_utf16le i; if i.c = u_bom then (i.col <- 0; nextc i) end; true (* Ignore xml declaration. *) - - - let p_ncname i = (* {NCName} (Namespace 1.1) *) + + + let p_ncname i = (* {NCName} (Namespace 1.1) *) clear_ident i; if not (is_name_start_char i.c) then err_illegal_char i i.c else - begin + begin addc_ident i i.c; nextc i; while is_name_char i.c do addc_ident i i.c; nextc i done; Buffer.contents i.ident end - + let p_qname i = (* {QName} (Namespace 1.1) *) let n = p_ncname i in if i.c <> u_colon then (String.empty, n) else (nextc i; (n, p_ncname i)) - - let p_charref i = (* {CharRef}, '&' was eaten. *) + + let p_charref i = (* {CharRef}, '&' was eaten. *) let c = ref 0 in clear_ident i; nextc i; if i.c = u_scolon then err i (`Illegal_char_ref String.empty) else - begin + begin try - if i.c = u_x then - begin + if i.c = u_x then + begin addc_ident i i.c; nextc i; - while (i.c <> u_scolon) do - addc_ident i i.c; - if not (is_hex_digit i.c) then raise Exit else + while (i.c <> u_scolon) do + addc_ident i i.c; + if not (is_hex_digit i.c) then raise Exit else c := !c * 16 + (if i.c <= u_9 then i.c - 48 else - if i.c <= u_F then i.c - 55 else + if i.c <= u_F then i.c - 55 else i.c - 87); nextc i; done end else - while (i.c <> u_scolon) do + while (i.c <> u_scolon) do addc_ident i i.c; - if not (is_digit i.c) then raise Exit else + if not (is_digit i.c) then raise Exit else c := !c * 10 + (i.c - 48); nextc i done - with Exit -> + with Exit -> c := -1; while i.c <> u_scolon do addc_ident i i.c; nextc i done end; nextc i; if is_char !c then (clear_ident i; addc_ident i !c; Buffer.contents i.ident) else err i (`Illegal_char_ref (Buffer.contents i.ident)) - - let predefined_entities = + + let predefined_entities = let h = Ht.create 5 in let e k v = Ht.add h (str k) (str v) in - e "lt" "<"; e "gt" ">"; e "amp" "&"; e "apos" "'"; e "quot" "\""; + e "lt" "<"; e "gt" ">"; e "amp" "&"; e "apos" "'"; e "quot" "\""; h - + let p_entity_ref i = (* {EntityRef}, '&' was eaten. *) let ent = p_ncname i in accept i u_scolon; - try Ht.find predefined_entities ent with Not_found -> + try Ht.find predefined_entities ent with Not_found -> match i.fun_entity ent with | Some s -> s | None -> err i (`Unknown_entity_ref ent) - + let p_reference i = (* {Reference} *) nextc i; if i.c = u_sharp then p_charref i else p_entity_ref i - + let p_attr_value i = (* {S}? {AttValue} *) skip_white i; - let delim = - if i.c = u_quot || i.c = u_apos then i.c else + let delim = + if i.c = u_quot || i.c = u_apos then i.c else err_expected_chars i [ u_quot; u_apos] in nextc i; @@ -565,24 +565,24 @@ struct done; nextc i; Buffer.contents i.data - - let p_attributes i = (* ({S} {Attribute})* {S}? *) - let rec aux i pre_acc acc = + + let p_attributes i = (* ({S} {Attribute})* {S}? *) + let rec aux i pre_acc acc = if not (is_white i.c) then pre_acc, acc else begin skip_white i; - if i.c = u_slash || i.c = u_gt then pre_acc, acc else - begin + if i.c = u_slash || i.c = u_gt then pre_acc, acc else + begin let (prefix, local) as n = p_qname i in let v = skip_white i; accept i u_eq; p_attr_value i in let att = n, v in if str_empty prefix && str_eq local n_xmlns then - begin (* xmlns *) + begin (* xmlns *) Ht.add i.ns String.empty v; aux i (String.empty :: pre_acc) (att :: acc) end - else if str_eq prefix n_xmlns then - begin (* xmlns:local *) + else if str_eq prefix n_xmlns then + begin (* xmlns:local *) Ht.add i.ns local v; aux i (local :: pre_acc) (att :: acc) end @@ -598,32 +598,32 @@ struct end in aux i [] [] (* Returns a list of bound prefixes and attributes *) - + let p_limit i = (* Parses a markup limit *) i.limit <- if i.c = u_eoi then Eoi else - if i.c <> u_lt then Text else + if i.c <> u_lt then Text else begin nextc i; if i.c = u_qmark then (nextc i; Pi (p_qname i)) else - if i.c = u_slash then - begin - nextc i; - let n = p_qname i in + if i.c = u_slash then + begin + nextc i; + let n = p_qname i in skip_white i; Etag n end - else if i.c = u_emark then - begin + else if i.c = u_emark then + begin nextc i; if i.c = u_minus then (nextc i; accept i u_minus; Comment) else if i.c = u_D then Dtd else - if i.c = u_lbrack then - begin + if i.c = u_lbrack then + begin nextc i; clear_ident i; for k = 1 to 6 do (addc_ident i i.c; nextc i) done; - let cdata = Buffer.contents i.ident in + let cdata = Buffer.contents i.ident in if str_eq cdata s_cdata then Cdata else err_expected_seqs i [ s_cdata ] cdata end @@ -633,39 +633,39 @@ struct else Stag (p_qname i) end - + let rec skip_comment i = (* {Comment}, '