-safe-string
This commit is contained in:
parent
1635580c73
commit
18075b84ce
4
Makefile
4
Makefile
|
@ -7,8 +7,8 @@ RESULT = cpdf
|
||||||
ANNOTATE = true
|
ANNOTATE = true
|
||||||
PACKS = camlpdf
|
PACKS = camlpdf
|
||||||
|
|
||||||
OCAMLNCFLAGS = -g -unsafe-string -w -3 -annot
|
OCAMLNCFLAGS = -g -safe-string -w -3 -annot
|
||||||
OCAMLBCFLAGS = -g -unsafe-string -w -3 -annot
|
OCAMLBCFLAGS = -g -safe-string -w -3 -annot
|
||||||
OCAMLLDFLAGS = -g
|
OCAMLLDFLAGS = -g
|
||||||
|
|
||||||
all : native-code native-code-library byte-code-library top htdoc
|
all : native-code native-code-library byte-code-library top htdoc
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
%FIXME: Finish and document -dump-attachments.
|
||||||
%FIXME: Mention that OpenAction supersedes PageLayout so use -remove-dict-option to get rid of it
|
%FIXME: Mention that OpenAction supersedes PageLayout so use -remove-dict-option to get rid of it
|
||||||
%FIXME: Document new -hard-box option
|
%FIXME: Document new -hard-box option
|
||||||
%FIXME: Document that -upright also shifts the page to 0,0
|
%FIXME: Document that -upright also shifts the page to 0,0
|
||||||
|
|
2
xmlm.ml
2
xmlm.ml
|
@ -961,7 +961,7 @@ struct
|
||||||
let make_output ?(decl = true) ?(nl = false) ?(indent = None)
|
let make_output ?(decl = true) ?(nl = false) ?(indent = None)
|
||||||
?(ns_prefix = fun _ ->None) d =
|
?(ns_prefix = fun _ ->None) d =
|
||||||
let outs, outc = match d with
|
let outs, outc = match d with
|
||||||
| `Channel c -> (output c), (output_char c)
|
| `Channel c -> (output_substring c), (output_char c)
|
||||||
| `Buffer b -> (Std_buffer.add_substring b), (Std_buffer.add_char b)
|
| `Buffer b -> (Std_buffer.add_substring b), (Std_buffer.add_char b)
|
||||||
| `Fun f ->
|
| `Fun f ->
|
||||||
let os s p l =
|
let os s p l =
|
||||||
|
|
488
xmlm.mli
488
xmlm.mli
|
@ -1,57 +1,57 @@
|
||||||
(*---------------------------------------------------------------------------
|
(*---------------------------------------------------------------------------
|
||||||
Copyright 2007 Daniel C. Bünzli. All rights reserved.
|
Copyright (c) 2007 Daniel C. Bünzli. All rights reserved.
|
||||||
Distributed under a BSD3 license, see license at the end of the file.
|
Distributed under the ISC license, see terms at the end of the file.
|
||||||
%%NAME%% release %%VERSION%%
|
%%NAME%% %%VERSION%%
|
||||||
---------------------------------------------------------------------------*)
|
---------------------------------------------------------------------------*)
|
||||||
|
|
||||||
(** Streaming XML codec.
|
(** Streaming XML codec.
|
||||||
|
|
||||||
A well-formed sequence of {{!signal}signals} represents an
|
A well-formed sequence of {{!signal}signals} represents an
|
||||||
{{:http://www.w3.org/TR/REC-xml}XML} document tree traversal in
|
{{:http://www.w3.org/TR/REC-xml}XML} document tree traversal in
|
||||||
depth first order (this has nothing to do with XML
|
depth first order (this has nothing to do with XML
|
||||||
well-formedness). Input pulls a well-formed sequence of signals
|
well-formedness). Input pulls a well-formed sequence of signals
|
||||||
from a data source and output pushes a well-formed sequence of
|
from a data source and output pushes a well-formed sequence of
|
||||||
signals to a data destination. Functions are provided to easily
|
signals to a data destination. Functions are provided to easily
|
||||||
transform sequences of signals to/from arborescent data structures.
|
transform sequences of signals to/from arborescent data structures.
|
||||||
|
|
||||||
Consult the {{!io}features and limitations} and {{!ex}examples}
|
Consult the {{!io}features and limitations} and {{!ex}examples}
|
||||||
of use.
|
of use.
|
||||||
|
|
||||||
{e Release %%VERSION%% — %%MAINTAINER%% }
|
{e %%VERSION%% — {{:%%PKG_HOMEPAGE%% }homepage}}
|
||||||
|
|
||||||
{3 References}
|
{3 References}
|
||||||
{ul
|
{ul
|
||||||
{- Tim Bray.
|
{- Tim Bray.
|
||||||
{e {{:http://www.xml.com/axml/axml.html}The annotated XML Specification}},
|
{e {{:http://www.xml.com/axml/axml.html}The annotated XML Specification}},
|
||||||
1998.}
|
1998.}
|
||||||
{- Tim Bray et al.
|
{- Tim Bray et al.
|
||||||
{e {{:http://www.w3.org/TR/xml-names11}Namespaces in XML 1.1 (2nd ed.)}},
|
{e {{:http://www.w3.org/TR/xml-names11}Namespaces in XML 1.1 (2nd ed.)}},
|
||||||
2006.}} *)
|
2006.}} *)
|
||||||
|
|
||||||
(** {1 Basic types and values} *)
|
(** {1 Basic types and values} *)
|
||||||
|
|
||||||
(** The type for character encodings. For [`UTF_16], endianness is
|
(** The type for character encodings. For [`UTF_16], endianness is
|
||||||
determined from the
|
determined from the
|
||||||
{{:http://www.unicode.org/unicode/faq/utf_bom.html#BOM}BOM}. *)
|
{{:http://www.unicode.org/unicode/faq/utf_bom.html#BOM}BOM}. *)
|
||||||
type encoding = [
|
type encoding = [
|
||||||
| `UTF_8
|
| `UTF_8
|
||||||
| `UTF_16
|
| `UTF_16
|
||||||
(** Endianness determined from the
|
(** Endianness determined from the
|
||||||
{{:http://www.unicode.org/unicode/faq/utf_bom.html#BOM}BOM}. *)
|
{{:http://www.unicode.org/unicode/faq/utf_bom.html#BOM}BOM}. *)
|
||||||
| `UTF_16BE
|
| `UTF_16BE
|
||||||
| `UTF_16LE
|
| `UTF_16LE
|
||||||
| `ISO_8859_1
|
| `ISO_8859_1
|
||||||
| `US_ASCII ]
|
| `US_ASCII ]
|
||||||
|
|
||||||
type dtd = string option
|
type dtd = string option
|
||||||
(** The type for the optional
|
(** The type for the optional
|
||||||
{{:http://www.w3.org/TR/REC-xml/#dt-doctype}DTD}. *)
|
{{:http://www.w3.org/TR/REC-xml/#dt-doctype}DTD}. *)
|
||||||
|
|
||||||
type name = string * string
|
type name = string * string
|
||||||
(** The type for attribute and element's
|
(** The type for attribute and element's
|
||||||
{{:http://www.w3.org/TR/xml-names11/#dt-expname}expanded names}
|
{{:http://www.w3.org/TR/xml-names11/#dt-expname}expanded names}
|
||||||
[(uri,local)]. An empty [uri] represents a name without a
|
[(uri,local)]. An empty [uri] represents a name without a
|
||||||
namespace name, i.e. an unprefixed name
|
namespace name, i.e. an unprefixed name
|
||||||
that is not under the scope of a default namespace. *)
|
that is not under the scope of a default namespace. *)
|
||||||
|
|
||||||
type attribute = name * string
|
type attribute = name * string
|
||||||
|
@ -65,46 +65,69 @@ type signal = [ `Dtd of dtd | `El_start of tag | `El_end | `Data of string ]
|
||||||
to the language of the [doc] grammar :
|
to the language of the [doc] grammar :
|
||||||
{[doc ::= `Dtd tree
|
{[doc ::= `Dtd tree
|
||||||
tree ::= `El_start child `El_end
|
tree ::= `El_start child `El_end
|
||||||
child ::= `Data | tree | epsilon ]}
|
child ::= `Data trees | trees
|
||||||
|
trees ::= tree child | epsilon]}
|
||||||
|
The [trees] production is used to expresses the fact that there will
|
||||||
|
never be two consecutive `Data signals in the children of an element.
|
||||||
|
|
||||||
Input and output deal only with well-formed sequences or
|
Input and output deal only with well-formed sequences or
|
||||||
exceptions are raised.
|
exceptions are raised. However on output consecutive [`Data]
|
||||||
*)
|
signals are allowed. *)
|
||||||
|
|
||||||
val ns_xml : string
|
val ns_xml : string
|
||||||
(** Namespace name {{:http://www.w3.org/XML/1998/namespace}value} bound to the
|
(** Namespace name {{:http://www.w3.org/XML/1998/namespace}value} bound to the
|
||||||
reserved ["xml"] prefix. *)
|
reserved ["xml"] prefix. *)
|
||||||
|
|
||||||
val ns_xmlns : string
|
val ns_xmlns : string
|
||||||
(** Namespace name {{:http://www.w3.org/2000/xmlns/}value} bound to the
|
(** Namespace name {{:http://www.w3.org/2000/xmlns/}value} bound to the
|
||||||
reserved ["xmlns"] prefix. *)
|
reserved ["xmlns"] prefix. *)
|
||||||
|
|
||||||
|
val pp_dtd : Format.formatter -> dtd -> unit
|
||||||
|
(** [pp_dtd ppf dtd] prints an unspecified representation of [dtd] on [ppf]. *)
|
||||||
|
|
||||||
|
val pp_name : Format.formatter -> name -> unit
|
||||||
|
(** [pp_name ppf name] prints an unspecified representation of [name] on
|
||||||
|
[ppf]. *)
|
||||||
|
|
||||||
|
val pp_attribute : Format.formatter -> attribute -> unit
|
||||||
|
(** [pp_attribute ppf att] prints an unspecified representation of [att] on
|
||||||
|
[ppf]. *)
|
||||||
|
|
||||||
|
val pp_tag : Format.formatter -> tag -> unit
|
||||||
|
(** [pp_tag ppf tag] prints an unspecified representation of [tag] on
|
||||||
|
[ppf]. *)
|
||||||
|
|
||||||
|
val pp_signal : Format.formatter -> signal -> unit
|
||||||
|
(** [pp_signal ppf s] prints an unspecified representation of [s] on
|
||||||
|
[ppf]. *)
|
||||||
|
|
||||||
(** {1 Input} *)
|
(** {1 Input} *)
|
||||||
|
|
||||||
type pos = int * int
|
type pos = int * int
|
||||||
(** The type for input positions. Line and column number, both start
|
(** The type for input positions. Line and column number, both start
|
||||||
with 1. *)
|
with 1. *)
|
||||||
|
|
||||||
(** The type for input errors. *)
|
(** The type for input errors. *)
|
||||||
type error = [
|
type error = [
|
||||||
| `Max_buffer_size
|
| `Max_buffer_size
|
||||||
(** Maximal buffer size exceeded ([Sys.max_string_length]). *)
|
(** Maximal buffer size exceeded ([Sys.max_string_length]). *)
|
||||||
| `Unexpected_eoi
|
| `Unexpected_eoi
|
||||||
(** Unexpected end of input. *)
|
(** Unexpected end of input. *)
|
||||||
| `Malformed_char_stream
|
| `Malformed_char_stream
|
||||||
(** Malformed underlying character stream. *)
|
(** Malformed underlying character stream. *)
|
||||||
| `Unknown_encoding of string
|
| `Unknown_encoding of string
|
||||||
(** Unknown encoding. *)
|
(** Unknown encoding. *)
|
||||||
| `Unknown_entity_ref of string
|
| `Unknown_entity_ref of string
|
||||||
(** Unknown entity reference, {{!inentity} details}. *)
|
(** Unknown entity reference, {{!inentity} details}. *)
|
||||||
| `Unknown_ns_prefix of string
|
| `Unknown_ns_prefix of string
|
||||||
(** Unknown namespace prefix {{!inns} details} *)
|
(** Unknown namespace prefix {{!inns} details} *)
|
||||||
| `Illegal_char_ref of string
|
| `Illegal_char_ref of string
|
||||||
(** Illegal character reference. *)
|
(** Illegal character reference. *)
|
||||||
| `Illegal_char_seq of string
|
| `Illegal_char_seq of string
|
||||||
(** Illegal character sequence. *)
|
(** Illegal character sequence. *)
|
||||||
| `Expected_char_seqs of string list * string
|
| `Expected_char_seqs of string list * string
|
||||||
(** Expected one of the character sequences in the list but found another. *)
|
(** Expected one of the character sequences in the list but found another. *)
|
||||||
| `Expected_root_element
|
| `Expected_root_element
|
||||||
(** Expected the document's root element. *) ]
|
(** Expected the document's root element. *) ]
|
||||||
|
|
||||||
val error_message : error -> string
|
val error_message : error -> string
|
||||||
|
@ -113,7 +136,7 @@ val error_message : error -> string
|
||||||
exception Error of pos * error
|
exception Error of pos * error
|
||||||
(** Raised on input errors. *)
|
(** Raised on input errors. *)
|
||||||
|
|
||||||
type source = [
|
type source = [
|
||||||
| `Channel of in_channel | `String of int * string | `Fun of (unit -> int) ]
|
| `Channel of in_channel | `String of int * string | `Fun of (unit -> int) ]
|
||||||
(** The type for input sources. For [`String] starts reading at the
|
(** The type for input sources. For [`String] starts reading at the
|
||||||
given integer position. For [`Fun] the function must return the
|
given integer position. For [`Fun] the function must return the
|
||||||
|
@ -123,70 +146,70 @@ type source = [
|
||||||
type input
|
type input
|
||||||
(** The type for input abstractions. *)
|
(** The type for input abstractions. *)
|
||||||
|
|
||||||
val make_input : ?enc:encoding option -> ?strip:bool ->
|
val make_input : ?enc:encoding option -> ?strip:bool ->
|
||||||
?ns:(string -> string option) ->
|
?ns:(string -> string option) ->
|
||||||
?entity: (string -> string option) -> source -> input
|
?entity: (string -> string option) -> source -> input
|
||||||
(** Returns a new input abstraction reading from the given source.
|
(** Returns a new input abstraction reading from the given source.
|
||||||
{ul
|
{ul
|
||||||
{- [enc], character encoding of the document, {{!inenc} details}.
|
{- [enc], character encoding of the document, {{!inenc} details}.
|
||||||
Defaults to [None].}
|
Defaults to [None].}
|
||||||
{- [strip], strips whitespace in character data, {{!inwspace} details}.
|
{- [strip], strips whitespace in character data, {{!inwspace} details}.
|
||||||
Defaults to [false].}
|
Defaults to [false].}
|
||||||
{- [ns] is called to bind undeclared namespace prefixes,
|
{- [ns] is called to bind undeclared namespace prefixes,
|
||||||
{{!inns} details}. Default returns always [None].}
|
{{!inns} details}. Default returns always [None].}
|
||||||
{- [entity] is called to resolve non predefined entity references,
|
{- [entity] is called to resolve non predefined entity references,
|
||||||
{{!inentity} details}. Default returns always [None].}} *)
|
{{!inentity} details}. Default returns always [None].}} *)
|
||||||
|
|
||||||
val input : input -> signal
|
val input : input -> signal
|
||||||
(** Inputs a signal. Repeated invocation of the function with the same
|
(** Inputs a signal. Repeated invocation of the function with the same
|
||||||
input abstraction will generate a {{!signal}well-formed} sequence
|
input abstraction will generate a {{!signal}well-formed} sequence
|
||||||
of signals or an {!Error} is raised. Furthermore there will be no
|
of signals or an {!Error} is raised. Furthermore there will be no
|
||||||
two consecutive [`Data] signals in the sequence and their string
|
two consecutive [`Data] signals in the sequence and their string
|
||||||
is always non empty.
|
is always non empty.
|
||||||
|
|
||||||
{b Deprecated} After a well-formed sequence was input another may
|
{b Deprecated} After a well-formed sequence was input another may
|
||||||
be input, see {!eoi} and {{!iseq}details}.
|
be input, see {!eoi} and {{!iseq}details}.
|
||||||
|
|
||||||
{b Raises} {!Error} on input errors. *)
|
{b Raises} {!Error} on input errors. *)
|
||||||
|
|
||||||
val input_tree : el:(tag -> 'a list -> 'a) -> data:(string -> 'a) ->
|
val input_tree : el:(tag -> 'a list -> 'a) -> data:(string -> 'a) ->
|
||||||
input -> 'a
|
input -> 'a
|
||||||
(** If the next signal is a :
|
(** If the next signal is a :
|
||||||
{ul
|
{ul
|
||||||
{- [`Data] signal, inputs it and invokes [data] with the character data.}
|
{- [`Data] signal, inputs it and invokes [data] with the character data.}
|
||||||
{- [`El_start] signal, inputs the sequence of signals until its
|
{- [`El_start] signal, inputs the sequence of signals until its
|
||||||
matching [`El_end] and invokes [el] and [data] as follows
|
matching [`El_end] and invokes [el] and [data] as follows
|
||||||
{ul
|
{ul
|
||||||
{- [el], is called on each [`El_end] signals with the corresponding
|
{- [el], is called on each [`El_end] signals with the corresponding
|
||||||
[`El_start] tag and the result of the callback invocation for the
|
[`El_start] tag and the result of the callback invocation for the
|
||||||
element's children.}
|
element's children.}
|
||||||
{- [data], is called on each [`Data] signals with the character data.
|
{- [data], is called on each [`Data] signals with the character data.
|
||||||
This function won't be called twice consecutively or with the empty
|
This function won't be called twice consecutively or with the empty
|
||||||
string.}}}
|
string.}}}
|
||||||
{- Other signals, raises [Invalid_argument].}}
|
{- Other signals, raises [Invalid_argument].}}
|
||||||
|
|
||||||
{b Raises} {!Error} on input errors and [Invalid_argument]
|
{b Raises} {!Error} on input errors and [Invalid_argument]
|
||||||
if the next signal is not [`El_start] or [`Data]. *)
|
if the next signal is not [`El_start] or [`Data]. *)
|
||||||
|
|
||||||
val input_doc_tree : el:(tag -> 'a list -> 'a) -> data:(string -> 'a) ->
|
val input_doc_tree : el:(tag -> 'a list -> 'a) -> data:(string -> 'a) ->
|
||||||
input -> (dtd * 'a)
|
input -> (dtd * 'a)
|
||||||
(** Same as {!input_tree} but reads a complete {{!signal}well-formed}
|
(** Same as {!input_tree} but reads a complete {{!signal}well-formed}
|
||||||
sequence of signals.
|
sequence of signals.
|
||||||
|
|
||||||
{b Raises} {!Error} on input errors and [Invalid_argument]
|
{b Raises} {!Error} on input errors and [Invalid_argument]
|
||||||
if the next signal is not [`Dtd]. *)
|
if the next signal is not [`Dtd]. *)
|
||||||
|
|
||||||
val peek : input -> signal
|
val peek : input -> signal
|
||||||
(** Same as {!input} but doesn't remove the signal from the sequence.
|
(** Same as {!input} but doesn't remove the signal from the sequence.
|
||||||
|
|
||||||
{b Raises} {!Error} on input errors. *)
|
|
||||||
|
|
||||||
val eoi : input -> bool
|
|
||||||
(** Returns [true] if the end of input is reached. See {{!iseq}details}.
|
|
||||||
|
|
||||||
{b Raises} {!Error} on input errors. *)
|
{b Raises} {!Error} on input errors. *)
|
||||||
|
|
||||||
val pos : input -> pos
|
val eoi : input -> bool
|
||||||
|
(** Returns [true] if the end of input is reached. See {{!iseq}details}.
|
||||||
|
|
||||||
|
{b Raises} {!Error} on input errors. *)
|
||||||
|
|
||||||
|
val pos : input -> pos
|
||||||
(** Current position in the input abstraction. *)
|
(** Current position in the input abstraction. *)
|
||||||
|
|
||||||
(** {1 Output} *)
|
(** {1 Output} *)
|
||||||
|
@ -194,31 +217,31 @@ val pos : input -> pos
|
||||||
type 'a frag = [ `El of tag * 'a list | `Data of string ]
|
type 'a frag = [ `El of tag * 'a list | `Data of string ]
|
||||||
(** The type for deconstructing data structures of type ['a]. *)
|
(** The type for deconstructing data structures of type ['a]. *)
|
||||||
|
|
||||||
type dest = [ `Channel of out_channel | `Buffer of Buffer.t
|
type dest = [ `Channel of out_channel | `Buffer of Buffer.t
|
||||||
| `Fun of (int -> unit) ]
|
| `Fun of (int -> unit) ]
|
||||||
(** The type for output destinations. For [`Buffer], the buffer won't
|
(** The type for output destinations. For [`Buffer], the buffer won't
|
||||||
be cleared. For [`Fun] the function is called with the output {e
|
be cleared. For [`Fun] the function is called with the output {e
|
||||||
bytes} as [int]s. *)
|
bytes} as [int]s. *)
|
||||||
|
|
||||||
type output
|
type output
|
||||||
(** The type for output abstractions. *)
|
(** The type for output abstractions. *)
|
||||||
|
|
||||||
val make_output : ?decl:bool -> ?nl:bool -> ?indent:int option ->
|
val make_output : ?decl:bool -> ?nl:bool -> ?indent:int option ->
|
||||||
?ns_prefix:(string -> string option) -> dest -> output
|
?ns_prefix:(string -> string option) -> dest -> output
|
||||||
(** Returns a new output abstraction writing to the given destination.
|
(** Returns a new output abstraction writing to the given destination.
|
||||||
{ul
|
{ul
|
||||||
{- [decl], if [true] the {{:http://www.w3.org/TR/REC-xml/#NT-XMLDecl} XML
|
{- [decl], if [true] the {{:http://www.w3.org/TR/REC-xml/#NT-XMLDecl} XML
|
||||||
declaration} is output (defaults to [true]).}
|
declaration} is output (defaults to [true]).}
|
||||||
{- [nl], if [true] a newline is output when the root's element [`El_end]
|
{- [nl], if [true] a newline is output when the root's element [`El_end]
|
||||||
signal is output.
|
signal is output.
|
||||||
Defaults to [false].}
|
Defaults to [false].}
|
||||||
{- [indent], identation behaviour, see {{!outindent} details}. Defaults to
|
{- [indent], identation behaviour, see {{!outindent} details}. Defaults to
|
||||||
[None].}
|
[None].}
|
||||||
{- [ns_prefix], undeclared namespace prefix bindings,
|
{- [ns_prefix], undeclared namespace prefix bindings,
|
||||||
see {{!outns}details}. Default returns always [None].}} *)
|
see {{!outns}details}. Default returns always [None].}} *)
|
||||||
|
|
||||||
val output : output -> signal -> unit
|
val output : output -> signal -> unit
|
||||||
(** Outputs a signal.
|
(** Outputs a signal.
|
||||||
|
|
||||||
{b Deprecated.} After a well-formed sequence of signals was output
|
{b Deprecated.} After a well-formed sequence of signals was output
|
||||||
a new well-formed sequence can be output.
|
a new well-formed sequence can be output.
|
||||||
|
@ -230,22 +253,22 @@ val output : output -> signal -> unit
|
||||||
val output_depth : output -> int
|
val output_depth : output -> int
|
||||||
(** [output_depth o] is [o]'s current element nesting level (undefined
|
(** [output_depth o] is [o]'s current element nesting level (undefined
|
||||||
before the first [`El_start] and after the last [`El_end]). *)
|
before the first [`El_start] and after the last [`El_end]). *)
|
||||||
|
|
||||||
val output_tree : ('a -> 'a frag) -> output -> 'a -> unit
|
val output_tree : ('a -> 'a frag) -> output -> 'a -> unit
|
||||||
(** Outputs signals corresponding to a value by recursively
|
(** Outputs signals corresponding to a value by recursively
|
||||||
applying the given value deconstructor.
|
applying the given value deconstructor.
|
||||||
|
|
||||||
{b Raises} see {!output}. *)
|
{b Raises} see {!output}. *)
|
||||||
|
|
||||||
val output_doc_tree : ('a -> 'a frag) -> output -> (dtd * 'a) -> unit
|
val output_doc_tree : ('a -> 'a frag) -> output -> (dtd * 'a) -> unit
|
||||||
(** Same as {!output_tree} but outputs a complete {{!signal}well-formed}
|
(** Same as {!output_tree} but outputs a complete {{!signal}well-formed}
|
||||||
sequence of signals.
|
sequence of signals.
|
||||||
|
|
||||||
{b Raises} see {!output}. *)
|
{b Raises} see {!output}. *)
|
||||||
|
|
||||||
(** {1:sto Functorial interface (deprecated)}
|
(** {1:sto Functorial interface (deprecated)}
|
||||||
|
|
||||||
{b WARNING.} The functioral interface is deprecated and will be
|
{b WARNING.} The functioral interface is deprecated and will be
|
||||||
removed.
|
removed.
|
||||||
|
|
||||||
{!Make} allows client to specify types for strings and internal
|
{!Make} allows client to specify types for strings and internal
|
||||||
|
@ -258,16 +281,16 @@ type std_buffer = Buffer.t
|
||||||
|
|
||||||
(** Input signature for strings. *)
|
(** Input signature for strings. *)
|
||||||
module type String = sig
|
module type String = sig
|
||||||
|
|
||||||
type t
|
type t
|
||||||
(** The type for strings. *)
|
(** The type for strings. *)
|
||||||
|
|
||||||
val empty : t
|
val empty : t
|
||||||
(** The empty string. *)
|
(** The empty string. *)
|
||||||
|
|
||||||
val length : t -> int
|
val length : t -> int
|
||||||
(** Returns the length of the string. *)
|
(** Returns the length of the string. *)
|
||||||
|
|
||||||
val append : t -> t -> t
|
val append : t -> t -> t
|
||||||
(** Concatenates two strings. *)
|
(** Concatenates two strings. *)
|
||||||
|
|
||||||
|
@ -277,7 +300,7 @@ module type String = sig
|
||||||
{{:http://www.unicode.org/glossary/#code_point}code point}). *)
|
{{:http://www.unicode.org/glossary/#code_point}code point}). *)
|
||||||
|
|
||||||
val iter : (int -> unit) -> t -> unit
|
val iter : (int -> unit) -> t -> unit
|
||||||
(** Iterates over the unicode
|
(** Iterates over the unicode
|
||||||
{{:http://www.unicode.org/glossary/#code_point}code point}
|
{{:http://www.unicode.org/glossary/#code_point}code point}
|
||||||
of the given string. *)
|
of the given string. *)
|
||||||
|
|
||||||
|
@ -297,129 +320,129 @@ module type Buffer = sig
|
||||||
|
|
||||||
type string
|
type string
|
||||||
(** The type for strings. *)
|
(** The type for strings. *)
|
||||||
|
|
||||||
type t
|
type t
|
||||||
(** The type for buffers. *)
|
(** The type for buffers. *)
|
||||||
|
|
||||||
exception Full
|
exception Full
|
||||||
(** Raised if the buffer cannot be grown. *)
|
(** Raised if the buffer cannot be grown. *)
|
||||||
|
|
||||||
val create : int -> t
|
val create : int -> t
|
||||||
(** Creates a buffer of the given size. *)
|
(** Creates a buffer of the given size. *)
|
||||||
|
|
||||||
val add_uchar : t -> int -> unit
|
val add_uchar : t -> int -> unit
|
||||||
(** Adds the given (guaranteed valid) unicode
|
(** Adds the given (guaranteed valid) unicode
|
||||||
{{:http://www.unicode.org/glossary/#code_point}code point} to a
|
{{:http://www.unicode.org/glossary/#code_point}code point} to a
|
||||||
buffer.
|
buffer.
|
||||||
|
|
||||||
{b Raises} {!Full} if the buffer cannot be grown. *)
|
{b Raises} {!Full} if the buffer cannot be grown. *)
|
||||||
|
|
||||||
val clear : t -> unit
|
val clear : t -> unit
|
||||||
(** Clears the buffer. *)
|
(** Clears the buffer. *)
|
||||||
|
|
||||||
val contents : t -> string
|
val contents : t -> string
|
||||||
(** Returns the buffer contents. *)
|
(** Returns the buffer contents. *)
|
||||||
|
|
||||||
val length : t -> int
|
val length : t -> int
|
||||||
(** Returns the number of characters contained in the buffer. *)
|
(** Returns the number of characters contained in the buffer. *)
|
||||||
end
|
end
|
||||||
|
|
||||||
(** Output signature of {!Make}. *)
|
(** Output signature of {!Make}. *)
|
||||||
module type S = sig
|
module type S = sig
|
||||||
|
|
||||||
(** {1 Basic types and values} *)
|
|
||||||
|
|
||||||
type string
|
(** {1 Basic types and values} *)
|
||||||
|
|
||||||
type encoding = [
|
type string
|
||||||
|
|
||||||
|
type encoding = [
|
||||||
| `UTF_8 | `UTF_16 | `UTF_16BE | `UTF_16LE | `ISO_8859_1| `US_ASCII ]
|
| `UTF_8 | `UTF_16 | `UTF_16BE | `UTF_16LE | `ISO_8859_1| `US_ASCII ]
|
||||||
type dtd = string option
|
type dtd = string option
|
||||||
type name = string * string
|
type name = string * string
|
||||||
type attribute = name * string
|
type attribute = name * string
|
||||||
type tag = name * attribute list
|
type tag = name * attribute list
|
||||||
type signal = [ `Dtd of dtd | `El_start of tag | `El_end | `Data of string ]
|
type signal = [ `Dtd of dtd | `El_start of tag | `El_end | `Data of string ]
|
||||||
|
|
||||||
val ns_xml : string
|
|
||||||
val ns_xmlns : string
|
|
||||||
|
|
||||||
(** {1 Input} *)
|
|
||||||
|
|
||||||
type pos = int * int
|
val ns_xml : string
|
||||||
|
val ns_xmlns : string
|
||||||
|
|
||||||
|
(** {1 Input} *)
|
||||||
|
|
||||||
|
type pos = int * int
|
||||||
type error = [
|
type error = [
|
||||||
| `Max_buffer_size
|
| `Max_buffer_size
|
||||||
| `Unexpected_eoi
|
| `Unexpected_eoi
|
||||||
| `Malformed_char_stream
|
| `Malformed_char_stream
|
||||||
| `Unknown_encoding of string
|
| `Unknown_encoding of string
|
||||||
| `Unknown_entity_ref of string
|
| `Unknown_entity_ref of string
|
||||||
| `Unknown_ns_prefix of string
|
| `Unknown_ns_prefix of string
|
||||||
| `Illegal_char_ref of string
|
| `Illegal_char_ref of string
|
||||||
| `Illegal_char_seq of string
|
| `Illegal_char_seq of string
|
||||||
| `Expected_char_seqs of string list * string
|
| `Expected_char_seqs of string list * string
|
||||||
| `Expected_root_element ]
|
| `Expected_root_element ]
|
||||||
|
|
||||||
exception Error of pos * error
|
exception Error of pos * error
|
||||||
val error_message : error -> string
|
val error_message : error -> string
|
||||||
|
|
||||||
type source = [
|
type source = [
|
||||||
| `Channel of in_channel
|
| `Channel of in_channel
|
||||||
| `String of int * std_string
|
| `String of int * std_string
|
||||||
| `Fun of (unit -> int) ]
|
| `Fun of (unit -> int) ]
|
||||||
|
|
||||||
type input
|
type input
|
||||||
|
|
||||||
val make_input : ?enc:encoding option -> ?strip:bool ->
|
val make_input : ?enc:encoding option -> ?strip:bool ->
|
||||||
?ns:(string -> string option) ->
|
?ns:(string -> string option) ->
|
||||||
?entity: (string -> string option) -> source -> input
|
?entity: (string -> string option) -> source -> input
|
||||||
|
|
||||||
val input : input -> signal
|
val input : input -> signal
|
||||||
|
|
||||||
val input_tree : el:(tag -> 'a list -> 'a) -> data:(string -> 'a) ->
|
val input_tree : el:(tag -> 'a list -> 'a) -> data:(string -> 'a) ->
|
||||||
input -> 'a
|
input -> 'a
|
||||||
|
|
||||||
val input_doc_tree : el:(tag -> 'a list -> 'a) -> data:(string -> 'a) ->
|
val input_doc_tree : el:(tag -> 'a list -> 'a) -> data:(string -> 'a) ->
|
||||||
input -> (dtd * 'a)
|
input -> (dtd * 'a)
|
||||||
|
|
||||||
val peek : input -> signal
|
val peek : input -> signal
|
||||||
val eoi : input -> bool
|
val eoi : input -> bool
|
||||||
val pos : input -> pos
|
val pos : input -> pos
|
||||||
|
|
||||||
(** {1 Output} *)
|
(** {1 Output} *)
|
||||||
|
|
||||||
type 'a frag = [ `El of tag * 'a list | `Data of string ]
|
type 'a frag = [ `El of tag * 'a list | `Data of string ]
|
||||||
type dest = [
|
type dest = [
|
||||||
| `Channel of out_channel | `Buffer of std_buffer | `Fun of (int -> unit) ]
|
| `Channel of out_channel | `Buffer of std_buffer | `Fun of (int -> unit) ]
|
||||||
|
|
||||||
type output
|
type output
|
||||||
val make_output : ?decl:bool -> ?nl:bool -> ?indent:int option ->
|
val make_output : ?decl:bool -> ?nl:bool -> ?indent:int option ->
|
||||||
?ns_prefix:(string -> string option) -> dest -> output
|
?ns_prefix:(string -> string option) -> dest -> output
|
||||||
|
|
||||||
val output_depth : output -> int
|
val output_depth : output -> int
|
||||||
val output : output -> signal -> unit
|
val output : output -> signal -> unit
|
||||||
val output_tree : ('a -> 'a frag) -> output -> 'a -> unit
|
val output_tree : ('a -> 'a frag) -> output -> 'a -> unit
|
||||||
val output_doc_tree : ('a -> 'a frag) -> output -> (dtd * 'a) -> unit
|
val output_doc_tree : ('a -> 'a frag) -> output -> (dtd * 'a) -> unit
|
||||||
end
|
end
|
||||||
|
|
||||||
(** Functor building streaming XML IO with the given strings and buffers. *)
|
(** Functor building streaming XML IO with the given strings and buffers. *)
|
||||||
module Make (String : String) (Buffer : Buffer with type string = String.t) : S
|
module Make (String : String) (Buffer : Buffer with type string = String.t) : S
|
||||||
with type string = String.t
|
with type string = String.t
|
||||||
|
|
||||||
(** {1:io Features and limitations}
|
(** {1:io Features and limitations}
|
||||||
|
|
||||||
The module assumes strings are immutable, thus strings
|
The module assumes strings are immutable, thus strings
|
||||||
the client gives or receives {e during} the input and output process
|
the client gives or receives {e during} the input and output process
|
||||||
must not be modified.
|
must not be modified.
|
||||||
{2:input Input}
|
{2:input Input}
|
||||||
{3:inenc Encoding}
|
{3:inenc Encoding}
|
||||||
|
|
||||||
The parser supports ASCII, US-ASCII,
|
The parser supports ASCII, US-ASCII,
|
||||||
{{:http://www.faqs.org/rfcs/rfc3629.html} UTF-8},
|
{{:http://www.faqs.org/rfcs/rfc3629.html} UTF-8},
|
||||||
{{:http://www.faqs.org/rfcs/rfc2781.html} UTF-16},
|
{{:http://www.faqs.org/rfcs/rfc2781.html} UTF-16},
|
||||||
{{:http://www.faqs.org/rfcs/rfc2781.html} UTF-16LE},
|
{{:http://www.faqs.org/rfcs/rfc2781.html} UTF-16LE},
|
||||||
{{:http://www.faqs.org/rfcs/rfc2781.html} UTF-16BE} and
|
{{:http://www.faqs.org/rfcs/rfc2781.html} UTF-16BE} and
|
||||||
{{:http://anubis.dkuug.dk/JTC1/SC2/WG3/docs/n411.pdf}ISO-8559-1}
|
{{:http://anubis.dkuug.dk/JTC1/SC2/WG3/docs/n411.pdf}ISO-8559-1}
|
||||||
(Latin-1) encoded documents. But strings returned by
|
(Latin-1) encoded documents. But strings returned by
|
||||||
the library are {b always} UTF-8 encoded.
|
the library are {b always} UTF-8 encoded.
|
||||||
|
|
||||||
The encoding can be specified explicitly using the optional
|
The encoding can be specified explicitly using the optional
|
||||||
argument [enc]. Otherwise the parser uses UTF-16 or UTF-8 if there is a
|
argument [enc]. Otherwise the parser uses UTF-16 or UTF-8 if there is a
|
||||||
{{:http://www.unicode.org/unicode/faq/utf_bom.html#BOM}BOM} at the
|
{{:http://www.unicode.org/unicode/faq/utf_bom.html#BOM}BOM} at the
|
||||||
|
@ -431,8 +454,8 @@ module Make (String : String) (Buffer : Buffer with type string = String.t) : S
|
||||||
The parser performs
|
The parser performs
|
||||||
{{:http://www.w3.org/TR/REC-xml/#AVNormalize}attribute data
|
{{:http://www.w3.org/TR/REC-xml/#AVNormalize}attribute data
|
||||||
normalization} on {e every} attribute data. This means that
|
normalization} on {e every} attribute data. This means that
|
||||||
attribute data does not have leading and trailling white space and that
|
attribute data does not have leading and trailling white space and that
|
||||||
any white space is collapsed and transformed to a single space
|
any white space is collapsed and transformed to a single space
|
||||||
character ([U+0020]).
|
character ([U+0020]).
|
||||||
|
|
||||||
White space handling of character data depends on the [strip]
|
White space handling of character data depends on the [strip]
|
||||||
|
@ -461,9 +484,9 @@ module Make (String : String) (Buffer : Buffer with type string = String.t) : S
|
||||||
[(Xmlm.ns_xmlns, "ex")].
|
[(Xmlm.ns_xmlns, "ex")].
|
||||||
|
|
||||||
Regarding constraints on the usage of the {i xml} and {i xmlns}
|
Regarding constraints on the usage of the {i xml} and {i xmlns}
|
||||||
prefixes by documents, the parser does not report errors on violations
|
prefixes by documents, the parser does not report errors on violations
|
||||||
of the {i must} constraints listed in
|
of the {i must} constraints listed in
|
||||||
{{:http://www.w3.org/TR/xml-names11/#xmlReserved}this paragraph}.
|
{{:http://www.w3.org/TR/xml-names11/#xmlReserved}this paragraph}.
|
||||||
|
|
||||||
{3:inentity Character and entity references}
|
{3:inentity Character and entity references}
|
||||||
|
|
||||||
|
@ -475,14 +498,14 @@ module Make (String : String) (Buffer : Buffer with type string = String.t) : S
|
||||||
replacement character data. The replacement data is {e not}
|
replacement character data. The replacement data is {e not}
|
||||||
analysed for further references, it is added to the data as such
|
analysed for further references, it is added to the data as such
|
||||||
modulo white space stripping. If [entity] returns [None] the error
|
modulo white space stripping. If [entity] returns [None] the error
|
||||||
[`Unknown_entity_ref] is returned.
|
[`Unknown_entity_ref] is returned.
|
||||||
|
|
||||||
{3:iseq Sequences of documents (deprecated)}
|
{3:iseq Sequences of documents (deprecated)}
|
||||||
|
|
||||||
{b WARNING.} This feature is deprecated and will be removed.
|
{b WARNING.} This feature is deprecated and will be removed.
|
||||||
|
|
||||||
When a well-formed sequence of signals is input, no data is consumed beyond
|
When a well-formed sequence of signals is input, no data is consumed beyond
|
||||||
the closing ['>'] of the document's root element.
|
the closing ['>'] of the document's root element.
|
||||||
|
|
||||||
If you want to parse a document as
|
If you want to parse a document as
|
||||||
{{:http://www.w3.org/TR/REC-xml/#NT-document}defined} in the XML
|
{{:http://www.w3.org/TR/REC-xml/#NT-document}defined} in the XML
|
||||||
|
@ -503,32 +526,32 @@ module Make (String : String) (Buffer : Buffer with type string = String.t) : S
|
||||||
|
|
||||||
{3:inmisc Miscellaneous}
|
{3:inmisc Miscellaneous}
|
||||||
{ul
|
{ul
|
||||||
{- Parses the more liberal and simpler XML 1.1
|
{- Parses the more liberal and simpler XML 1.1
|
||||||
{{:http://www.w3.org/TR/xml11/#NT-Name}Name} definition (minus [':'] because
|
{{:http://www.w3.org/TR/xml11/#NT-Name}Name} definition (minus [':'] because
|
||||||
of namespaces).}
|
of namespaces).}
|
||||||
{- The {{:http://www.w3.org/TR/REC-xml/#dt-doctype}DTD} is parsed
|
{- The {{:http://www.w3.org/TR/REC-xml/#dt-doctype}DTD} is parsed
|
||||||
roughly (no guarantee it is well formed) and its information is ignored.}
|
roughly (no guarantee it is well formed) and its information is ignored.}
|
||||||
{- The parser drops
|
{- The parser drops
|
||||||
{{:http://www.w3.org/TR/REC-xml/#dt-comment}comments},
|
{{:http://www.w3.org/TR/REC-xml/#dt-comment}comments},
|
||||||
{{:http://www.w3.org/TR/REC-xml/#dt-pi}processing instructions}, and
|
{{:http://www.w3.org/TR/REC-xml/#dt-pi}processing instructions}, and
|
||||||
{{:http://www.w3.org/TR/REC-xml/#sec-rmd}standalone declaration}.}
|
{{:http://www.w3.org/TR/REC-xml/#sec-rmd}standalone declaration}.}
|
||||||
{- Element attributes are not checked for uniqueness.}
|
{- Element attributes are not checked for uniqueness.}
|
||||||
{- Attribute and character data chunks are limited by
|
{- Attribute and character data chunks are limited by
|
||||||
[Sys.max_string_length].
|
[Sys.max_string_length].
|
||||||
The error [`Max_buffer_size] is raised if the limit is hit.}
|
The error [`Max_buffer_size] is raised if the limit is hit.}
|
||||||
{- Tail recursive.}
|
{- Tail recursive.}
|
||||||
{- Non validating.}
|
{- Non validating.}
|
||||||
}
|
}
|
||||||
|
|
||||||
{2:output Output}
|
{2:output Output}
|
||||||
{3:outenc Encoding}
|
{3:outenc Encoding}
|
||||||
|
|
||||||
Outputs only {{:http://www.faqs.org/rfcs/rfc3629.html} UTF-8}
|
Outputs only {{:http://www.faqs.org/rfcs/rfc3629.html} UTF-8}
|
||||||
encoded documents. Strings given to
|
encoded documents. Strings given to
|
||||||
output functions {b must be} UTF-8 encoded, no checks are
|
output functions {b must be} UTF-8 encoded, no checks are
|
||||||
performed. Unicode characters that are not legal XML
|
performed. Unicode characters that are not legal XML
|
||||||
{{:http://www.w3.org/TR/REC-xml/#NT-Char}characters} are replaced
|
{{:http://www.w3.org/TR/REC-xml/#NT-Char}characters} are replaced
|
||||||
by the {{:http://unicode.org/glossary/#replacement_character}Unicode
|
by the {{:http://unicode.org/glossary/#replacement_character}Unicode
|
||||||
replacement character}.
|
replacement character}.
|
||||||
|
|
||||||
{3:outns Namespaces}
|
{3:outns Namespaces}
|
||||||
|
@ -537,15 +560,15 @@ module Make (String : String) (Buffer : Buffer with type string = String.t) : S
|
||||||
{{:http://www.w3.org/TR/xml-names11/#dt-expname}expanded names}.
|
{{:http://www.w3.org/TR/xml-names11/#dt-expname}expanded names}.
|
||||||
Expanded names are automatically converted to
|
Expanded names are automatically converted to
|
||||||
{{:http://www.w3.org/TR/xml-names11/#dt-qualname}qualified
|
{{:http://www.w3.org/TR/xml-names11/#dt-qualname}qualified
|
||||||
names} by the output abstraction. There is no particular api to specify
|
names} by the output abstraction. There is no particular api to specify
|
||||||
prefixes and default namespaces,
|
prefixes and default namespaces,
|
||||||
the actual result depends solely on the output
|
the actual result depends solely on the output
|
||||||
of attributes belonging to the {!ns_xmlns} namespace. For example to set
|
of attributes belonging to the {!ns_xmlns} namespace. For example to set
|
||||||
the default namespace of an element to {i http://example.org/myns},
|
the default namespace of an element to {i http://example.org/myns},
|
||||||
use the following attribute :
|
use the following attribute :
|
||||||
{[(* xmlns='http://example.org/myns' *)
|
{[(* xmlns='http://example.org/myns' *)
|
||||||
let default_ns = (Xmlm.ns_xmlns, "xmlns"), "http://example.org/myns"]}
|
let default_ns = (Xmlm.ns_xmlns, "xmlns"), "http://example.org/myns"]}
|
||||||
To bind the prefix ["ex"] to {i http://example.org/ex}, use the
|
To bind the prefix ["ex"] to {i http://example.org/ex}, use the
|
||||||
following attribute :
|
following attribute :
|
||||||
{[(* xmlns:ex='http://example.org/ex' *)
|
{[(* xmlns:ex='http://example.org/ex' *)
|
||||||
let ex_ns = (Xmlm.ns_xmlns, "ex"), "http://example.org/ex"]}
|
let ex_ns = (Xmlm.ns_xmlns, "ex"), "http://example.org/ex"]}
|
||||||
|
@ -556,7 +579,7 @@ let ex_ns = (Xmlm.ns_xmlns, "ex"), "http://example.org/ex"]}
|
||||||
|
|
||||||
The callback [ns_prefix] of an output abstraction can be used to
|
The callback [ns_prefix] of an output abstraction can be used to
|
||||||
give a prefix to a namespace name lacking a prefix binding in the
|
give a prefix to a namespace name lacking a prefix binding in the
|
||||||
current output scope. Given a namespace name the function must return
|
current output scope. Given a namespace name the function must return
|
||||||
the prefix to use. Note that this
|
the prefix to use. Note that this
|
||||||
will {b not} add any namespace declaration attribute to the
|
will {b not} add any namespace declaration attribute to the
|
||||||
output. If the function returns [None], {!output} will raise
|
output. If the function returns [None], {!output} will raise
|
||||||
|
@ -571,7 +594,7 @@ let ex_ns = (Xmlm.ns_xmlns, "ex"), "http://example.org/ex"]}
|
||||||
line) and nested elements are indented with [c] space
|
line) and nested elements are indented with [c] space
|
||||||
characters.
|
characters.
|
||||||
|
|
||||||
{3:oseq Sequences of documents (deprecated)}
|
{3:oseq Sequences of documents (deprecated)}
|
||||||
|
|
||||||
{b WARNING.} This feature is deprecated and will be removed.
|
{b WARNING.} This feature is deprecated and will be removed.
|
||||||
|
|
||||||
|
@ -582,44 +605,44 @@ let ex_ns = (Xmlm.ns_xmlns, "ex"), "http://example.org/ex"]}
|
||||||
{3:outmisc Miscellaneous}
|
{3:outmisc Miscellaneous}
|
||||||
{ul
|
{ul
|
||||||
{- Output on a channel does not flush it.}
|
{- Output on a channel does not flush it.}
|
||||||
{- In attribute and character data you provide, markup
|
{- In attribute and character data you provide, markup
|
||||||
delimiters ['<'],['>'],['&'], and ['\"'] are
|
delimiters ['<'],['>'],['&'], and ['\"'] are
|
||||||
automatically escaped to
|
automatically escaped to
|
||||||
{{:http://www.w3.org/TR/REC-xml/#sec-predefined-ent}predefined
|
{{:http://www.w3.org/TR/REC-xml/#sec-predefined-ent}predefined
|
||||||
entities}.}
|
entities}.}
|
||||||
{- No checks are peformed on the prefix and local part of output
|
{- No checks are peformed on the prefix and local part of output
|
||||||
names to verify they are
|
names to verify they are
|
||||||
{{:http://www.w3.org/TR/xml-names11/#NT-NCName}NCName}s.
|
{{:http://www.w3.org/TR/xml-names11/#NT-NCName}NCName}s.
|
||||||
For example using the tag name [("","dip d")] will produce
|
For example using the tag name [("","dip d")] will produce
|
||||||
a non well-formed document because of the space character.}
|
a non well-formed document because of the space character.}
|
||||||
{- Tail recursive.}}
|
{- Tail recursive.}}
|
||||||
|
|
||||||
{2 Tips}
|
{2 Tips}
|
||||||
{ul
|
{ul
|
||||||
{- The best options to do an input/output round trip
|
{- The best options to do an input/output round trip
|
||||||
and preserve as much information as possible is to
|
and preserve as much information as possible is to
|
||||||
input with [strip = false] and output with [indent = None].}
|
input with [strip = false] and output with [indent = None].}
|
||||||
{- Complete whitespace control on output is achieved
|
{- Complete whitespace control on output is achieved
|
||||||
with [indent = None] and suitable [`Data] signals}}
|
with [indent = None] and suitable [`Data] signals}}
|
||||||
|
|
||||||
{1:ex Examples}
|
{1:ex Examples}
|
||||||
|
|
||||||
{2:exseq Sequential processing}
|
{2:exseq Sequential processing}
|
||||||
|
|
||||||
Sequential processing has the advantage that you don't need to get
|
Sequential processing has the advantage that you don't need to get
|
||||||
the whole document tree in memory to process it.
|
the whole document tree in memory to process it.
|
||||||
|
|
||||||
The following function reads a {e single} document on an
|
The following function reads a {e single} document on an
|
||||||
input channel and outputs it.
|
input channel and outputs it.
|
||||||
{[let id ic oc =
|
{[let id ic oc =
|
||||||
let i = Xmlm.make_input (`Channel ic) in
|
let i = Xmlm.make_input (`Channel ic) in
|
||||||
let o = Xmlm.make_output (`Channel oc) in
|
let o = Xmlm.make_output (`Channel oc) in
|
||||||
let rec pull i o depth =
|
let rec pull i o depth =
|
||||||
Xmlm.output o (Xmlm.peek i);
|
Xmlm.output o (Xmlm.peek i);
|
||||||
match Xmlm.input i with
|
match Xmlm.input i with
|
||||||
| `El_start _ -> pull i o (depth + 1)
|
| `El_start _ -> pull i o (depth + 1)
|
||||||
| `El_end -> if depth = 1 then () else pull i o (depth - 1)
|
| `El_end -> if depth = 1 then () else pull i o (depth - 1)
|
||||||
| `Data _ -> pull i o depth
|
| `Data _ -> pull i o depth
|
||||||
| `Dtd _ -> assert false
|
| `Dtd _ -> assert false
|
||||||
in
|
in
|
||||||
Xmlm.output o (Xmlm.input i); (* `Dtd *)
|
Xmlm.output o (Xmlm.input i); (* `Dtd *)
|
||||||
|
@ -628,32 +651,32 @@ let ex_ns = (Xmlm.ns_xmlns, "ex"), "http://example.org/ex"]}
|
||||||
|
|
||||||
The following function reads a {e sequence} of documents on an
|
The following function reads a {e sequence} of documents on an
|
||||||
input channel and outputs it.
|
input channel and outputs it.
|
||||||
{[let id_seq ic oc =
|
{[let id_seq ic oc =
|
||||||
let i = Xmlm.make_input (`Channel ic) in
|
let i = Xmlm.make_input (`Channel ic) in
|
||||||
let o = Xmlm.make_output ~nl:true (`Channel oc) in
|
let o = Xmlm.make_output ~nl:true (`Channel oc) in
|
||||||
while not (Xmlm.eoi i) do Xmlm.output o (Xmlm.input i) done]}
|
while not (Xmlm.eoi i) do Xmlm.output o (Xmlm.input i) done]}
|
||||||
The following function reads a {e sequence} of documents on the
|
The following function reads a {e sequence} of documents on the
|
||||||
input channel. In each document's tree it prunes non root elements
|
input channel. In each document's tree it prunes non root elements
|
||||||
whose name belongs to [prune_list].
|
whose name belongs to [prune_list].
|
||||||
{[let prune_docs prune_list ic oc =
|
{[let prune_docs prune_list ic oc =
|
||||||
let i = Xmlm.make_input (`Channel ic) in
|
let i = Xmlm.make_input (`Channel ic) in
|
||||||
let o = Xmlm.make_output ~nl:true (`Channel oc) in
|
let o = Xmlm.make_output ~nl:true (`Channel oc) in
|
||||||
let copy i o = Xmlm.output o (Xmlm.input i) in
|
let copy i o = Xmlm.output o (Xmlm.input i) in
|
||||||
let prune (name, _) = List.mem name prune_list in
|
let prune (name, _) = List.mem name prune_list in
|
||||||
let rec process i o d =
|
let rec process i o d =
|
||||||
let rec skip i d = match Xmlm.input i with
|
let rec skip i d = match Xmlm.input i with
|
||||||
| `El_start _ -> skip i (d + 1)
|
| `El_start _ -> skip i (d + 1)
|
||||||
| `El_end -> if d = 1 then () else skip i (d - 1)
|
| `El_end -> if d = 1 then () else skip i (d - 1)
|
||||||
| s -> skip i d
|
| s -> skip i d
|
||||||
in
|
in
|
||||||
match Xmlm.peek i with
|
match Xmlm.peek i with
|
||||||
| `El_start tag when prune tag -> skip i 0; process i o d
|
| `El_start tag when prune tag -> skip i 0; process i o d
|
||||||
| `El_start _ -> copy i o; process i o (d + 1)
|
| `El_start _ -> copy i o; process i o (d + 1)
|
||||||
| `El_end -> copy i o; if d = 0 then () else process i o (d - 1)
|
| `El_end -> copy i o; if d = 0 then () else process i o (d - 1)
|
||||||
| `Data _ -> copy i o; process i o d
|
| `Data _ -> copy i o; process i o d
|
||||||
| `Dtd _ -> assert false
|
| `Dtd _ -> assert false
|
||||||
in
|
in
|
||||||
let rec docs i o =
|
let rec docs i o =
|
||||||
copy i o; (* `Dtd *)
|
copy i o; (* `Dtd *)
|
||||||
copy i o; (* root start *)
|
copy i o; (* root start *)
|
||||||
process i o 0;
|
process i o 0;
|
||||||
|
@ -661,24 +684,24 @@ let ex_ns = (Xmlm.ns_xmlns, "ex"), "http://example.org/ex"]}
|
||||||
in
|
in
|
||||||
docs i o]}
|
docs i o]}
|
||||||
|
|
||||||
{2:extree Tree processing}
|
{2:extree Tree processing}
|
||||||
|
|
||||||
A document's sequence of signals can be easily converted
|
A document's sequence of signals can be easily converted
|
||||||
to an arborescent data structure. Assume your trees are defined by :
|
to an arborescent data structure. Assume your trees are defined by :
|
||||||
{[type tree = E of Xmlm.tag * tree list | D of string]}
|
{[type tree = E of Xmlm.tag * tree list | D of string]}
|
||||||
The following functions input/output xml documents from/to abstractions
|
The following functions input/output xml documents from/to abstractions
|
||||||
as value of type [tree].
|
as value of type [tree].
|
||||||
{[let in_tree i =
|
{[let in_tree i =
|
||||||
let el tag childs = E (tag, childs) in
|
let el tag childs = E (tag, childs) in
|
||||||
let data d = D d in
|
let data d = D d in
|
||||||
Xmlm.input_doc_tree ~el ~data i
|
Xmlm.input_doc_tree ~el ~data i
|
||||||
|
|
||||||
let out_tree o t =
|
let out_tree o t =
|
||||||
let frag = function
|
let frag = function
|
||||||
| E (tag, childs) -> `El (tag, childs)
|
| E (tag, childs) -> `El (tag, childs)
|
||||||
| D d -> `Data d
|
| D d -> `Data d
|
||||||
in
|
in
|
||||||
Xmlm.output_doc_tree frag o t]}
|
Xmlm.output_doc_tree frag o t]}
|
||||||
|
|
||||||
{2:exrow Tabular data processing}
|
{2:exrow Tabular data processing}
|
||||||
|
|
||||||
|
@ -696,7 +719,7 @@ let out_tree o t =
|
||||||
A bureaucrat contains the following elements, in order.
|
A bureaucrat contains the following elements, in order.
|
||||||
{ul {- <name> its name (mandatory, string).}
|
{ul {- <name> its name (mandatory, string).}
|
||||||
{- <surname> its surname (mandatory, string).}
|
{- <surname> its surname (mandatory, string).}
|
||||||
{- <honest> present iff he implemented one of its spec
|
{- <honest> present iff he implemented one of its spec
|
||||||
(optional, empty).}
|
(optional, empty).}
|
||||||
{- <obfuscation_level> its grade on the
|
{- <obfuscation_level> its grade on the
|
||||||
open scale of obfuscation (mandatory, float).}
|
open scale of obfuscation (mandatory, float).}
|
||||||
|
@ -704,25 +727,25 @@ let out_tree o t =
|
||||||
worked on.}}}}}}
|
worked on.}}}}}}
|
||||||
|
|
||||||
In OCaml we represent a W3C bureaucrat by this type :
|
In OCaml we represent a W3C bureaucrat by this type :
|
||||||
{[type w3c_bureaucrat = {
|
{[type w3c_bureaucrat = {
|
||||||
name : string;
|
name : string;
|
||||||
surname : string;
|
surname : string;
|
||||||
honest : bool;
|
honest : bool;
|
||||||
obfuscation_level : float;
|
obfuscation_level : float;
|
||||||
trs : string list; }]}
|
trs : string list; }]}
|
||||||
The following functions input and output W3C bureaucrats as lists
|
The following functions input and output W3C bureaucrats as lists
|
||||||
of values of type [w3c_bureaucrat].
|
of values of type [w3c_bureaucrat].
|
||||||
{[let in_w3c_bureaucrats src =
|
{[let in_w3c_bureaucrats src =
|
||||||
let i = Xmlm.make_input ~strip:true src in
|
let i = Xmlm.make_input ~strip:true src in
|
||||||
let tag n = ("", n), [] in
|
let tag n = ("", n), [] in
|
||||||
let error () = invalid_arg "parse error" in
|
let error () = invalid_arg "parse error" in
|
||||||
let accept s i = if Xmlm.input i = s then () else error () in
|
let accept s i = if Xmlm.input i = s then () else error () in
|
||||||
let rec i_seq el acc i = match Xmlm.peek i with
|
let rec i_seq el acc i = match Xmlm.peek i with
|
||||||
| `El_start _ -> i_seq el ((el i) :: acc) i
|
| `El_start _ -> i_seq el ((el i) :: acc) i
|
||||||
| `El_end -> List.rev acc
|
| `El_end -> List.rev acc
|
||||||
| _ -> error ()
|
| _ -> error ()
|
||||||
in
|
in
|
||||||
let i_el n i =
|
let i_el n i =
|
||||||
accept (`El_start (tag n)) i;
|
accept (`El_start (tag n)) i;
|
||||||
let d = match Xmlm.peek i with
|
let d = match Xmlm.peek i with
|
||||||
| `Data d -> ignore (Xmlm.input i); d
|
| `Data d -> ignore (Xmlm.input i); d
|
||||||
|
@ -732,7 +755,7 @@ let out_tree o t =
|
||||||
accept (`El_end) i;
|
accept (`El_end) i;
|
||||||
d
|
d
|
||||||
in
|
in
|
||||||
let i_bureaucrat i =
|
let i_bureaucrat i =
|
||||||
try
|
try
|
||||||
accept (`El_start (tag "bureaucrat")) i;
|
accept (`El_start (tag "bureaucrat")) i;
|
||||||
let name = i_el "name" i in
|
let name = i_el "name" i in
|
||||||
|
@ -744,7 +767,7 @@ let out_tree o t =
|
||||||
let obf = float_of_string (i_el "obfuscation_level" i) in
|
let obf = float_of_string (i_el "obfuscation_level" i) in
|
||||||
let trs = i_seq (i_el "tr") [] i in
|
let trs = i_seq (i_el "tr") [] i in
|
||||||
accept (`El_end) i;
|
accept (`El_end) i;
|
||||||
{ name = name; surname = surname; honest = honest;
|
{ name = name; surname = surname; honest = honest;
|
||||||
obfuscation_level = obf; trs = trs }
|
obfuscation_level = obf; trs = trs }
|
||||||
with
|
with
|
||||||
| Failure _ -> error () (* float_of_string *)
|
| Failure _ -> error () (* float_of_string *)
|
||||||
|
@ -756,16 +779,16 @@ let out_tree o t =
|
||||||
if not (Xmlm.eoi i) then invalid_arg "more than one document";
|
if not (Xmlm.eoi i) then invalid_arg "more than one document";
|
||||||
bl
|
bl
|
||||||
|
|
||||||
let out_w3c_bureaucrats dst bl =
|
let out_w3c_bureaucrats dst bl =
|
||||||
let tag n = ("", n), [] in
|
let tag n = ("", n), [] in
|
||||||
let o = Xmlm.make_output ~nl:true ~indent:(Some 2) dst in
|
let o = Xmlm.make_output ~nl:true ~indent:(Some 2) dst in
|
||||||
let out = Xmlm.output o in
|
let out = Xmlm.output o in
|
||||||
let o_el n d =
|
let o_el n d =
|
||||||
out (`El_start (tag n));
|
out (`El_start (tag n));
|
||||||
if d <> "" then out (`Data d);
|
if d <> "" then out (`Data d);
|
||||||
out `El_end
|
out `El_end
|
||||||
in
|
in
|
||||||
let o_bureaucrat b =
|
let o_bureaucrat b =
|
||||||
out (`El_start (tag "bureaucrat"));
|
out (`El_start (tag "bureaucrat"));
|
||||||
o_el "name" b.name;
|
o_el "name" b.name;
|
||||||
o_el "surname" b.surname;
|
o_el "surname" b.surname;
|
||||||
|
@ -781,34 +804,17 @@ let out_w3c_bureaucrats dst bl =
|
||||||
*)
|
*)
|
||||||
|
|
||||||
(*---------------------------------------------------------------------------
|
(*---------------------------------------------------------------------------
|
||||||
Copyright 2007 Daniel C. Bünzli
|
Copyright (c) 2007 Daniel C. Bünzli
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
Permission to use, copy, modify, and/or distribute this software for any
|
||||||
modification, are permitted provided that the following conditions are
|
purpose with or without fee is hereby granted, provided that the above
|
||||||
met:
|
copyright notice and this permission notice appear in all copies.
|
||||||
|
|
||||||
1. Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
|
|
||||||
2. Redistributions in binary form must reproduce the above copyright
|
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||||
notice, this list of conditions and the following disclaimer in the
|
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||||
documentation and/or other materials provided with the
|
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||||
distribution.
|
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||||
|
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||||
3. Neither the name of Daniel C. Bünzli nor the names of
|
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||||
contributors may be used to endorse or promote products derived
|
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||||
from this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
---------------------------------------------------------------------------*)
|
---------------------------------------------------------------------------*)
|
||||||
|
|
Loading…
Reference in New Issue