Vendor in streams for tjjson
This commit is contained in:
parent
31588000f8
commit
d02ff84dd8
3
Changes
3
Changes
|
@ -1,6 +1,7 @@
|
||||||
2.5 (Upcoming 2022)
|
2.5 (Upcoming 2022)
|
||||||
|
|
||||||
o TODO: Vendor in camlp-streams module before deprecation
|
o Vendored in camlp-streams module before deprecation
|
||||||
|
o Can read as well as write PDFs in JSON format with -input-json
|
||||||
o Environment variable CPDF_REPRODUCIBLE_DATES for testing
|
o Environment variable CPDF_REPRODUCIBLE_DATES for testing
|
||||||
o Environment variable CPDF_DEBUG for -debug
|
o Environment variable CPDF_DEBUG for -debug
|
||||||
o Effectively make stderr unbuffered
|
o Effectively make stderr unbuffered
|
||||||
|
|
5
Makefile
5
Makefile
|
@ -1,6 +1,7 @@
|
||||||
# Build the cpdf command line tools and top level
|
# Build the cpdf command line tools and top level
|
||||||
MODS = tjutil tjutf16 tjllist tjparserMonad tjjson xmlm \
|
MODS = cpdfstream tjutil tjutf16 tjllist tjparserMonad tjjson xmlm \
|
||||||
cpdfwriteJSON cpdfstrftime cpdfcoord cpdfpagespec cpdfposition cpdf cpdfcommand
|
cpdfwriteJSON cpdfreadJSON cpdfstrftime cpdfcoord \
|
||||||
|
cpdfpagespec cpdfposition cpdf cpdfcommand
|
||||||
|
|
||||||
SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml
|
SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
(* Read JSON into a PDF *)
|
||||||
|
let read i = Pdf.empty ()
|
|
@ -0,0 +1 @@
|
||||||
|
val read : Pdfio.input -> Pdf.t
|
|
@ -0,0 +1,236 @@
|
||||||
|
(**************************************************************************)
|
||||||
|
(* *)
|
||||||
|
(* OCaml *)
|
||||||
|
(* *)
|
||||||
|
(* Daniel de Rauglaudre, projet Cristal, INRIA Rocquencourt *)
|
||||||
|
(* *)
|
||||||
|
(* Copyright 1997 Institut National de Recherche en Informatique et *)
|
||||||
|
(* en Automatique. *)
|
||||||
|
(* *)
|
||||||
|
(* All rights reserved. This file is distributed under the terms of *)
|
||||||
|
(* the GNU Lesser General Public License version 2.1, with the *)
|
||||||
|
(* special exception on linking described in the file LICENSE. *)
|
||||||
|
(* *)
|
||||||
|
(**************************************************************************)
|
||||||
|
|
||||||
|
type 'a t = 'a cell option
|
||||||
|
and 'a cell = { mutable count : int; mutable data : 'a data }
|
||||||
|
and 'a data =
|
||||||
|
Sempty
|
||||||
|
| Scons of 'a * 'a data
|
||||||
|
| Sapp of 'a data * 'a data
|
||||||
|
| Slazy of 'a data Lazy.t
|
||||||
|
| Sgen of 'a gen
|
||||||
|
| Sbuffio : buffio -> char data
|
||||||
|
and 'a gen = { mutable curr : 'a option option; func : int -> 'a option }
|
||||||
|
and buffio =
|
||||||
|
{ ic : in_channel; buff : bytes; mutable len : int; mutable ind : int }
|
||||||
|
|
||||||
|
exception Failure
|
||||||
|
exception Error of string
|
||||||
|
|
||||||
|
let count = function
|
||||||
|
| None -> 0
|
||||||
|
| Some { count } -> count
|
||||||
|
let data = function
|
||||||
|
| None -> Sempty
|
||||||
|
| Some { data } -> data
|
||||||
|
|
||||||
|
let fill_buff b =
|
||||||
|
b.len <- input b.ic b.buff 0 (Bytes.length b.buff); b.ind <- 0
|
||||||
|
|
||||||
|
|
||||||
|
let rec get_data : type v. int -> v data -> v data = fun count d -> match d with
|
||||||
|
(* Returns either Sempty or Scons(a, _) even when d is a generator
|
||||||
|
or a buffer. In those cases, the item a is seen as extracted from
|
||||||
|
the generator/buffer.
|
||||||
|
The count parameter is used for calling `Sgen-functions'. *)
|
||||||
|
Sempty | Scons (_, _) -> d
|
||||||
|
| Sapp (d1, d2) ->
|
||||||
|
begin match get_data count d1 with
|
||||||
|
Scons (a, d11) -> Scons (a, Sapp (d11, d2))
|
||||||
|
| Sempty -> get_data count d2
|
||||||
|
| _ -> assert false
|
||||||
|
end
|
||||||
|
| Sgen {curr = Some None} -> Sempty
|
||||||
|
| Sgen ({curr = Some(Some a)} as g) ->
|
||||||
|
g.curr <- None; Scons(a, d)
|
||||||
|
| Sgen g ->
|
||||||
|
begin match g.func count with
|
||||||
|
None -> g.curr <- Some(None); Sempty
|
||||||
|
| Some a -> Scons(a, d)
|
||||||
|
(* Warning: anyone using g thinks that an item has been read *)
|
||||||
|
end
|
||||||
|
| Sbuffio b ->
|
||||||
|
if b.ind >= b.len then fill_buff b;
|
||||||
|
if b.len == 0 then Sempty else
|
||||||
|
let r = Bytes.unsafe_get b.buff b.ind in
|
||||||
|
(* Warning: anyone using g thinks that an item has been read *)
|
||||||
|
b.ind <- succ b.ind; Scons(r, d)
|
||||||
|
| Slazy f -> get_data count (Lazy.force f)
|
||||||
|
|
||||||
|
|
||||||
|
let rec peek_data : type v. v cell -> v option = fun s ->
|
||||||
|
(* consult the first item of s *)
|
||||||
|
match s.data with
|
||||||
|
Sempty -> None
|
||||||
|
| Scons (a, _) -> Some a
|
||||||
|
| Sapp (_, _) ->
|
||||||
|
begin match get_data s.count s.data with
|
||||||
|
Scons(a, _) as d -> s.data <- d; Some a
|
||||||
|
| Sempty -> None
|
||||||
|
| _ -> assert false
|
||||||
|
end
|
||||||
|
| Slazy f -> s.data <- (Lazy.force f); peek_data s
|
||||||
|
| Sgen {curr = Some a} -> a
|
||||||
|
| Sgen g -> let x = g.func s.count in g.curr <- Some x; x
|
||||||
|
| Sbuffio b ->
|
||||||
|
if b.ind >= b.len then fill_buff b;
|
||||||
|
if b.len == 0 then begin s.data <- Sempty; None end
|
||||||
|
else Some (Bytes.unsafe_get b.buff b.ind)
|
||||||
|
|
||||||
|
|
||||||
|
let peek = function
|
||||||
|
| None -> None
|
||||||
|
| Some s -> peek_data s
|
||||||
|
|
||||||
|
|
||||||
|
let rec junk_data : type v. v cell -> unit = fun s ->
|
||||||
|
match s.data with
|
||||||
|
Scons (_, d) -> s.count <- (succ s.count); s.data <- d
|
||||||
|
| Sgen ({curr = Some _} as g) -> s.count <- (succ s.count); g.curr <- None
|
||||||
|
| Sbuffio b ->
|
||||||
|
if b.ind >= b.len then fill_buff b;
|
||||||
|
if b.len == 0 then s.data <- Sempty
|
||||||
|
else (s.count <- (succ s.count); b.ind <- succ b.ind)
|
||||||
|
| _ ->
|
||||||
|
match peek_data s with
|
||||||
|
None -> ()
|
||||||
|
| Some _ -> junk_data s
|
||||||
|
|
||||||
|
|
||||||
|
let junk = function
|
||||||
|
| None -> ()
|
||||||
|
| Some data -> junk_data data
|
||||||
|
|
||||||
|
let rec nget_data n s =
|
||||||
|
if n <= 0 then [], s.data, 0
|
||||||
|
else
|
||||||
|
match peek_data s with
|
||||||
|
Some a ->
|
||||||
|
junk_data s;
|
||||||
|
let (al, d, k) = nget_data (pred n) s in a :: al, Scons (a, d), succ k
|
||||||
|
| None -> [], s.data, 0
|
||||||
|
|
||||||
|
|
||||||
|
let npeek_data n s =
|
||||||
|
let (al, d, len) = nget_data n s in
|
||||||
|
s.count <- (s.count - len);
|
||||||
|
s.data <- d;
|
||||||
|
al
|
||||||
|
|
||||||
|
|
||||||
|
let npeek n = function
|
||||||
|
| None -> []
|
||||||
|
| Some d -> npeek_data n d
|
||||||
|
|
||||||
|
let next s =
|
||||||
|
match peek s with
|
||||||
|
Some a -> junk s; a
|
||||||
|
| None -> raise Failure
|
||||||
|
|
||||||
|
|
||||||
|
let empty s =
|
||||||
|
match peek s with
|
||||||
|
Some _ -> raise Failure
|
||||||
|
| None -> ()
|
||||||
|
|
||||||
|
|
||||||
|
let iter f strm =
|
||||||
|
let rec do_rec () =
|
||||||
|
match peek strm with
|
||||||
|
Some a -> junk strm; ignore(f a); do_rec ()
|
||||||
|
| None -> ()
|
||||||
|
in
|
||||||
|
do_rec ()
|
||||||
|
|
||||||
|
|
||||||
|
(* Stream building functions *)
|
||||||
|
|
||||||
|
let from f = Some {count = 0; data = Sgen {curr = None; func = f}}
|
||||||
|
|
||||||
|
let of_list l =
|
||||||
|
Some {count = 0; data = List.fold_right (fun x l -> Scons (x, l)) l Sempty}
|
||||||
|
|
||||||
|
|
||||||
|
let of_string s =
|
||||||
|
let count = ref 0 in
|
||||||
|
from (fun _ ->
|
||||||
|
(* We cannot use the index passed by the [from] function directly
|
||||||
|
because it returns the current stream count, with absolutely no
|
||||||
|
guarantee that it will start from 0. For example, in the case
|
||||||
|
of [Stream.icons 'c' (Stream.from_string "ab")], the first
|
||||||
|
access to the string will be made with count [1] already.
|
||||||
|
*)
|
||||||
|
let c = !count in
|
||||||
|
if c < String.length s
|
||||||
|
then (incr count; Some s.[c])
|
||||||
|
else None)
|
||||||
|
|
||||||
|
|
||||||
|
let of_bytes s =
|
||||||
|
let count = ref 0 in
|
||||||
|
from (fun _ ->
|
||||||
|
let c = !count in
|
||||||
|
if c < Bytes.length s
|
||||||
|
then (incr count; Some (Bytes.get s c))
|
||||||
|
else None)
|
||||||
|
|
||||||
|
|
||||||
|
let of_channel ic =
|
||||||
|
Some {count = 0;
|
||||||
|
data = Sbuffio {ic = ic; buff = Bytes.create 4096; len = 0; ind = 0}}
|
||||||
|
|
||||||
|
|
||||||
|
(* Stream expressions builders *)
|
||||||
|
|
||||||
|
let iapp i s = Some {count = 0; data = Sapp (data i, data s)}
|
||||||
|
let icons i s = Some {count = 0; data = Scons (i, data s)}
|
||||||
|
let ising i = Some {count = 0; data = Scons (i, Sempty)}
|
||||||
|
|
||||||
|
let lapp f s =
|
||||||
|
Some {count = 0; data = Slazy (lazy(Sapp (data (f ()), data s)))}
|
||||||
|
|
||||||
|
let lcons f s = Some {count = 0; data = Slazy (lazy(Scons (f (), data s)))}
|
||||||
|
let lsing f = Some {count = 0; data = Slazy (lazy(Scons (f (), Sempty)))}
|
||||||
|
|
||||||
|
let sempty = None
|
||||||
|
let slazy f = Some {count = 0; data = Slazy (lazy(data (f ())))}
|
||||||
|
|
||||||
|
(* For debugging use *)
|
||||||
|
|
||||||
|
let rec dump : type v. (v -> unit) -> v t -> unit = fun f s ->
|
||||||
|
print_string "{count = ";
|
||||||
|
print_int (count s);
|
||||||
|
print_string "; data = ";
|
||||||
|
dump_data f (data s);
|
||||||
|
print_string "}";
|
||||||
|
print_newline ()
|
||||||
|
and dump_data : type v. (v -> unit) -> v data -> unit = fun f ->
|
||||||
|
function
|
||||||
|
Sempty -> print_string "Sempty"
|
||||||
|
| Scons (a, d) ->
|
||||||
|
print_string "Scons (";
|
||||||
|
f a;
|
||||||
|
print_string ", ";
|
||||||
|
dump_data f d;
|
||||||
|
print_string ")"
|
||||||
|
| Sapp (d1, d2) ->
|
||||||
|
print_string "Sapp (";
|
||||||
|
dump_data f d1;
|
||||||
|
print_string ", ";
|
||||||
|
dump_data f d2;
|
||||||
|
print_string ")"
|
||||||
|
| Slazy _ -> print_string "Slazy"
|
||||||
|
| Sgen _ -> print_string "Sgen"
|
||||||
|
| Sbuffio _ -> print_string "Sbuffio"
|
|
@ -0,0 +1,111 @@
|
||||||
|
(**************************************************************************)
|
||||||
|
(* *)
|
||||||
|
(* OCaml *)
|
||||||
|
(* *)
|
||||||
|
(* Daniel de Rauglaudre, projet Cristal, INRIA Rocquencourt *)
|
||||||
|
(* *)
|
||||||
|
(* Copyright 1997 Institut National de Recherche en Informatique et *)
|
||||||
|
(* en Automatique. *)
|
||||||
|
(* *)
|
||||||
|
(* All rights reserved. This file is distributed under the terms of *)
|
||||||
|
(* the GNU Lesser General Public License version 2.1, with the *)
|
||||||
|
(* special exception on linking described in the file LICENSE. *)
|
||||||
|
(* *)
|
||||||
|
(**************************************************************************)
|
||||||
|
|
||||||
|
(** Streams and parsers. *)
|
||||||
|
|
||||||
|
type 'a t
|
||||||
|
(** The type of streams holding values of type ['a]. *)
|
||||||
|
|
||||||
|
exception Failure
|
||||||
|
(** Raised by parsers when none of the first components of the stream
|
||||||
|
patterns is accepted. *)
|
||||||
|
|
||||||
|
exception Error of string
|
||||||
|
(** Raised by parsers when the first component of a stream pattern is
|
||||||
|
accepted, but one of the following components is rejected. *)
|
||||||
|
|
||||||
|
|
||||||
|
(** {1 Stream builders} *)
|
||||||
|
|
||||||
|
val from : (int -> 'a option) -> 'a t
|
||||||
|
(** [Stream.from f] returns a stream built from the function [f].
|
||||||
|
To create a new stream element, the function [f] is called with
|
||||||
|
the current stream count. The user function [f] must return either
|
||||||
|
[Some <value>] for a value or [None] to specify the end of the
|
||||||
|
stream.
|
||||||
|
|
||||||
|
Do note that the indices passed to [f] may not start at [0] in the
|
||||||
|
general case. For example, [[< '0; '1; Stream.from f >]] would call
|
||||||
|
[f] the first time with count [2].
|
||||||
|
*)
|
||||||
|
|
||||||
|
val of_list : 'a list -> 'a t
|
||||||
|
(** Return the stream holding the elements of the list in the same
|
||||||
|
order. *)
|
||||||
|
|
||||||
|
val of_string : string -> char t
|
||||||
|
(** Return the stream of the characters of the string parameter. *)
|
||||||
|
|
||||||
|
val of_bytes : bytes -> char t
|
||||||
|
(** Return the stream of the characters of the bytes parameter.
|
||||||
|
@since 4.02.0 *)
|
||||||
|
|
||||||
|
val of_channel : in_channel -> char t
|
||||||
|
(** Return the stream of the characters read from the input channel. *)
|
||||||
|
|
||||||
|
|
||||||
|
(** {1 Stream iterator} *)
|
||||||
|
|
||||||
|
val iter : ('a -> unit) -> 'a t -> unit
|
||||||
|
(** [Stream.iter f s] scans the whole stream s, applying function [f]
|
||||||
|
in turn to each stream element encountered. *)
|
||||||
|
|
||||||
|
|
||||||
|
(** {1 Predefined parsers} *)
|
||||||
|
|
||||||
|
val next : 'a t -> 'a
|
||||||
|
(** Return the first element of the stream and remove it from the
|
||||||
|
stream.
|
||||||
|
@raise Cpdfstream.Failure if the stream is empty. *)
|
||||||
|
|
||||||
|
val empty : 'a t -> unit
|
||||||
|
(** Return [()] if the stream is empty, else raise {!Cpdfstream.Failure}. *)
|
||||||
|
|
||||||
|
|
||||||
|
(** {1 Useful functions} *)
|
||||||
|
|
||||||
|
val peek : 'a t -> 'a option
|
||||||
|
(** Return [Some] of "the first element" of the stream, or [None] if
|
||||||
|
the stream is empty. *)
|
||||||
|
|
||||||
|
val junk : 'a t -> unit
|
||||||
|
(** Remove the first element of the stream, possibly unfreezing
|
||||||
|
it before. *)
|
||||||
|
|
||||||
|
val count : 'a t -> int
|
||||||
|
(** Return the current count of the stream elements, i.e. the number
|
||||||
|
of the stream elements discarded. *)
|
||||||
|
|
||||||
|
val npeek : int -> 'a t -> 'a list
|
||||||
|
(** [npeek n] returns the list of the [n] first elements of
|
||||||
|
the stream, or all its remaining elements if less than [n]
|
||||||
|
elements are available. *)
|
||||||
|
|
||||||
|
(**/**)
|
||||||
|
|
||||||
|
(* The following is for system use only. Do not call directly. *)
|
||||||
|
|
||||||
|
val iapp : 'a t -> 'a t -> 'a t
|
||||||
|
val icons : 'a -> 'a t -> 'a t
|
||||||
|
val ising : 'a -> 'a t
|
||||||
|
|
||||||
|
val lapp : (unit -> 'a t) -> 'a t -> 'a t
|
||||||
|
val lcons : (unit -> 'a) -> 'a t -> 'a t
|
||||||
|
val lsing : (unit -> 'a) -> 'a t
|
||||||
|
|
||||||
|
val sempty : 'a t
|
||||||
|
val slazy : (unit -> 'a t) -> 'a t
|
||||||
|
|
||||||
|
val dump : ('a -> unit) -> 'a t -> unit
|
|
@ -65,9 +65,9 @@ let rec from n = Cons (n, lazy (from (n+1)))
|
||||||
(* llist <--> stream *)
|
(* llist <--> stream *)
|
||||||
let rec of_stream str =
|
let rec of_stream str =
|
||||||
try
|
try
|
||||||
Cons (Stream.next str, lazy (of_stream str))
|
Cons (Cpdfstream.next str, lazy (of_stream str))
|
||||||
with
|
with
|
||||||
| Stream.Failure -> Nil
|
| Cpdfstream.Failure -> Nil
|
||||||
|
|
||||||
let sllist ?(items:int=20) delim show l =
|
let sllist ?(items:int=20) delim show l =
|
||||||
let fin = take items l in
|
let fin = take items l in
|
||||||
|
@ -78,7 +78,7 @@ let sllist ?(items:int=20) delim show l =
|
||||||
|
|
||||||
(* string -> llist *)
|
(* string -> llist *)
|
||||||
let of_string =
|
let of_string =
|
||||||
of_stream $ Stream.of_string
|
of_stream $ Cpdfstream.of_string
|
||||||
|
|
||||||
let of_function f =
|
let of_function f =
|
||||||
let buf = Bytes.create 1024 in
|
let buf = Bytes.create 1024 in
|
||||||
|
|
|
@ -17,7 +17,7 @@ val continually : (unit -> 'a) -> 'a llist
|
||||||
|
|
||||||
val from : int -> int llist
|
val from : int -> int llist
|
||||||
|
|
||||||
val of_stream : 'a Stream.t -> 'a llist
|
val of_stream : 'a Cpdfstream.t -> 'a llist
|
||||||
val sllist : ?items:int -> string -> ('a -> string) -> 'a llist -> string
|
val sllist : ?items:int -> string -> ('a -> string) -> 'a llist -> string
|
||||||
val of_string : string -> char llist
|
val of_string : string -> char llist
|
||||||
|
|
||||||
|
|
|
@ -150,7 +150,7 @@ let run p state ts =
|
||||||
let init_state = (1, 0, ([],'_',[]))
|
let init_state = (1, 0, ([],'_',[]))
|
||||||
|
|
||||||
let run_ch p ch =
|
let run_ch p ch =
|
||||||
run p init_state (of_stream (Stream.of_channel ch))
|
run p init_state (of_stream (Cpdfstream.of_channel ch))
|
||||||
|
|
||||||
let run_stdin p = run_ch p stdin
|
let run_stdin p = run_ch p stdin
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue