Vendor in streams for tjjson
This commit is contained in:
parent
31588000f8
commit
d02ff84dd8
3
Changes
3
Changes
|
@ -1,6 +1,7 @@
|
|||
2.5 (Upcoming 2022)
|
||||
|
||||
o TODO: Vendor in camlp-streams module before deprecation
|
||||
o Vendored in camlp-streams module before deprecation
|
||||
o Can read as well as write PDFs in JSON format with -input-json
|
||||
o Environment variable CPDF_REPRODUCIBLE_DATES for testing
|
||||
o Environment variable CPDF_DEBUG for -debug
|
||||
o Effectively make stderr unbuffered
|
||||
|
|
5
Makefile
5
Makefile
|
@ -1,6 +1,7 @@
|
|||
# Build the cpdf command line tools and top level
|
||||
MODS = tjutil tjutf16 tjllist tjparserMonad tjjson xmlm \
|
||||
cpdfwriteJSON cpdfstrftime cpdfcoord cpdfpagespec cpdfposition cpdf cpdfcommand
|
||||
MODS = cpdfstream tjutil tjutf16 tjllist tjparserMonad tjjson xmlm \
|
||||
cpdfwriteJSON cpdfreadJSON cpdfstrftime cpdfcoord \
|
||||
cpdfpagespec cpdfposition cpdf cpdfcommand
|
||||
|
||||
SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml
|
||||
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
(* Read JSON into a PDF *)
|
||||
let read i = Pdf.empty ()
|
|
@ -0,0 +1 @@
|
|||
val read : Pdfio.input -> Pdf.t
|
|
@ -0,0 +1,236 @@
|
|||
(**************************************************************************)
|
||||
(* *)
|
||||
(* OCaml *)
|
||||
(* *)
|
||||
(* Daniel de Rauglaudre, projet Cristal, INRIA Rocquencourt *)
|
||||
(* *)
|
||||
(* Copyright 1997 Institut National de Recherche en Informatique et *)
|
||||
(* en Automatique. *)
|
||||
(* *)
|
||||
(* All rights reserved. This file is distributed under the terms of *)
|
||||
(* the GNU Lesser General Public License version 2.1, with the *)
|
||||
(* special exception on linking described in the file LICENSE. *)
|
||||
(* *)
|
||||
(**************************************************************************)
|
||||
|
||||
type 'a t = 'a cell option
|
||||
and 'a cell = { mutable count : int; mutable data : 'a data }
|
||||
and 'a data =
|
||||
Sempty
|
||||
| Scons of 'a * 'a data
|
||||
| Sapp of 'a data * 'a data
|
||||
| Slazy of 'a data Lazy.t
|
||||
| Sgen of 'a gen
|
||||
| Sbuffio : buffio -> char data
|
||||
and 'a gen = { mutable curr : 'a option option; func : int -> 'a option }
|
||||
and buffio =
|
||||
{ ic : in_channel; buff : bytes; mutable len : int; mutable ind : int }
|
||||
|
||||
exception Failure
|
||||
exception Error of string
|
||||
|
||||
let count = function
|
||||
| None -> 0
|
||||
| Some { count } -> count
|
||||
let data = function
|
||||
| None -> Sempty
|
||||
| Some { data } -> data
|
||||
|
||||
let fill_buff b =
|
||||
b.len <- input b.ic b.buff 0 (Bytes.length b.buff); b.ind <- 0
|
||||
|
||||
|
||||
let rec get_data : type v. int -> v data -> v data = fun count d -> match d with
|
||||
(* Returns either Sempty or Scons(a, _) even when d is a generator
|
||||
or a buffer. In those cases, the item a is seen as extracted from
|
||||
the generator/buffer.
|
||||
The count parameter is used for calling `Sgen-functions'. *)
|
||||
Sempty | Scons (_, _) -> d
|
||||
| Sapp (d1, d2) ->
|
||||
begin match get_data count d1 with
|
||||
Scons (a, d11) -> Scons (a, Sapp (d11, d2))
|
||||
| Sempty -> get_data count d2
|
||||
| _ -> assert false
|
||||
end
|
||||
| Sgen {curr = Some None} -> Sempty
|
||||
| Sgen ({curr = Some(Some a)} as g) ->
|
||||
g.curr <- None; Scons(a, d)
|
||||
| Sgen g ->
|
||||
begin match g.func count with
|
||||
None -> g.curr <- Some(None); Sempty
|
||||
| Some a -> Scons(a, d)
|
||||
(* Warning: anyone using g thinks that an item has been read *)
|
||||
end
|
||||
| Sbuffio b ->
|
||||
if b.ind >= b.len then fill_buff b;
|
||||
if b.len == 0 then Sempty else
|
||||
let r = Bytes.unsafe_get b.buff b.ind in
|
||||
(* Warning: anyone using g thinks that an item has been read *)
|
||||
b.ind <- succ b.ind; Scons(r, d)
|
||||
| Slazy f -> get_data count (Lazy.force f)
|
||||
|
||||
|
||||
let rec peek_data : type v. v cell -> v option = fun s ->
|
||||
(* consult the first item of s *)
|
||||
match s.data with
|
||||
Sempty -> None
|
||||
| Scons (a, _) -> Some a
|
||||
| Sapp (_, _) ->
|
||||
begin match get_data s.count s.data with
|
||||
Scons(a, _) as d -> s.data <- d; Some a
|
||||
| Sempty -> None
|
||||
| _ -> assert false
|
||||
end
|
||||
| Slazy f -> s.data <- (Lazy.force f); peek_data s
|
||||
| Sgen {curr = Some a} -> a
|
||||
| Sgen g -> let x = g.func s.count in g.curr <- Some x; x
|
||||
| Sbuffio b ->
|
||||
if b.ind >= b.len then fill_buff b;
|
||||
if b.len == 0 then begin s.data <- Sempty; None end
|
||||
else Some (Bytes.unsafe_get b.buff b.ind)
|
||||
|
||||
|
||||
let peek = function
|
||||
| None -> None
|
||||
| Some s -> peek_data s
|
||||
|
||||
|
||||
let rec junk_data : type v. v cell -> unit = fun s ->
|
||||
match s.data with
|
||||
Scons (_, d) -> s.count <- (succ s.count); s.data <- d
|
||||
| Sgen ({curr = Some _} as g) -> s.count <- (succ s.count); g.curr <- None
|
||||
| Sbuffio b ->
|
||||
if b.ind >= b.len then fill_buff b;
|
||||
if b.len == 0 then s.data <- Sempty
|
||||
else (s.count <- (succ s.count); b.ind <- succ b.ind)
|
||||
| _ ->
|
||||
match peek_data s with
|
||||
None -> ()
|
||||
| Some _ -> junk_data s
|
||||
|
||||
|
||||
let junk = function
|
||||
| None -> ()
|
||||
| Some data -> junk_data data
|
||||
|
||||
let rec nget_data n s =
|
||||
if n <= 0 then [], s.data, 0
|
||||
else
|
||||
match peek_data s with
|
||||
Some a ->
|
||||
junk_data s;
|
||||
let (al, d, k) = nget_data (pred n) s in a :: al, Scons (a, d), succ k
|
||||
| None -> [], s.data, 0
|
||||
|
||||
|
||||
let npeek_data n s =
|
||||
let (al, d, len) = nget_data n s in
|
||||
s.count <- (s.count - len);
|
||||
s.data <- d;
|
||||
al
|
||||
|
||||
|
||||
let npeek n = function
|
||||
| None -> []
|
||||
| Some d -> npeek_data n d
|
||||
|
||||
let next s =
|
||||
match peek s with
|
||||
Some a -> junk s; a
|
||||
| None -> raise Failure
|
||||
|
||||
|
||||
let empty s =
|
||||
match peek s with
|
||||
Some _ -> raise Failure
|
||||
| None -> ()
|
||||
|
||||
|
||||
let iter f strm =
|
||||
let rec do_rec () =
|
||||
match peek strm with
|
||||
Some a -> junk strm; ignore(f a); do_rec ()
|
||||
| None -> ()
|
||||
in
|
||||
do_rec ()
|
||||
|
||||
|
||||
(* Stream building functions *)
|
||||
|
||||
let from f = Some {count = 0; data = Sgen {curr = None; func = f}}
|
||||
|
||||
let of_list l =
|
||||
Some {count = 0; data = List.fold_right (fun x l -> Scons (x, l)) l Sempty}
|
||||
|
||||
|
||||
let of_string s =
|
||||
let count = ref 0 in
|
||||
from (fun _ ->
|
||||
(* We cannot use the index passed by the [from] function directly
|
||||
because it returns the current stream count, with absolutely no
|
||||
guarantee that it will start from 0. For example, in the case
|
||||
of [Stream.icons 'c' (Stream.from_string "ab")], the first
|
||||
access to the string will be made with count [1] already.
|
||||
*)
|
||||
let c = !count in
|
||||
if c < String.length s
|
||||
then (incr count; Some s.[c])
|
||||
else None)
|
||||
|
||||
|
||||
let of_bytes s =
|
||||
let count = ref 0 in
|
||||
from (fun _ ->
|
||||
let c = !count in
|
||||
if c < Bytes.length s
|
||||
then (incr count; Some (Bytes.get s c))
|
||||
else None)
|
||||
|
||||
|
||||
let of_channel ic =
|
||||
Some {count = 0;
|
||||
data = Sbuffio {ic = ic; buff = Bytes.create 4096; len = 0; ind = 0}}
|
||||
|
||||
|
||||
(* Stream expressions builders *)
|
||||
|
||||
let iapp i s = Some {count = 0; data = Sapp (data i, data s)}
|
||||
let icons i s = Some {count = 0; data = Scons (i, data s)}
|
||||
let ising i = Some {count = 0; data = Scons (i, Sempty)}
|
||||
|
||||
let lapp f s =
|
||||
Some {count = 0; data = Slazy (lazy(Sapp (data (f ()), data s)))}
|
||||
|
||||
let lcons f s = Some {count = 0; data = Slazy (lazy(Scons (f (), data s)))}
|
||||
let lsing f = Some {count = 0; data = Slazy (lazy(Scons (f (), Sempty)))}
|
||||
|
||||
let sempty = None
|
||||
let slazy f = Some {count = 0; data = Slazy (lazy(data (f ())))}
|
||||
|
||||
(* For debugging use *)
|
||||
|
||||
let rec dump : type v. (v -> unit) -> v t -> unit = fun f s ->
|
||||
print_string "{count = ";
|
||||
print_int (count s);
|
||||
print_string "; data = ";
|
||||
dump_data f (data s);
|
||||
print_string "}";
|
||||
print_newline ()
|
||||
and dump_data : type v. (v -> unit) -> v data -> unit = fun f ->
|
||||
function
|
||||
Sempty -> print_string "Sempty"
|
||||
| Scons (a, d) ->
|
||||
print_string "Scons (";
|
||||
f a;
|
||||
print_string ", ";
|
||||
dump_data f d;
|
||||
print_string ")"
|
||||
| Sapp (d1, d2) ->
|
||||
print_string "Sapp (";
|
||||
dump_data f d1;
|
||||
print_string ", ";
|
||||
dump_data f d2;
|
||||
print_string ")"
|
||||
| Slazy _ -> print_string "Slazy"
|
||||
| Sgen _ -> print_string "Sgen"
|
||||
| Sbuffio _ -> print_string "Sbuffio"
|
|
@ -0,0 +1,111 @@
|
|||
(**************************************************************************)
|
||||
(* *)
|
||||
(* OCaml *)
|
||||
(* *)
|
||||
(* Daniel de Rauglaudre, projet Cristal, INRIA Rocquencourt *)
|
||||
(* *)
|
||||
(* Copyright 1997 Institut National de Recherche en Informatique et *)
|
||||
(* en Automatique. *)
|
||||
(* *)
|
||||
(* All rights reserved. This file is distributed under the terms of *)
|
||||
(* the GNU Lesser General Public License version 2.1, with the *)
|
||||
(* special exception on linking described in the file LICENSE. *)
|
||||
(* *)
|
||||
(**************************************************************************)
|
||||
|
||||
(** Streams and parsers. *)
|
||||
|
||||
type 'a t
|
||||
(** The type of streams holding values of type ['a]. *)
|
||||
|
||||
exception Failure
|
||||
(** Raised by parsers when none of the first components of the stream
|
||||
patterns is accepted. *)
|
||||
|
||||
exception Error of string
|
||||
(** Raised by parsers when the first component of a stream pattern is
|
||||
accepted, but one of the following components is rejected. *)
|
||||
|
||||
|
||||
(** {1 Stream builders} *)
|
||||
|
||||
val from : (int -> 'a option) -> 'a t
|
||||
(** [Stream.from f] returns a stream built from the function [f].
|
||||
To create a new stream element, the function [f] is called with
|
||||
the current stream count. The user function [f] must return either
|
||||
[Some <value>] for a value or [None] to specify the end of the
|
||||
stream.
|
||||
|
||||
Do note that the indices passed to [f] may not start at [0] in the
|
||||
general case. For example, [[< '0; '1; Stream.from f >]] would call
|
||||
[f] the first time with count [2].
|
||||
*)
|
||||
|
||||
val of_list : 'a list -> 'a t
|
||||
(** Return the stream holding the elements of the list in the same
|
||||
order. *)
|
||||
|
||||
val of_string : string -> char t
|
||||
(** Return the stream of the characters of the string parameter. *)
|
||||
|
||||
val of_bytes : bytes -> char t
|
||||
(** Return the stream of the characters of the bytes parameter.
|
||||
@since 4.02.0 *)
|
||||
|
||||
val of_channel : in_channel -> char t
|
||||
(** Return the stream of the characters read from the input channel. *)
|
||||
|
||||
|
||||
(** {1 Stream iterator} *)
|
||||
|
||||
val iter : ('a -> unit) -> 'a t -> unit
|
||||
(** [Stream.iter f s] scans the whole stream s, applying function [f]
|
||||
in turn to each stream element encountered. *)
|
||||
|
||||
|
||||
(** {1 Predefined parsers} *)
|
||||
|
||||
val next : 'a t -> 'a
|
||||
(** Return the first element of the stream and remove it from the
|
||||
stream.
|
||||
@raise Cpdfstream.Failure if the stream is empty. *)
|
||||
|
||||
val empty : 'a t -> unit
|
||||
(** Return [()] if the stream is empty, else raise {!Cpdfstream.Failure}. *)
|
||||
|
||||
|
||||
(** {1 Useful functions} *)
|
||||
|
||||
val peek : 'a t -> 'a option
|
||||
(** Return [Some] of "the first element" of the stream, or [None] if
|
||||
the stream is empty. *)
|
||||
|
||||
val junk : 'a t -> unit
|
||||
(** Remove the first element of the stream, possibly unfreezing
|
||||
it before. *)
|
||||
|
||||
val count : 'a t -> int
|
||||
(** Return the current count of the stream elements, i.e. the number
|
||||
of the stream elements discarded. *)
|
||||
|
||||
val npeek : int -> 'a t -> 'a list
|
||||
(** [npeek n] returns the list of the [n] first elements of
|
||||
the stream, or all its remaining elements if less than [n]
|
||||
elements are available. *)
|
||||
|
||||
(**/**)
|
||||
|
||||
(* The following is for system use only. Do not call directly. *)
|
||||
|
||||
val iapp : 'a t -> 'a t -> 'a t
|
||||
val icons : 'a -> 'a t -> 'a t
|
||||
val ising : 'a -> 'a t
|
||||
|
||||
val lapp : (unit -> 'a t) -> 'a t -> 'a t
|
||||
val lcons : (unit -> 'a) -> 'a t -> 'a t
|
||||
val lsing : (unit -> 'a) -> 'a t
|
||||
|
||||
val sempty : 'a t
|
||||
val slazy : (unit -> 'a t) -> 'a t
|
||||
|
||||
val dump : ('a -> unit) -> 'a t -> unit
|
|
@ -65,9 +65,9 @@ let rec from n = Cons (n, lazy (from (n+1)))
|
|||
(* llist <--> stream *)
|
||||
let rec of_stream str =
|
||||
try
|
||||
Cons (Stream.next str, lazy (of_stream str))
|
||||
Cons (Cpdfstream.next str, lazy (of_stream str))
|
||||
with
|
||||
| Stream.Failure -> Nil
|
||||
| Cpdfstream.Failure -> Nil
|
||||
|
||||
let sllist ?(items:int=20) delim show l =
|
||||
let fin = take items l in
|
||||
|
@ -78,7 +78,7 @@ let sllist ?(items:int=20) delim show l =
|
|||
|
||||
(* string -> llist *)
|
||||
let of_string =
|
||||
of_stream $ Stream.of_string
|
||||
of_stream $ Cpdfstream.of_string
|
||||
|
||||
let of_function f =
|
||||
let buf = Bytes.create 1024 in
|
||||
|
|
|
@ -17,7 +17,7 @@ val continually : (unit -> 'a) -> 'a llist
|
|||
|
||||
val from : int -> int llist
|
||||
|
||||
val of_stream : 'a Stream.t -> 'a llist
|
||||
val of_stream : 'a Cpdfstream.t -> 'a llist
|
||||
val sllist : ?items:int -> string -> ('a -> string) -> 'a llist -> string
|
||||
val of_string : string -> char llist
|
||||
|
||||
|
|
|
@ -150,7 +150,7 @@ let run p state ts =
|
|||
let init_state = (1, 0, ([],'_',[]))
|
||||
|
||||
let run_ch p ch =
|
||||
run p init_state (of_stream (Stream.of_channel ch))
|
||||
run p init_state (of_stream (Cpdfstream.of_channel ch))
|
||||
|
||||
let run_stdin p = run_ch p stdin
|
||||
|
||||
|
|
Loading…
Reference in New Issue