Vendor in streams for tjjson

This commit is contained in:
John Whitington 2021-09-30 19:55:52 +01:00
parent 31588000f8
commit d02ff84dd8
9 changed files with 360 additions and 8 deletions

View File

@ -1,6 +1,7 @@
2.5 (Upcoming 2022)
o TODO: Vendor in camlp-streams module before deprecation
o Vendored in camlp-streams module before deprecation
o Can read as well as write PDFs in JSON format with -input-json
o Environment variable CPDF_REPRODUCIBLE_DATES for testing
o Environment variable CPDF_DEBUG for -debug
o Effectively make stderr unbuffered

View File

@ -1,6 +1,7 @@
# Build the cpdf command line tools and top level
MODS = tjutil tjutf16 tjllist tjparserMonad tjjson xmlm \
cpdfwriteJSON cpdfstrftime cpdfcoord cpdfpagespec cpdfposition cpdf cpdfcommand
MODS = cpdfstream tjutil tjutf16 tjllist tjparserMonad tjjson xmlm \
cpdfwriteJSON cpdfreadJSON cpdfstrftime cpdfcoord \
cpdfpagespec cpdfposition cpdf cpdfcommand
SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml

2
cpdfreadJSON.ml Normal file
View File

@ -0,0 +1,2 @@
(* Read JSON into a PDF *)
let read i = Pdf.empty ()

1
cpdfreadJSON.mli Normal file
View File

@ -0,0 +1 @@
val read : Pdfio.input -> Pdf.t

236
cpdfstream.ml Normal file
View File

@ -0,0 +1,236 @@
(**************************************************************************)
(* *)
(* OCaml *)
(* *)
(* Daniel de Rauglaudre, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1997 Institut National de Recherche en Informatique et *)
(* en Automatique. *)
(* *)
(* All rights reserved. This file is distributed under the terms of *)
(* the GNU Lesser General Public License version 2.1, with the *)
(* special exception on linking described in the file LICENSE. *)
(* *)
(**************************************************************************)
type 'a t = 'a cell option
and 'a cell = { mutable count : int; mutable data : 'a data }
and 'a data =
Sempty
| Scons of 'a * 'a data
| Sapp of 'a data * 'a data
| Slazy of 'a data Lazy.t
| Sgen of 'a gen
| Sbuffio : buffio -> char data
and 'a gen = { mutable curr : 'a option option; func : int -> 'a option }
and buffio =
{ ic : in_channel; buff : bytes; mutable len : int; mutable ind : int }
exception Failure
exception Error of string
let count = function
| None -> 0
| Some { count } -> count
let data = function
| None -> Sempty
| Some { data } -> data
let fill_buff b =
b.len <- input b.ic b.buff 0 (Bytes.length b.buff); b.ind <- 0
let rec get_data : type v. int -> v data -> v data = fun count d -> match d with
(* Returns either Sempty or Scons(a, _) even when d is a generator
or a buffer. In those cases, the item a is seen as extracted from
the generator/buffer.
The count parameter is used for calling `Sgen-functions'. *)
Sempty | Scons (_, _) -> d
| Sapp (d1, d2) ->
begin match get_data count d1 with
Scons (a, d11) -> Scons (a, Sapp (d11, d2))
| Sempty -> get_data count d2
| _ -> assert false
end
| Sgen {curr = Some None} -> Sempty
| Sgen ({curr = Some(Some a)} as g) ->
g.curr <- None; Scons(a, d)
| Sgen g ->
begin match g.func count with
None -> g.curr <- Some(None); Sempty
| Some a -> Scons(a, d)
(* Warning: anyone using g thinks that an item has been read *)
end
| Sbuffio b ->
if b.ind >= b.len then fill_buff b;
if b.len == 0 then Sempty else
let r = Bytes.unsafe_get b.buff b.ind in
(* Warning: anyone using g thinks that an item has been read *)
b.ind <- succ b.ind; Scons(r, d)
| Slazy f -> get_data count (Lazy.force f)
let rec peek_data : type v. v cell -> v option = fun s ->
(* consult the first item of s *)
match s.data with
Sempty -> None
| Scons (a, _) -> Some a
| Sapp (_, _) ->
begin match get_data s.count s.data with
Scons(a, _) as d -> s.data <- d; Some a
| Sempty -> None
| _ -> assert false
end
| Slazy f -> s.data <- (Lazy.force f); peek_data s
| Sgen {curr = Some a} -> a
| Sgen g -> let x = g.func s.count in g.curr <- Some x; x
| Sbuffio b ->
if b.ind >= b.len then fill_buff b;
if b.len == 0 then begin s.data <- Sempty; None end
else Some (Bytes.unsafe_get b.buff b.ind)
let peek = function
| None -> None
| Some s -> peek_data s
let rec junk_data : type v. v cell -> unit = fun s ->
match s.data with
Scons (_, d) -> s.count <- (succ s.count); s.data <- d
| Sgen ({curr = Some _} as g) -> s.count <- (succ s.count); g.curr <- None
| Sbuffio b ->
if b.ind >= b.len then fill_buff b;
if b.len == 0 then s.data <- Sempty
else (s.count <- (succ s.count); b.ind <- succ b.ind)
| _ ->
match peek_data s with
None -> ()
| Some _ -> junk_data s
let junk = function
| None -> ()
| Some data -> junk_data data
let rec nget_data n s =
if n <= 0 then [], s.data, 0
else
match peek_data s with
Some a ->
junk_data s;
let (al, d, k) = nget_data (pred n) s in a :: al, Scons (a, d), succ k
| None -> [], s.data, 0
let npeek_data n s =
let (al, d, len) = nget_data n s in
s.count <- (s.count - len);
s.data <- d;
al
let npeek n = function
| None -> []
| Some d -> npeek_data n d
let next s =
match peek s with
Some a -> junk s; a
| None -> raise Failure
let empty s =
match peek s with
Some _ -> raise Failure
| None -> ()
let iter f strm =
let rec do_rec () =
match peek strm with
Some a -> junk strm; ignore(f a); do_rec ()
| None -> ()
in
do_rec ()
(* Stream building functions *)
let from f = Some {count = 0; data = Sgen {curr = None; func = f}}
let of_list l =
Some {count = 0; data = List.fold_right (fun x l -> Scons (x, l)) l Sempty}
let of_string s =
let count = ref 0 in
from (fun _ ->
(* We cannot use the index passed by the [from] function directly
because it returns the current stream count, with absolutely no
guarantee that it will start from 0. For example, in the case
of [Stream.icons 'c' (Stream.from_string "ab")], the first
access to the string will be made with count [1] already.
*)
let c = !count in
if c < String.length s
then (incr count; Some s.[c])
else None)
let of_bytes s =
let count = ref 0 in
from (fun _ ->
let c = !count in
if c < Bytes.length s
then (incr count; Some (Bytes.get s c))
else None)
let of_channel ic =
Some {count = 0;
data = Sbuffio {ic = ic; buff = Bytes.create 4096; len = 0; ind = 0}}
(* Stream expressions builders *)
let iapp i s = Some {count = 0; data = Sapp (data i, data s)}
let icons i s = Some {count = 0; data = Scons (i, data s)}
let ising i = Some {count = 0; data = Scons (i, Sempty)}
let lapp f s =
Some {count = 0; data = Slazy (lazy(Sapp (data (f ()), data s)))}
let lcons f s = Some {count = 0; data = Slazy (lazy(Scons (f (), data s)))}
let lsing f = Some {count = 0; data = Slazy (lazy(Scons (f (), Sempty)))}
let sempty = None
let slazy f = Some {count = 0; data = Slazy (lazy(data (f ())))}
(* For debugging use *)
let rec dump : type v. (v -> unit) -> v t -> unit = fun f s ->
print_string "{count = ";
print_int (count s);
print_string "; data = ";
dump_data f (data s);
print_string "}";
print_newline ()
and dump_data : type v. (v -> unit) -> v data -> unit = fun f ->
function
Sempty -> print_string "Sempty"
| Scons (a, d) ->
print_string "Scons (";
f a;
print_string ", ";
dump_data f d;
print_string ")"
| Sapp (d1, d2) ->
print_string "Sapp (";
dump_data f d1;
print_string ", ";
dump_data f d2;
print_string ")"
| Slazy _ -> print_string "Slazy"
| Sgen _ -> print_string "Sgen"
| Sbuffio _ -> print_string "Sbuffio"

111
cpdfstream.mli Normal file
View File

@ -0,0 +1,111 @@
(**************************************************************************)
(* *)
(* OCaml *)
(* *)
(* Daniel de Rauglaudre, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1997 Institut National de Recherche en Informatique et *)
(* en Automatique. *)
(* *)
(* All rights reserved. This file is distributed under the terms of *)
(* the GNU Lesser General Public License version 2.1, with the *)
(* special exception on linking described in the file LICENSE. *)
(* *)
(**************************************************************************)
(** Streams and parsers. *)
type 'a t
(** The type of streams holding values of type ['a]. *)
exception Failure
(** Raised by parsers when none of the first components of the stream
patterns is accepted. *)
exception Error of string
(** Raised by parsers when the first component of a stream pattern is
accepted, but one of the following components is rejected. *)
(** {1 Stream builders} *)
val from : (int -> 'a option) -> 'a t
(** [Stream.from f] returns a stream built from the function [f].
To create a new stream element, the function [f] is called with
the current stream count. The user function [f] must return either
[Some <value>] for a value or [None] to specify the end of the
stream.
Do note that the indices passed to [f] may not start at [0] in the
general case. For example, [[< '0; '1; Stream.from f >]] would call
[f] the first time with count [2].
*)
val of_list : 'a list -> 'a t
(** Return the stream holding the elements of the list in the same
order. *)
val of_string : string -> char t
(** Return the stream of the characters of the string parameter. *)
val of_bytes : bytes -> char t
(** Return the stream of the characters of the bytes parameter.
@since 4.02.0 *)
val of_channel : in_channel -> char t
(** Return the stream of the characters read from the input channel. *)
(** {1 Stream iterator} *)
val iter : ('a -> unit) -> 'a t -> unit
(** [Stream.iter f s] scans the whole stream s, applying function [f]
in turn to each stream element encountered. *)
(** {1 Predefined parsers} *)
val next : 'a t -> 'a
(** Return the first element of the stream and remove it from the
stream.
@raise Cpdfstream.Failure if the stream is empty. *)
val empty : 'a t -> unit
(** Return [()] if the stream is empty, else raise {!Cpdfstream.Failure}. *)
(** {1 Useful functions} *)
val peek : 'a t -> 'a option
(** Return [Some] of "the first element" of the stream, or [None] if
the stream is empty. *)
val junk : 'a t -> unit
(** Remove the first element of the stream, possibly unfreezing
it before. *)
val count : 'a t -> int
(** Return the current count of the stream elements, i.e. the number
of the stream elements discarded. *)
val npeek : int -> 'a t -> 'a list
(** [npeek n] returns the list of the [n] first elements of
the stream, or all its remaining elements if less than [n]
elements are available. *)
(**/**)
(* The following is for system use only. Do not call directly. *)
val iapp : 'a t -> 'a t -> 'a t
val icons : 'a -> 'a t -> 'a t
val ising : 'a -> 'a t
val lapp : (unit -> 'a t) -> 'a t -> 'a t
val lcons : (unit -> 'a) -> 'a t -> 'a t
val lsing : (unit -> 'a) -> 'a t
val sempty : 'a t
val slazy : (unit -> 'a t) -> 'a t
val dump : ('a -> unit) -> 'a t -> unit

View File

@ -65,9 +65,9 @@ let rec from n = Cons (n, lazy (from (n+1)))
(* llist <--> stream *)
let rec of_stream str =
try
Cons (Stream.next str, lazy (of_stream str))
Cons (Cpdfstream.next str, lazy (of_stream str))
with
| Stream.Failure -> Nil
| Cpdfstream.Failure -> Nil
let sllist ?(items:int=20) delim show l =
let fin = take items l in
@ -78,7 +78,7 @@ let sllist ?(items:int=20) delim show l =
(* string -> llist *)
let of_string =
of_stream $ Stream.of_string
of_stream $ Cpdfstream.of_string
let of_function f =
let buf = Bytes.create 1024 in

View File

@ -17,7 +17,7 @@ val continually : (unit -> 'a) -> 'a llist
val from : int -> int llist
val of_stream : 'a Stream.t -> 'a llist
val of_stream : 'a Cpdfstream.t -> 'a llist
val sllist : ?items:int -> string -> ('a -> string) -> 'a llist -> string
val of_string : string -> char llist

View File

@ -150,7 +150,7 @@ let run p state ts =
let init_state = (1, 0, ([],'_',[]))
let run_ch p ch =
run p init_state (of_stream (Stream.of_channel ch))
run p init_state (of_stream (Cpdfstream.of_channel ch))
let run_stdin p = run_ch p stdin