Vendored in JSON library, other updates to begin v2.4

This commit is contained in:
John Whitington 2020-01-24 16:20:40 +08:00
parent 6bb20e8ba2
commit 02574b57d4
14 changed files with 750 additions and 4 deletions

View File

@ -1,3 +1,7 @@
Version 2.4 (to come)
o Vendored in tiny_json from Yoshihiro Imai via Jan Furuse
Version 2.3 (patchlevel 1, December 2019)
o Fixed bug which prevented -info working on encrypted files

View File

@ -1,5 +1,7 @@
# Build the cpdf command line tools and top level
MODS = xmlm cpdfstrftime cpdfcoord cpdf cpdfcommand
MODS = tjutil tjutf16 tjllist tjparserMonad tjjson \
xmlm \
cpdfstrftime cpdfcoord cpdf cpdfcommand
SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml

View File

@ -1,9 +1,9 @@
(* cpdf command line tools *)
let demo = false
let noncomp = false
let noncomp = true
let major_version = 2
let minor_version = 3
let version_date = "(patchlevel 1, 11th December 2019)"
let minor_version = 4
let version_date = "(devel, 24th January 2020)"
open Pdfutil
open Pdfio

288
tjjson.ml Normal file
View File

@ -0,0 +1,288 @@
(* With regard to tjjson.ml(i), tjllist.ml(i), tjbase64.ml(i),
* tjparserMonad.ml(i), tjutil.ml(i), tjutf16.ml(i): *)
(* Copyright (c) 2011 Yoshihiro Imai
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.*)
open Tjutil
open TjparserMonad
module P = TjparserMonad
type t =
| String of string
| Number of string (* float is not appropriate for decoding 64bit int *)
| Object of obj
| Array of t list
| Bool of bool
| Null
and obj = (string * t) list
exception JSON_NotObject of t
exception JSON_InvalidField of (string)
exception JSON_CastErr of string
exception JSON_UnknownErr of string
(* CR jfuruse: it uses string concat. Very bad efficiency. *)
let show =
let rec show_aux depth = function
| String s -> "str(" ^s^ ")"
| Number x -> !%"num(%s)" x
| Object fs ->
let indent d = String.make d '\t' in
"{\n"
^indent depth^ slist (",\n"^ indent depth) (fun (k,v) -> k^":"^ (show_aux (depth+1)) v) fs
^"\n"^indent(depth-1)^"}"
| Array xs -> "[" ^slist "," (show_aux depth) xs ^ "]"
| Bool true -> "TRUE"
| Bool false -> "FALSE"
| Null -> "NULL"
in
show_aux 1
let rec format_list sep f ppf = function
| [] -> ()
| [x] -> f ppf x
| x::xs -> f ppf x; Format.fprintf ppf sep; format_list sep f ppf xs
(* CR jfuruse: Need test! *)
let rec format ppf =
let open Format in
function
| String s ->
let buf = Buffer.create (String.length s * 2) in
Buffer.add_char buf '"';
for i = 0 to String.length s - 1 do
let c = String.unsafe_get s i in
match c with
| '"' -> Buffer.add_string buf "\\\""
| '\\' -> Buffer.add_string buf "\\\\"
| '\b' -> Buffer.add_string buf "\\b"
| '\012' -> Buffer.add_string buf "\\f"
| '\n' -> Buffer.add_string buf "\\n"
| '\r' -> Buffer.add_string buf "\\r"
| '\t' -> Buffer.add_string buf "\\t"
| _ when Char.code c <= 32 && c <> ' ' ->
Printf.ksprintf (Buffer.add_string buf) "\\u%04X" (Char.code c)
| _ -> Buffer.add_char buf c
done;
Buffer.add_char buf '"';
pp_print_string ppf (Buffer.contents buf)
| Number s -> fprintf ppf "%s" s
| Object o ->
fprintf ppf "{ @[%a }@]"
(format_list ",@ " (fun ppf (s,v) -> fprintf ppf "@[\"%s\": @[<2>%a@]@]" s format v)) o
| Array ts ->
fprintf ppf "[ @[%a ]@]"
(format_list ",@ " format) ts
| Bool b -> fprintf ppf "%b" b
| Null -> fprintf ppf "null"
let getf field t =
match t with
| Object o ->
begin try List.assoc field o with
| _ -> raise (JSON_InvalidField (field))
end
| _ -> raise (JSON_NotObject t)
let getf_opt field t =
match t with
| Object o ->
begin try Some (List.assoc field o) with
| _ -> None
end
| _ -> None
let as_bool = function
| Bool true -> true
| Bool false -> false
| v -> raise (JSON_CastErr ("as_bool:" ^ show v))
let as_object = function
| Object obj -> obj
| v -> raise (JSON_CastErr ("as_object:" ^ show v))
let as_float = function
| Number s -> float_of_string s (* may fail, or returns wrong result *)
| v -> raise (JSON_CastErr ("as_float:" ^ show v))
let as_string = function
| String s -> s
| v -> raise (JSON_CastErr ("as_string:" ^ show v))
let as_list = function
| Array l -> l
| v -> raise (JSON_CastErr ("as_list:" ^ show v))
let as_int = function
| Number s -> int_of_string s (* may fail, or returns wrong result *)
| v -> raise (JSON_CastErr ("as_int:" ^ show v))
(*parser*)
let whitespace = many (char '\n' <|> char ' ' <|> char '\t' <|> char '\r')
let string s =
let rec iter i =
if i < String.length s then
char s.[i] >> iter (i+1)
else return s
in
iter 0
(*
let alp =
char1 >>= fun c -> if c<>' ' && c<>'\n' && c<>'\t' && c<>'\r' then return c else error""
let alps0 = many alp
let alps = alp >>= fun c -> many alp >>= fun cs -> return (string_of_chars (c::cs))
*)
type token =
| ObjOpen
| ObjClose
| ListOpen
| ListClose
| Comma
| Colon
| TTrue
| TFalse
| TNull
| TString of string
| TNumber of string (* we keep the string repr. *)
let lit_string =
let four_hex_digits =
let hex = char1 >>= function
| '0'..'9' | 'A'..'F' | 'a'..'f' as c -> return c
| _ -> error ""
in
hex >>= fun d1 -> hex >>= fun d2 -> hex >>= fun d3 -> hex >>= fun d4 ->
let s = string_of_chars [d1;d2;d3;d4] in
let n = int_of_string ("0x" ^ Tjutf16.utf16c_to_utf8c s) in
let m, n1 = n / (16*16), n mod (16*16) in
let n3,n2 = m / (16*16), m mod (16*16) in
let cs = List.map char_of_int
begin match [n3;n2;n1] with
| [0; 0; _] -> [n1]
| [0; _; _] -> [n2; n1]
| _ -> [n3; n2; n1]
end
in
return (string_of_chars cs)
in
let lit_char =
char1 >>= function
| '\"' -> error ""
| '\\' -> char1 >>=
begin function
| '\"' | '\\' | '/' as c -> return (string1 c)
| 'b' -> return "\b"
(* | 'f' -> return "\f"*)
| 'n' -> return "\n"
| 'r' -> return "\r"
| 't' -> return "\t"
| 'u' -> four_hex_digits
| _ -> error ""
end
| c -> return (string1 c)
in
char '\"' >> many lit_char >>= fun ss -> char '\"' >> return (TString (slist "" id ss))
let digits =
let digit =
char1 >>= function
| '0'..'9' | '-' | '.' | 'e' | 'E' | '+' as c -> return c
| _ -> error "digit"
in
many1 digit >>= (return $ string_of_chars)
let lit_number = (* TODO *)
(* We cannot simply use [float_of_string] here, if we want to handle int64.
int64 and double are both 64bits, which means double cannot express all the int64!!!
*)
digits >>= fun x -> return (TNumber x)
let token1 =
let aux =
(char '{' >> return ObjOpen)
<|>
(char '}' >> return ObjClose)
<|>
(char '[' >> return ListOpen)
<|>
(char ']' >> return ListClose)
<|>
(char ',' >> return Comma)
<|>
(char ':' >> return Colon)
<|>
(string "true" >> return TTrue)
<|>
(string "false" >> return TFalse)
<|>
(string "null" >> return TNull)
<|>
lit_string
<|>
lit_number
in
whitespace >> aux
let token t =
token1 >>= fun x -> if t = x then return t else error "token"
let json_string =
token1 >>= function TString s -> return s | _ -> error "json_string"
let json_number =
token1 >>= function TNumber x -> return x | _ -> error "json_number"
let rec json (): t P.t =
begin
let field =
json_string >>= fun key -> token Colon >> json () >>= fun v -> return (key, v)
in
(token ObjOpen >> sep (token Comma) field >>= fun fields -> token ObjClose >>
return @@ Object fields)
<|>
(token ListOpen >>= (fun _ -> sep (token Comma) (json()) >>= fun vs -> opt (token Comma) >> token ListClose >>
return @@ Array vs))
<|>
(token TTrue >> return (Bool true))
<|>
(token TFalse >> return (Bool false))
<|>
(token TNull >> return Null)
<|>
(json_string >>= fun s -> return @@ String s)
<|>
(json_number >>= fun x -> return @@ Number x)
end
let parse_ch ch = run_ch (json()) ch
let parse s = run_string (json()) s
let parse_function f = run_function (json()) f

38
tjjson.mli Normal file
View File

@ -0,0 +1,38 @@
type t =
String of string
| Number of string (* We keep its string repr. *)
| Object of obj
| Array of t list
| Bool of bool
| Null
and obj = (string * t) list
exception JSON_NotObject of t
exception JSON_InvalidField of string
exception JSON_CastErr of string
exception JSON_UnknownErr of string
(** {6 Printer and formatter} *)
val show : t -> string
val format : Format.formatter -> t -> unit
(** {6 Object field access} *)
val getf : string -> t -> t
(** Get field from an object. Failure raises an exception. *)
val getf_opt : string -> t -> t option
(** Get field from an object. Failure is reported as [None] *)
(** {6 Coercions. They may fail and raise JSON_CastErr.} *)
val as_bool : t -> bool
val as_object : t -> obj
val as_float : t -> float
val as_string : t -> string
val as_list : t -> t list
val as_int : t -> int
(** {6 Parsers} *)
val parse_ch : in_channel -> t
val parse : string -> t
val parse_function : (bytes -> int -> int) -> t

102
tjllist.ml Normal file
View File

@ -0,0 +1,102 @@
open Tjutil
type 'a llist = Nil | Cons of 'a * 'a llist Lazy.t
type 'a t = 'a llist Lazy.t
let to_list ll =
let rec to_list st = function
| Nil -> List.rev st
| Cons (x, xs) ->
to_list (x :: st) (Lazy.force xs)
in
to_list [] ll
let hd = function | Nil -> failwith "hd" | Cons (x, _xs) -> x
let tl = function | Nil -> failwith "tl" | Cons (_x, xs) -> !$xs
let rec take n l =
match n, l with
| 0, _ -> []
| _n, Nil -> []
| n, Cons (x, xs) -> x :: take (n-1) !$xs
let rec map f = function
| Nil -> Nil
| Cons (x, xs) -> Cons (f x, lazy (map f !$xs))
let rec repeat x = Cons (x, lazy (repeat x));;
let rec app xs ys =
match xs with
| Nil -> ys
| Cons (x, xs) -> Cons (x, lazy (app (!$ xs) ys))
let rec combine xs ys =
match (xs,ys) with
| Cons(x,xs),Cons(y,ys) -> Cons((x,y), lazy (combine !$xs !$ys))
| _ -> Nil
let rec filter f xs =
match xs with
| Nil -> Nil
| Cons(x, xs) when f x -> Cons (x, lazy (filter f !$xs))
| Cons(_x, xs) -> filter f !$xs
let rec concat xss =
match xss with
| Nil -> Nil
| Cons(Nil, xss') -> concat !$xss'
| Cons(Cons(x,lazy xs'), xss') -> Cons(x, lazy (concat (Cons(xs', xss'))))
let rec unfoldr f b =
match f b with
| Some (a, new_b) -> Cons(a, lazy (unfoldr f new_b))
| None -> Nil
let continually make =
let f () = try Some(make (), ()) with _ -> None in
unfoldr f ()
(* int llist *)
let rec from n = Cons (n, lazy (from (n+1)))
(* llist <--> stream *)
let rec of_stream str =
try
Cons (Stream.next str, lazy (of_stream str))
with
| Stream.Failure -> Nil
let sllist ?(items:int=20) delim show l =
let fin = take items l in
if List.length fin <= items then
slist delim show fin
else
slist delim show fin ^ "..."
(* string -> llist *)
let of_string =
of_stream $ Stream.of_string
let of_function f =
let buf = Bytes.create 1024 in
let rec fill () =
let read = f buf 1024 in
if read = 0 then Nil
else Cons (Bytes.unsafe_get buf 0, lazy (use read 1))
and use read pos =
if read <= pos then fill ()
else Cons (Bytes.unsafe_get buf pos, lazy (use read (pos+1)))
in
fill ()

29
tjllist.mli Normal file
View File

@ -0,0 +1,29 @@
type 'a llist = Nil | Cons of 'a * 'a llist Lazy.t
(** It is a lazy list, but the first element is already forced *)
type 'a t = 'a llist Lazy.t
val hd : 'a llist -> 'a
val tl : 'a llist -> 'a llist
val take : int -> 'a llist -> 'a list
val map : ('a -> 'b) -> 'a llist -> 'b llist
val repeat : 'a -> 'a llist
val app : 'a llist -> 'a llist -> 'a llist
val combine : 'a llist -> 'b llist -> ('a * 'b) llist
val filter : ('a -> bool) -> 'a llist -> 'a llist
val concat : 'a llist llist -> 'a llist
val unfoldr : ('b -> ('a * 'b) option) -> 'b -> 'a llist
val continually : (unit -> 'a) -> 'a llist
val from : int -> int llist
val of_stream : 'a Stream.t -> 'a llist
val sllist : ?items:int -> string -> ('a -> string) -> 'a llist -> string
val of_string : string -> char llist
val of_function : (bytes -> int -> int) -> char llist
(** [of_function f]: [f buf len] is a filler, a function to fill [buf]
with at most [len] chars. If it reaches the end of the input it returns [0].
*)
val to_list : 'a llist -> 'a list

165
tjparserMonad.ml Normal file
View File

@ -0,0 +1,165 @@
open Tjutil
open Tjllist
type ts = char llist
type state = int * int * (char list * char * char list)
type error = state * string
type 'a t = state -> ts -> ('a * state * ts, error) either
exception ParseError of string
let lt_pos (l1,p1,_) (l2,p2,_) =
if l1 < l2 then true
else if l1 = l2 then p1 < p2
else false
let eplus (st1,msg1) (st2,msg2) =
if lt_pos st1 st2 then (st2,msg2) else (st1,msg1)
let showerr ((line,pos,(pre,c,post)),msg) =
!%"line %d, %d: %s: pre=%S char=%C post=%S" line pos msg
(string_of_chars pre)
c
(string_of_chars post)
let return : 'a -> 'a t =
fun x ->
fun state code -> Inl (x, state, code)
let error msg = fun state _code -> Inr (state, msg)
let (>>=) : 'a t -> ('a -> 'b t) -> 'b t =
fun p f ->
fun state code ->
match p state code with
| Inl (x, state', ts) -> f x state' ts
| Inr err -> Inr err
let (>>) : 'a t -> 'b t -> 'b t =
fun p1 p2 ->
p1 >>= fun _ -> p2
let (<.<) : 'a t -> 'b t -> 'a t =
fun p1 p2 ->
p1 >>= fun x -> p2 >> return x
let ( ^? ) : 'a t -> string -> 'a t =
fun p msg ->
fun state code ->
match p state code with
| Inl l -> Inl l
| Inr (st,msg0) -> Inr (st,msg ^": "^msg0)
(* (<|>) : 'a m -> 'a m -> 'a m *)
let (<|>) : 'a t -> 'a t -> 'a t =
fun p1 p2 ->
fun state code ->
match p1 state code with
| Inl (x1, state', ts) -> Inl (x1, state', ts)
| Inr err1 ->
begin match p2 state code with
| Inl (x2, state', ts) -> Inl (x2,state',ts)
| Inr err2 -> Inr (eplus err1 err2)
end
(*
let (<|?>) p1 p2 = fun state code ->
match p1 state code with
| Inl (x1, state', ts) -> Inl (x1, state', ts)
| Inr err1 ->
print_endline err1;
begin match p2 state code with
| Inl (x2, state', ts) -> Inl (x2,state',ts)
| Inr err2 -> Inr (eplus err1 err2)
end
*)
let rec many : 'a t -> ('a list) t =
fun p ->
(p >>= fun x -> many p >>= fun xs -> return (x::xs))
<|> (return [])
let many1 p =
p >>= fun x -> many p >>= fun xs -> return (x::xs)
let sep separator p =
(p >>= fun x -> many (separator >> p) >>= fun xs -> return (x::xs))
<|> (return [])
let opt : 'a t -> ('a option) t =
fun p ->
(p >>= fun x -> return (Some x)) <|> (return None)
let _char1_with_debug state = function
| Nil -> Inr (state,"(Nil)")
| Cons (x,xs) ->
let next (pre,x0, _) =
let pre' = if List.length pre < 100 then pre @ [x0]
else List.tl pre @ [x0]
in
(pre' , x, take 100 !$xs)
in
match x, state with
| '\n', (line,_pos,cs) ->
Inl (x,(line+1,-1, next cs), !$xs)
| _, (line,pos,cs) ->
Inl (x,(line, pos+1, next cs),!$xs)
let char1_without_debug state = function
| Nil -> Inr (state,"(Nil)")
| Cons (x,xs) -> Inl (x, state, !$xs)
let char1 = char1_without_debug
let char_when f = char1 >>= fun c ->
if f c then return c
else error (!%"(char:'%c')" c)
let char c = char_when ((=) c)
let keyword w =
let rec iter i =
if i < String.length w then
char w.[i] >> iter (i+1)
else return w
in
iter 0
let make_ident f =
many1 (char_when f) >>= fun cs ->
return (string_of_chars cs)
let int =
opt (char '-') >>= fun minus ->
make_ident (function '0'..'9' -> true | _ -> false) >>= fun s ->
return
begin match minus with
| None -> int_of_string s
| Some _ -> - int_of_string s
end
let run p state ts =
match p state ts with
| Inl (x,_state',_xs) -> x
| Inr err ->
raise (ParseError (showerr err))
let init_state = (1, 0, ([],'_',[]))
let run_ch p ch =
run p init_state (of_stream (Stream.of_channel ch))
let run_stdin p = run_ch p stdin
let run_file p filename =
open_in_with filename (fun ch -> run_ch p ch)
let run_string p s =
run p init_state (of_string s)
let run_function p f =
run p init_state (of_function f)

38
tjparserMonad.mli Normal file
View File

@ -0,0 +1,38 @@
type ts
type state
type error
type 'a t
val error : string -> 'a t
val showerr : error -> string
val return : 'a -> 'a t
val ( >>= ) : 'a t -> ('a -> 'b t) -> 'b t
val ( >> ) : 'a t -> 'b t -> 'b t
val ( <.< ) : 'a t -> 'b t -> 'a t
val ( ^? ) : 'a t -> string -> 'a t
val ( <|> ) : 'a t -> 'a t -> 'a t
val many : 'a t -> 'a list t
val many1 : 'a t -> 'a list t
val sep : 'a t -> 'b t -> 'b list t
val opt : 'a t -> 'a option t
val char1 : char t
exception ParseError of string
val char_when : (char -> bool) -> char t
val char : char -> char t
val keyword : string -> string t
val make_ident : (char -> bool) -> string t
val int : int t
val init_state : state
(* They may raise ParseError exception *)
val run_ch : 'a t -> in_channel -> 'a
val run_stdin : 'a t -> 'a
val run_file : 'a t -> string -> 'a
val run_string : 'a t -> string -> 'a
val run_function : 'a t -> (bytes -> int -> int) -> 'a

23
tjutf16.ml Normal file
View File

@ -0,0 +1,23 @@
open Tjutil
let rec (>>) x n =
if n > 0 then (x >> (n-1)) / 2
else x
(* CR jfuruse: I bet it is equivalent with (lsr) *)
let (>>) x n =
let res = x >> n in
assert (x lsr n = res);
res
let utf16c_to_utf8c(x) =
let n = int_of_string("0x" ^ x) in
if n < 0x80 then
to_hex n
else begin
(if n < 0x800 then
to_hex((n >> 6) land 0x1f lor 0xc0)
else
to_hex ((n >> 12) lor 0xe0) ^ to_hex((n >> 6) land 0x3f lor 0x80))
^ to_hex (n land 0x3f lor 0x80)
end

1
tjutf16.mli Normal file
View File

@ -0,0 +1 @@
val utf16c_to_utf8c : string -> string

45
tjutil.ml Normal file
View File

@ -0,0 +1,45 @@
external ( @@ ) : ('a -> 'b) -> 'a -> 'b = "%apply"
let ($) g f = fun x -> g (f x)
let id x = x
let tee f x = ignore @@ f x; x
let (!%) = Printf.sprintf
let (!$) x = Lazy.force x
let slist delim show l =
String.concat delim @@ List.map show l
let string_of_chars = slist "" (String.make 1)
let string1 c = String.make 1 c
type ('l, 'r) either = Inl of 'l | Inr of 'r
let maybe f x =
try Inl (f x) with e -> Inr e
let value = function
Inl v -> v | Inr e -> raise e
let open_with (opn, close) filepath f =
let ch = opn filepath in
value @@ tee (fun _ -> close ch) (maybe f ch)
let open_in_with filepath f = open_with (open_in, close_in) filepath f
let to_hex n =
let to_char = function
| x when 0<=x && x<=9 -> (string_of_int x).[0]
| x when 10<=x && x<=15 -> char_of_int (int_of_char 'A'+(x-10))
| _ -> failwith"tohex MNH"
in
let rec iter store n =
if n < 16 then
to_char n :: store
else
let r,q = n / 16, n mod 16 in
iter (to_char q :: store) r
in
if n < 0 then raise (Invalid_argument (!%"to_hex: (%d)" n))
else string_of_chars @@ iter [] n

11
tjutil.mli Normal file
View File

@ -0,0 +1,11 @@
external ( @@ ) : ('a -> 'b) -> 'a -> 'b = "%apply"
val ( $ ) : ('a -> 'b) -> ('c -> 'a) -> 'c -> 'b
val id : 'a -> 'a
val ( !% ) : ('a, unit, string) format -> 'a
val ( !$ ) : 'a Lazy.t -> 'a
val slist : string -> ('a -> string) -> 'a list -> string
val string_of_chars : char list -> string
val string1 : char -> string
type ('a, 'b) either = Inl of 'a | Inr of 'b
val open_in_with : string -> (in_channel -> 'a) -> 'a
val to_hex : int -> string

0
xmlm.mli Executable file → Normal file
View File