From 122285db59335aadb6b8d918c1e71f0a08621987 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Tue, 31 Oct 2023 16:51:57 +0000 Subject: [PATCH] Implement listing of page info in JSON --- cpdfcommand.ml | 2 +- cpdfpage.ml | 50 ++++++++++++++++++++++++++++++++++++-------------- cpdfpage.mli | 2 +- 3 files changed, 38 insertions(+), 16 deletions(-) diff --git a/cpdfcommand.ml b/cpdfcommand.ml index 1be56d6..ee8eaf2 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -3489,7 +3489,7 @@ let go () = | (_, pagespec, _, _, _, _)::_, _ -> let pdf = get_single_pdf args.op true in let range = parse_pagespec_allow_empty pdf pagespec in - Cpdfpage.output_page_info pdf range + Cpdfpage.output_page_info ~json:args.format_json pdf range | _ -> error "list-bookmarks: bad command line" end | Some Metadata -> diff --git a/cpdfpage.ml b/cpdfpage.ml index e30a026..672b8c1 100644 --- a/cpdfpage.ml +++ b/cpdfpage.ml @@ -154,7 +154,7 @@ let change_pattern_matrices_page pdf tr page = {page with Pdfpage.resources = change_pattern_matrices_resources pdf tr page.Pdfpage.resources used} (* Output information for each page *) -let output_page_info pdf range = +let output_page_info ?(json=false) pdf range = (* With ~json:true the page info is printed as JSON; the default (false) keeps the plain-text listing. *) let pages = Pdfpage.pages_of_pagetree pdf and labels = Pdfpagelabels.read pdf in let getbox page box = @@ -173,19 +173,41 @@ let output_page_info pdf range = and rotation page = Pdfpage.int_of_rotation page.Pdfpage.rotate in - iter - (fun pnum -> - let page = select pnum pages in - Printf.printf "Page %i:\n" pnum; - Printf.printf "Label: %s\n" - (try Pdfpagelabels.pagelabeltext_of_pagenumber pnum labels with Not_found -> ""); - Printf.printf "MediaBox: %s\n" (getbox page "/MediaBox"); - Printf.printf "CropBox: %s\n" (getbox page "/CropBox"); - Printf.printf "BleedBox: %s\n" (getbox page "/BleedBox"); - Printf.printf "TrimBox: %s\n" (getbox page "/TrimBox"); - Printf.printf "ArtBox: %s\n" (getbox page "/ArtBox"); - Printf.printf "Rotation: %i\n" (rotation page)) - range + let 
json_entry_of_pnum pnum = (* Builds the JSON object describing the page with number pnum. *) + let getbox_json page box = + match getbox page box with + | "" -> `Null (* An empty string from getbox is emitted as JSON null. *) + | s -> + let a, b, c, d = Cpdfcoord.parse_rectangle (Pdf.empty ()) s in (* Parse the box string into four floats; an empty Pdf.t is passed as the first argument. *) + `List [`Float a; `Float b; `Float c; `Float d] + in + let page = select pnum pages in + `Assoc + [("Page", `Int pnum); + ("Label", (`String (try Pdfpagelabels.pagelabeltext_of_pagenumber pnum labels with Not_found -> ""))); (* The label is the empty string when the lookup raises Not_found. *) + ("MediaBox", getbox_json page "/MediaBox"); + ("CropBox", getbox_json page "/CropBox"); + ("BleedBox", getbox_json page "/BleedBox"); + ("TrimBox", getbox_json page "/TrimBox"); + ("ArtBox", getbox_json page "/ArtBox"); + ("Rotation", `Int (rotation page))] + in + if json then (* JSON mode: a single JSON list holding one object per page number in range. *) + flprint (Cpdfyojson.Safe.pretty_to_string (`List (map json_entry_of_pnum range))) + else (* Text mode: the same Printf-based listing that this patch removes above, now nested under else. *) + iter + (fun pnum -> + let page = select pnum pages in + Printf.printf "Page %i:\n" pnum; + Printf.printf "Label: %s\n" + (try Pdfpagelabels.pagelabeltext_of_pagenumber pnum labels with Not_found -> ""); + Printf.printf "MediaBox: %s\n" (getbox page "/MediaBox"); + Printf.printf "CropBox: %s\n" (getbox page "/CropBox"); + Printf.printf "BleedBox: %s\n" (getbox page "/BleedBox"); + Printf.printf "TrimBox: %s\n" (getbox page "/TrimBox"); + Printf.printf "ArtBox: %s\n" (getbox page "/ArtBox"); + Printf.printf "Rotation: %i\n" (rotation page)) + range let process_pages f pdf range = let pages = Pdfpage.pages_of_pagetree pdf in diff --git a/cpdfpage.mli b/cpdfpage.mli index e890f0f..b445fae 100644 --- a/cpdfpage.mli +++ b/cpdfpage.mli @@ -1,7 +1,7 @@ (** Working with pages *) (** Print page info (Mediabox etc) to standard output. *) -val output_page_info : Pdf.t -> int list -> unit +val output_page_info : ?json:bool -> Pdf.t -> int list -> unit (** Given a function from page number and page to page, a document, and a list of page numbers to apply it to, apply the function to all those pages. *)