From 67a66a8226c1ba60a98c3008d18d392990170448 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Fri, 13 Jan 2023 13:51:43 +0800 Subject: [PATCH] more --- Changes | 2 ++ cpdfannot.ml | 21 ++++++++++----------- cpdfannot.mli | 19 +++++++++++++------ 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/Changes b/Changes index 1176fea..aea0403 100644 --- a/Changes +++ b/Changes @@ -15,7 +15,9 @@ o Allow -utf8 with -split-bookmarks -o @B.pdf to produce UTF8 filenames o -merge-add-bookmarks now works with unicode filenames o Better transformation of some annotation types o -list-annotations[-json] now respects page range +o Merge now merges structure trees (tagged PDF) o Merge now rewrites clashing name tree entries +o UTF8 option for JSON output Fixes: diff --git a/cpdfannot.ml b/cpdfannot.ml index 75b9458..d33691c 100644 --- a/cpdfannot.ml +++ b/cpdfannot.ml @@ -21,15 +21,6 @@ let list_page_annotations encoding pdf num page = iter (print_annotation encoding pdf num) (map (Pdf.direct pdf) annots) | _ -> () -(* In the future, we will allow round-tripping of JSON annotations, but this - will be complicated. For now, we just turn some indirect things into direct - things, so that the output contains all the pertinent information, not for - round-tripping, but for mere extraction. 
*) -let make_direct pdf annot = - match Pdf.lookup_direct pdf "/A" annot with - | None -> annot - | Some d -> Pdf.add_dict_entry annot "/A" d - let rewrite_destination calculate_pagenumber d = match d with | Pdf.Array (Pdf.Indirect i::r) -> @@ -55,17 +46,20 @@ let rewrite_destinations pdf annot = end | None -> annot +let extra = ref [] + let annotations_json_page pdf page pagenum = match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with | Some (Pdf.Array annots) -> map (fun annot -> - let annot = make_direct pdf annot in - `List [`Int pagenum; Cpdfjson.json_of_object ~clean_strings:true pdf (fun _ -> ()) false false annot]) + extra := Pdf.objects_referenced [] [] pdf annot @ !extra; + `List [`Int pagenum; Cpdfjson.json_of_object ~clean_strings:true pdf (fun _ -> ()) false false annot]) (map (Pdf.direct pdf) annots) | _ -> [] let list_annotations_json range pdf = + extra := []; let module J = Cpdfyojson.Safe in let pages = Pdfpage.pages_of_pagetree pdf in let pagenums = indx pages in @@ -73,6 +67,8 @@ let list_annotations_json range pdf = let pairs = option_map (fun (p, n) -> if mem n range then Some (p, n) else None) pairs in let pages, pagenums = split pairs in let json = `List (flatten (map2 (annotations_json_page pdf) pages pagenums)) in + let extra = setify !extra in + Printf.printf "%i extra objects needed\n" (length extra); J.pretty_to_channel stdout json let list_annotations ~json range encoding pdf = @@ -103,6 +99,9 @@ let get_annotations_json pdf = let json = `List (flatten (map2 (annotations_json_page pdf) pages pagenums)) in Pdfio.bytes_of_string (J.to_string json) +(** Set annotations from JSON. Existing annotations will be removed. *) +let set_annotations_json pdf json = () + (* Equalise the page lengths of two PDFs by chopping or extending the first one. 
*) let equalise_lengths a b = diff --git a/cpdfannot.mli b/cpdfannot.mli index 79ce1e4..49d0437 100644 --- a/cpdfannot.mli +++ b/cpdfannot.mli @@ -1,16 +1,23 @@ (** Annotations *) -(** List the annotations to standard output in a given encoding. See cpdfmanual.pdf for the format details. *) -val list_annotations : json:bool -> int list -> Cpdfmetadata.encoding -> Pdf.t -> unit +(** Old fashioned functions first, still in use for backward-compatibility *) -(** Return the annotations as a (pagenumber, content) list *) +(** Return the annotations as a (pagenumber, content) list. *) val get_annotations : Cpdfmetadata.encoding -> Pdf.t -> (int * string) list -(** Get annotations as JSON *) -val get_annotations_json : Pdf.t -> Pdfio.bytes - (** Copy the annotations on a given set of pages from a to b. b is returned. *) val copy_annotations : int list -> Pdf.t -> Pdf.t -> Pdf.t +(** List the annotations to standard output in a given encoding. See cpdfmanual.pdf for the format details. *) +val list_annotations : json:bool -> int list -> Cpdfmetadata.encoding -> Pdf.t -> unit + (** Remove the annotations on given pages. *) val remove_annotations : int list -> Pdf.t -> Pdf.t + +(** Modern functions *) + +(** Get annotations as JSON *) +val get_annotations_json : Pdf.t -> Pdfio.bytes + +(** Set annotations from JSON. Existing annotations will be removed. *) +val set_annotations_json : Pdf.t -> Pdfio.bytes -> unit