This commit is contained in:
John Whitington 2023-01-13 13:51:43 +08:00
parent 1273c47864
commit 67a66a8226
3 changed files with 25 additions and 17 deletions

View File

@ -15,7 +15,9 @@ o Allow -utf8 with -split-bookmarks -o @B.pdf to produce UTF8 filenames
o -merge-add-bookmarks now works with unicode filenames o -merge-add-bookmarks now works with unicode filenames
o Better transformation of some annotation types o Better transformation of some annotation types
o -list-annotations[-json] now respects page range o -list-annotations[-json] now respects page range
o Merge now merges structure trees (tagged PDF)
o Merge now rewrites clashing name tree entries o Merge now rewrites clashing name tree entries
o UTF8 option for JSON output
Fixes: Fixes:

View File

@ -21,15 +21,6 @@ let list_page_annotations encoding pdf num page =
iter (print_annotation encoding pdf num) (map (Pdf.direct pdf) annots) iter (print_annotation encoding pdf num) (map (Pdf.direct pdf) annots)
| _ -> () | _ -> ()
(* In the future, we will allow round-tripping of JSON annotations, but this
will be complicated. For now, we just turn some indirect things into direct
things, so that the output contains all the pertinent information, not for
round-tripping, but for mere extraction. *)
let make_direct pdf annot =
match Pdf.lookup_direct pdf "/A" annot with
| None -> annot
| Some d -> Pdf.add_dict_entry annot "/A" d
let rewrite_destination calculate_pagenumber d = let rewrite_destination calculate_pagenumber d =
match d with match d with
| Pdf.Array (Pdf.Indirect i::r) -> | Pdf.Array (Pdf.Indirect i::r) ->
@ -55,17 +46,20 @@ let rewrite_destinations pdf annot =
end end
| None -> annot | None -> annot
let extra = ref []
let annotations_json_page pdf page pagenum = let annotations_json_page pdf page pagenum =
match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with
| Some (Pdf.Array annots) -> | Some (Pdf.Array annots) ->
map map
(fun annot -> (fun annot ->
let annot = make_direct pdf annot in extra := Pdf.objects_referenced [] [] pdf annot @ !extra;
`List [`Int pagenum; Cpdfjson.json_of_object ~clean_strings:true pdf (fun _ -> ()) false false annot]) `List [`Int pagenum; Cpdfjson.json_of_object ~clean_strings:true pdf (fun _ -> ()) false false annot])
(map (Pdf.direct pdf) annots) (map (Pdf.direct pdf) annots)
| _ -> [] | _ -> []
let list_annotations_json range pdf = let list_annotations_json range pdf =
extra := [];
let module J = Cpdfyojson.Safe in let module J = Cpdfyojson.Safe in
let pages = Pdfpage.pages_of_pagetree pdf in let pages = Pdfpage.pages_of_pagetree pdf in
let pagenums = indx pages in let pagenums = indx pages in
@ -73,6 +67,8 @@ let list_annotations_json range pdf =
let pairs = option_map (fun (p, n) -> if mem n range then Some (p, n) else None) pairs in let pairs = option_map (fun (p, n) -> if mem n range then Some (p, n) else None) pairs in
let pages, pagenums = split pairs in let pages, pagenums = split pairs in
let json = `List (flatten (map2 (annotations_json_page pdf) pages pagenums)) in let json = `List (flatten (map2 (annotations_json_page pdf) pages pagenums)) in
let extra = setify !extra in
Printf.printf "%i extra objects needed\n" (length extra);
J.pretty_to_channel stdout json J.pretty_to_channel stdout json
let list_annotations ~json range encoding pdf = let list_annotations ~json range encoding pdf =
@ -103,6 +99,9 @@ let get_annotations_json pdf =
let json = `List (flatten (map2 (annotations_json_page pdf) pages pagenums)) in let json = `List (flatten (map2 (annotations_json_page pdf) pages pagenums)) in
Pdfio.bytes_of_string (J.to_string json) Pdfio.bytes_of_string (J.to_string json)
(** Set annotations from JSON. Existing annotations will be removed. *)
let set_annotations_json pdf json = ()
(* Equalise the page lengths of two PDFs by chopping or extending the first one. (* Equalise the page lengths of two PDFs by chopping or extending the first one.
*) *)
let equalise_lengths a b = let equalise_lengths a b =

View File

@ -1,16 +1,23 @@
(** Annotations *) (** Annotations *)
(** List the annotations to standard output in a given encoding. See cpdfmanual.pdf for the format details. *) (** Old-fashioned functions first, still in use for backward-compatibility *)
val list_annotations : json:bool -> int list -> Cpdfmetadata.encoding -> Pdf.t -> unit
(** Return the annotations as a (pagenumber, content) list *) (** Return the annotations as a (pagenumber, content) list. *)
val get_annotations : Cpdfmetadata.encoding -> Pdf.t -> (int * string) list val get_annotations : Cpdfmetadata.encoding -> Pdf.t -> (int * string) list
(** Get annotations as JSON *)
val get_annotations_json : Pdf.t -> Pdfio.bytes
(** Copy the annotations on a given set of pages from a to b. b is returned. *) (** Copy the annotations on a given set of pages from a to b. b is returned. *)
val copy_annotations : int list -> Pdf.t -> Pdf.t -> Pdf.t val copy_annotations : int list -> Pdf.t -> Pdf.t -> Pdf.t
(** List the annotations to standard output in a given encoding. See cpdfmanual.pdf for the format details. *)
val list_annotations : json:bool -> int list -> Cpdfmetadata.encoding -> Pdf.t -> unit
(** Remove the annotations on given pages. *) (** Remove the annotations on given pages. *)
val remove_annotations : int list -> Pdf.t -> Pdf.t val remove_annotations : int list -> Pdf.t -> Pdf.t
(** Modern functions *)
(** Get annotations as JSON *)
val get_annotations_json : Pdf.t -> Pdfio.bytes
(** Set annotations from JSON. Existing annotations will be removed. *)
val set_annotations_json : Pdf.t -> Pdfio.bytes -> unit