2021-12-21 14:44:46 +01:00
|
|
|
open Pdfutil
|
|
|
|
|
2021-12-31 17:47:28 +01:00
|
|
|
(* List annotations *)
|
2021-12-21 14:44:46 +01:00
|
|
|
let get_annotation_string encoding pdf annot =
|
|
|
|
match Pdf.lookup_direct pdf "/Contents" annot with
|
|
|
|
| Some (Pdf.String s) -> Cpdfmetadata.encode_output encoding s
|
|
|
|
| _ -> ""
|
|
|
|
|
|
|
|
let print_annotation encoding pdf num s =
|
|
|
|
let s = get_annotation_string encoding pdf s in
|
|
|
|
match s with
|
|
|
|
| "" -> ()
|
|
|
|
| s ->
|
|
|
|
flprint (Printf.sprintf "Page %d: " num);
|
|
|
|
flprint s;
|
|
|
|
flprint "\n"
|
|
|
|
|
|
|
|
let list_page_annotations encoding pdf num page =
|
|
|
|
match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with
|
|
|
|
| Some (Pdf.Array annots) ->
|
|
|
|
iter (print_annotation encoding pdf num) (map (Pdf.direct pdf) annots)
|
|
|
|
| _ -> ()
|
|
|
|
|
2021-12-31 17:47:28 +01:00
|
|
|
(* In the future, we will allow round-tripping of JSON annotations, but this
|
|
|
|
will be complicated. For now, we just turn some indirect things into direct
|
|
|
|
things, so that the output contains all the pertinent information, not for
|
|
|
|
round-tripping, but for mere extraction. *)
|
|
|
|
let make_direct pdf annot =
|
|
|
|
match Pdf.lookup_direct pdf "/A" annot with
|
|
|
|
| None -> annot
|
|
|
|
| Some d -> Pdf.add_dict_entry annot "/A" d
|
|
|
|
|
2021-12-31 20:28:42 +01:00
|
|
|
let rewrite_destination calculate_pagenumber d =
|
|
|
|
match d with
|
|
|
|
| Pdf.Array (Pdf.Indirect i::r) ->
|
|
|
|
Pdf.Array (Pdf.Indirect (calculate_pagenumber (Pdfdest.Fit (Pdfdest.PageObject i)))::r)
|
|
|
|
| x -> x
|
|
|
|
|
|
|
|
let rewrite_destinations pdf annot =
|
|
|
|
let refnums = Pdf.page_reference_numbers pdf in
|
|
|
|
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
|
|
|
|
let calculate_pagenumber = Pdfpage.pagenumber_of_target ~fastrefnums pdf in
|
|
|
|
(* Deal with /Dest in annotation *)
|
|
|
|
match Pdf.lookup_direct pdf "/Dest" annot with
|
|
|
|
| Some d -> Pdf.add_dict_entry annot "/Dest" (rewrite_destination calculate_pagenumber d)
|
|
|
|
| None ->
|
|
|
|
(* Deal with /A --> /D dest when /A --> /S = /GoTo *)
|
|
|
|
match Pdf.lookup_direct pdf "/A" annot with
|
|
|
|
| Some action ->
|
|
|
|
begin match Pdf.lookup_direct pdf "/D" action with
|
|
|
|
| Some d ->
|
|
|
|
Pdf.add_dict_entry
|
|
|
|
annot "/A" (Pdf.add_dict_entry action "/D" (rewrite_destination calculate_pagenumber d))
|
|
|
|
| None -> annot
|
|
|
|
end
|
|
|
|
| None -> annot
|
|
|
|
|
2021-12-21 14:44:46 +01:00
|
|
|
let annotations_json_page pdf page pagenum =
|
|
|
|
match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with
|
|
|
|
| Some (Pdf.Array annots) ->
|
|
|
|
map
|
|
|
|
(fun annot ->
|
2021-12-31 17:47:28 +01:00
|
|
|
let annot = make_direct pdf annot in
|
|
|
|
`List [`Int pagenum; Cpdfjson.json_of_object ~clean_strings:true pdf (fun _ -> ()) false false annot])
|
2021-12-21 14:44:46 +01:00
|
|
|
(map (Pdf.direct pdf) annots)
|
|
|
|
| _ -> []
|
|
|
|
|
|
|
|
let list_annotations_json pdf =
|
|
|
|
let module J = Cpdfyojson.Safe in
|
|
|
|
let pages = Pdfpage.pages_of_pagetree pdf in
|
|
|
|
let pagenums = indx pages in
|
|
|
|
let json = `List (flatten (map2 (annotations_json_page pdf) pages pagenums)) in
|
|
|
|
J.pretty_to_channel stdout json
|
|
|
|
|
|
|
|
let list_annotations ~json encoding pdf =
|
|
|
|
let range = Cpdfpagespec.parse_pagespec pdf "all" in
|
|
|
|
if json
|
|
|
|
then list_annotations_json pdf
|
|
|
|
else Cpdfpage.iter_pages (list_page_annotations encoding pdf) pdf range
|
|
|
|
|
2021-12-31 17:47:28 +01:00
|
|
|
(* Return annotations *)
|
2021-12-21 14:44:46 +01:00
|
|
|
let get_annotations encoding pdf =
|
|
|
|
let pages = Pdfpage.pages_of_pagetree pdf in
|
|
|
|
flatten
|
|
|
|
(map2
|
|
|
|
(fun page pagenumber ->
|
|
|
|
match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with
|
|
|
|
| Some (Pdf.Array annots) ->
|
|
|
|
let strings =
|
|
|
|
map (get_annotation_string encoding pdf) (map (Pdf.direct pdf) annots)
|
|
|
|
in
|
|
|
|
combine (many pagenumber (length strings)) strings
|
|
|
|
| _ -> [])
|
|
|
|
pages
|
|
|
|
(ilist 1 (length pages)))
|
|
|
|
|
|
|
|
(* Equalise the page lengths of two PDFs by chopping or extending the first one.
|
|
|
|
*)
|
|
|
|
let equalise_lengths a b =
|
|
|
|
let a' =
|
|
|
|
if Pdfpage.endpage a < Pdfpage.endpage b then
|
|
|
|
Pdfpage.change_pages false a
|
|
|
|
(Pdfpage.pages_of_pagetree a @
|
|
|
|
many (Pdfpage.blankpage Pdfpaper.a4) (Pdfpage.endpage b - Pdfpage.endpage a))
|
|
|
|
else if Pdfpage.endpage a > Pdfpage.endpage b then
|
|
|
|
Pdfpage.change_pages false a
|
|
|
|
(take (Pdfpage.pages_of_pagetree a) (Pdfpage.endpage b))
|
|
|
|
else a
|
|
|
|
in
|
|
|
|
a', b
|
|
|
|
|
|
|
|
(* Copy annotations *)
|
|
|
|
|
|
|
|
(* FIXME: Why does this chop the files to the same length? Should be able to
|
|
|
|
apply annotations from a longer file to a shorter? *)
|
|
|
|
|
|
|
|
(* Rewrite any annotation destinations to point to pages in the
|
|
|
|
destination file. This prevents pages being copied, and ensures the links are
|
|
|
|
correct Any Indirect link inside a /Dest is rewritten if in the table. If not
|
|
|
|
inside a /Dest, nothing is rewritten. *)
|
|
|
|
let rec renumber_in_dest table indest = function
|
|
|
|
Pdf.Indirect i ->
|
|
|
|
begin
|
|
|
|
try Pdf.Indirect (Hashtbl.find table i) with _ -> Pdf.Indirect i
|
|
|
|
end
|
|
|
|
| Pdf.Array a ->
|
|
|
|
Pdf.recurse_array (renumber_in_dest table indest) a
|
|
|
|
| Pdf.Dictionary d ->
|
|
|
|
Pdf.Dictionary
|
|
|
|
(map
|
|
|
|
(function
|
|
|
|
("/Dest", v) -> ("/Dest", renumber_in_dest table true v)
|
|
|
|
| (k, v) -> (k, renumber_in_dest table indest v))
|
|
|
|
d)
|
|
|
|
| x -> x
|
|
|
|
|
|
|
|
let renumber_in_object pdf objnum table =
|
|
|
|
Pdf.addobj_given_num
|
|
|
|
pdf (objnum, (renumber_in_dest table false (Pdf.lookup_obj pdf objnum)))
|
|
|
|
|
|
|
|
let copy_annotations_page topdf frompdf frompage topage =
|
|
|
|
match Pdf.lookup_direct frompdf "/Annots" frompage.Pdfpage.rest with
|
|
|
|
Some (Pdf.Array frompage_annots as annots) ->
|
|
|
|
let table =
|
|
|
|
hashtable_of_dictionary
|
|
|
|
(combine
|
|
|
|
(Pdf.page_reference_numbers frompdf)
|
|
|
|
(Pdf.page_reference_numbers topdf))
|
|
|
|
in
|
|
|
|
iter
|
|
|
|
(function
|
|
|
|
(* FIXME: We assume they are indirects. Must also do direct, though rare.*)
|
|
|
|
Pdf.Indirect x ->
|
|
|
|
(*Printf.printf "Copying annotation %s which is\n%s\n"
|
|
|
|
(Pdfwrite.string_of_pdf (Pdf.Indirect x))
|
|
|
|
(Pdfwrite.string_of_pdf (Pdf.direct frompdf (Pdf.Indirect
|
|
|
|
x)));*)
|
|
|
|
renumber_in_object frompdf x table
|
|
|
|
| _ -> ())
|
|
|
|
frompage_annots;
|
|
|
|
let objects_to_copy = Pdf.objects_referenced [] [] frompdf annots in
|
|
|
|
iter
|
|
|
|
(fun n ->
|
|
|
|
ignore (Pdf.addobj_given_num topdf (n, Pdf.lookup_obj frompdf n)))
|
|
|
|
objects_to_copy;
|
|
|
|
let topage_annots =
|
|
|
|
match Pdf.lookup_direct frompdf "/Annots" topage.Pdfpage.rest with
|
|
|
|
| Some (Pdf.Array annots) -> annots
|
|
|
|
| _ -> []
|
|
|
|
in
|
|
|
|
let merged_dict = Pdf.Array (frompage_annots @ topage_annots) in
|
|
|
|
let topage' =
|
|
|
|
{topage with Pdfpage.rest =
|
|
|
|
Pdf.add_dict_entry topage.Pdfpage.rest "/Annots" merged_dict}
|
|
|
|
in
|
|
|
|
topdf, topage'
|
|
|
|
| Some x -> topdf, topage
|
|
|
|
| None -> topdf, topage
|
|
|
|
|
|
|
|
let copy_annotations range frompdf topdf =
|
|
|
|
let frompdf, topdf = equalise_lengths frompdf topdf in
|
|
|
|
match Pdf.renumber_pdfs [frompdf; topdf] with
|
|
|
|
| [frompdf; topdf] ->
|
|
|
|
let frompdf_pages = Pdfpage.pages_of_pagetree frompdf in
|
|
|
|
let topdf_pages = Pdfpage.pages_of_pagetree topdf in
|
|
|
|
let pdf = ref topdf
|
|
|
|
and pages = ref []
|
|
|
|
and pnum = ref 1
|
|
|
|
and frompdf_pages = ref frompdf_pages
|
|
|
|
and topdf_pages = ref topdf_pages in
|
|
|
|
(* Go through, updating pdf and collecting new pages. *)
|
|
|
|
while not (isnull !frompdf_pages) do
|
|
|
|
let frompdf_page = hd !frompdf_pages
|
|
|
|
and topdf_page = hd !topdf_pages in
|
|
|
|
let pdf', page =
|
|
|
|
if mem !pnum range
|
|
|
|
then copy_annotations_page !pdf frompdf frompdf_page topdf_page
|
|
|
|
else !pdf, topdf_page
|
|
|
|
in
|
|
|
|
pdf := pdf';
|
|
|
|
pages =| page;
|
|
|
|
incr pnum;
|
|
|
|
frompdf_pages := tl !frompdf_pages;
|
|
|
|
topdf_pages := tl !topdf_pages
|
|
|
|
done;
|
|
|
|
Pdfpage.change_pages true !pdf (rev !pages)
|
|
|
|
| _ -> assert false
|
|
|
|
|
2021-12-31 17:47:28 +01:00
|
|
|
(* Remove annotations *)
|
2021-12-21 14:44:46 +01:00
|
|
|
let remove_annotations range pdf =
|
|
|
|
let remove_annotations_page pagenum page =
|
|
|
|
if mem pagenum range then
|
|
|
|
let rest' =
|
|
|
|
Pdf.remove_dict_entry page.Pdfpage.rest "/Annots"
|
|
|
|
in
|
|
|
|
{page with Pdfpage.rest = rest'}
|
|
|
|
else
|
|
|
|
page
|
|
|
|
in
|
2021-12-27 16:20:07 +01:00
|
|
|
Cpdfpage.process_pages (Cpdfutil.ppstub remove_annotations_page) pdf range
|