From 909de09aee7b3b2c20911d7e34b3dc8858b02653 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Wed, 3 Jul 2019 14:40:32 +0100 Subject: [PATCH] Add bookmarks when merging --- cpdf.ml | 19 ++++++++++++++++ cpdf.mli | 2 ++ cpdfcommand.ml | 61 ++++++++++++++++++++++++++++++++++++++++++++++---- cpdfmanual.tex | 1 + 4 files changed, 79 insertions(+), 4 deletions(-) diff --git a/cpdf.ml b/cpdf.ml index b48bd6d..b47dcd6 100644 --- a/cpdf.ml +++ b/cpdf.ml @@ -3386,6 +3386,25 @@ let output_xmp_info encoding pdf = with _ -> () +(* Map an old-style info key to the (kind, key) pairs that get_xmp_info looks up in the XMP metadata. Only /Title is handled for now; any other name raises Failure. Used, via get_xmp_info, by Cpdfcommand.add_bookmark_title. NOTE(review): the failure message names check_name although this function is called check. *) +let check = function + "/Title" -> [(adobe, "Title"); (dc, "title")] +| _ -> failwith "Cpdf.check_name not /Title" + +let get_xmp_info pdf name = (* Looks up each (kind, key) pair listed by check in the parsed metadata tree and returns the head of the results that survive lose, or the empty string when the document has no metadata, nothing survives, or any exception is raised while parsing or searching. The check call sits outside the try, so its Failure for unsupported names is not caught here. NOTE(review): presumably lose drops the elements matching its predicate, making the result the first non-empty value - confirm. *) + let tocheck = check name in + match get_metadata pdf with + None -> "" + | Some metadata -> + try + let _, tree = xmltree_of_bytes metadata in + let results = List.map (fun (kind, key) -> match get_data_for kind key tree with Some x -> x | None -> "") tocheck in + match lose (eq "") results with + x::_ -> x + | [] -> "" + with + _ -> "" + (* Set XMP info *) let rec set_xml_field kind fieldname value = function D data -> D data diff --git a/cpdf.mli b/cpdf.mli index e28e4d7..a51747f 100644 --- a/cpdf.mli +++ b/cpdf.mli @@ -98,6 +98,8 @@ val copy_id : bool -> Pdf.t -> Pdf.t -> Pdf.t the PDF minor version to [version].*) val set_pdf_info : ?xmp_also:bool -> ?xmp_just_set:bool -> (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t +val get_xmp_info : Pdf.t -> string -> string (** [get_xmp_info pdf name] returns the XMP metadata value corresponding to the old-style info key [name] (currently only [/Title] is supported), or the empty string if the document has no metadata or no value is found. *) + (** [set_pdf_info (key, value, version)] sets the entry [key] in the /ViewerPreferences directory, updating the PDF minor version to [version].*) val set_viewer_preference : (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t diff --git a/cpdfcommand.ml b/cpdfcommand.ml index 69a4902..6d51adc 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -412,7 +412,9 @@ type args = mutable padwith : string option; mutable 
alsosetxml : bool; mutable justsetxml : bool; - mutable gs_malformed : bool} + mutable gs_malformed : bool; + mutable merge_add_bookmarks : bool; + mutable merge_add_bookmarks_use_titles : bool} let args = {op = None; @@ -501,7 +503,9 @@ let args = padwith = None; alsosetxml = false; justsetxml = false; - gs_malformed = false} + gs_malformed = false; + merge_add_bookmarks = false; + merge_add_bookmarks_use_titles = false} let reset_arguments () = args.op <- None; @@ -581,7 +585,9 @@ let reset_arguments () = args.extract_text_font_size <- None; args.padwith <- None; args.alsosetxml <- false; - args.justsetxml <- false + args.justsetxml <- false; + args.merge_add_bookmarks <- false; + args.merge_add_bookmarks_use_titles <- false (* Do not reset original_filename or cpdflin or was_encrypted or * was_decrypted_with_owner or recrypt or producer or creator or * path_to_ghostscript or gs_malformed, since we want these to work across @@ -1606,6 +1612,12 @@ let setsetmetadatadate d = let setgsmalformed () = args.gs_malformed <- true +let setmergeaddbookmarks () = + args.merge_add_bookmarks <- true + +let setmergeaddbookmarksusetitles () = + args.merge_add_bookmarks_use_titles <- true + (* Parse a control file, make an argv, and then make Arg parse it. 
*) let rec make_control_argv_and_parse filename = control_args := !control_args @ parse_control_file filename @@ -1689,6 +1701,12 @@ and specs = ("-retain-numbering", Arg.Unit set_retain_numbering, " Don't renumber pages when merging"); + ("-merge-add-bookmarks", + Arg.Unit setmergeaddbookmarks, + " Add bookmarks for each file to merged file"); + ("-merge-add-bookmarks-use-titles", + Arg.Unit setmergeaddbookmarksusetitles, + " Use title of document rather than filename"); ("-remove-duplicate-fonts", Arg.Unit set_remove_duplicate_fonts, " Remove duplicate fonts when merging"); @@ -3448,6 +3466,31 @@ let remove_clipping pdf range = in Cpdf.process_pages remove_clipping_page pdf range + +(* Indent the existing bookmarks of a file by one level and prepend a closed, level-0 bookmark whose target is the object number of page 1. The bookmark text is the document title when use_title is set, otherwise the basename of filename. Calls error if page 1 has no object number. *) +let add_bookmark_title filename use_title pdf = + let title = + if use_title then + match Cpdf.get_info_utf8 pdf "/Title", Cpdf.get_xmp_info pdf "/Title" with + "", x | x, "" | _, x -> x (* The first two alternatives pick whichever title is non-empty; when both are non-empty the last alternative selects the XMP one. NOTE(review): when both are empty the bookmark text is empty, with no fallback to the filename - confirm this is intended. *) + else + Filename.basename filename + in + let marks = Pdfmarks.read_bookmarks pdf in + let page1objnum = + match Pdfpage.page_object_number pdf 1 with + None -> error "add_bookmark_title: page not found" + | Some x -> x + in + let newmarks = + {Pdfmarks.level = 0; + Pdfmarks.text = title; + Pdfmarks.target = Pdfdest.XYZ (Pdfdest.PageObject page1objnum, None, None, None); + Pdfmarks.isopen = false} + ::map (function m -> {m with Pdfmarks.level = m.Pdfmarks.level + 1}) marks + in + Pdfmarks.add_bookmarks newmarks pdf + (* Main function *) let go () = match args.op with @@ -3509,10 +3552,20 @@ let go () = then soft_error "Merge requires the owner password for all encrypted files." 
else + let pdfs = + if args.merge_add_bookmarks then + List.map2 + (fun filename pdf -> add_bookmark_title filename args.merge_add_bookmarks_use_titles pdf) + (List.map (function InFile s -> s | StdIn -> "" | AlreadyInMemory _ -> "") names) (* NOTE(review): StdIn and AlreadyInMemory inputs yield an empty name here, so without -merge-add-bookmarks-use-titles their bookmark text is whatever Filename.basename returns for the empty string - confirm this is intended. *) + pdfs + else + pdfs + in (* If args.keep_this_id is set, change the ID to the one from the kept one *) let rangenums = map2 parse_pagespec pdfs ranges in let outpdf = - Pdfmerge.merge_pdfs args.retain_numbering args.remove_duplicate_fonts + Pdfmerge.merge_pdfs + args.retain_numbering args.remove_duplicate_fonts (map string_of_input_kind names) pdfs rangenums in write_pdf false outpdf diff --git a/cpdfmanual.tex b/cpdfmanual.tex index 2a01d1c..d91e08b 100644 --- a/cpdfmanual.tex +++ b/cpdfmanual.tex @@ -20,6 +20,7 @@ %FIXME: Document the rotate dance for adding rotated text %FIXME: Document -gs gs -gs-malformed %FIXME: Document -gs gs -gs-embed-fonts +%FIXME: Document -merge-add-bookmarks, -merge-add-bookmarks-use-titles \documentclass{book} \usepackage{palatino}