From 3d595a14dea914f2585e34887f84eda5f030abb2 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Mon, 1 Jul 2019 14:40:22 +0100 Subject: [PATCH] -create-metadata finished --- cpdf.ml | 95 ++++++++++++++++++++++++++++++++++++++++++-------- cpdf.mli | 8 +++-- cpdfcommand.ml | 48 ++++++------------------- cpdfmanual.tex | 7 +++- 4 files changed, 102 insertions(+), 56 deletions(-) diff --git a/cpdf.ml b/cpdf.ml index cf0ec27..fa5bd6d 100644 --- a/cpdf.ml +++ b/cpdf.ml @@ -4,6 +4,45 @@ open Pdfio let debug = ref false +let xmp_template = +{| + + + + + CREATEDATE + MODDATE + PRODUCER + CREATOR + TITLE + SUBJECT + AUTHOR + KEYWORDS + TRAPPED + + + + CREATEDATE + CREATOR + MODDATE + METADATADATE + + + + TITLE + + + + +|} + (* For debugging *) let report_pdf_size pdf = Pdf.remove_unreferenced pdf; @@ -1171,6 +1210,8 @@ let set_metadata keepversion filename pdf = done; set_metadata_from_bytes keepversion data pdf + + (* \section{Remove metadata} *) let remove_metadata pdf = match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with @@ -3212,7 +3253,9 @@ let get_info raw pdf = match Pdf.lookup_direct pdf name infodict with | Some (Pdf.String s) -> if raw then s else crude_de_unicode s - | _ -> "" + | Some (Pdf.Boolean false) -> "False" + | Some (Pdf.Boolean true) -> "True" + | _ -> if name = "/Trapped" then "False" else "" in getstring @@ -3225,7 +3268,9 @@ let get_info_utf8 pdf = (function name -> match Pdf.lookup_direct pdf name infodict with | Some (Pdf.String s) -> Pdftext.utf8_of_pdfdocstring s - | _ -> "") + | Some (Pdf.Boolean false) -> "False" + | Some (Pdf.Boolean true) -> "True" + | _ -> if name = "/Trapped" then "False" else "") let getstring encoding pdf = match encoding with @@ -3244,7 +3289,8 @@ let output_info encoding pdf = Printf.printf "Creator: %s\n" (getstring "/Creator"); Printf.printf "Producer: %s\n" (getstring "/Producer"); Printf.printf "Created: %s\n" (getstring "/CreationDate"); - Printf.printf "Modified: %s\n" (getstring "/ModDate") + Printf.printf "Modified: %s\n" (getstring "/ModDate"); + Printf.printf "Trapped: %s\n" (getstring "/Trapped") type xmltree = E of Xmlm.tag * xmltree list @@ -3353,7 +3399,6 @@ let output_xmp_info encoding pdf = try let dtd, tree = xmltree_of_bytes metadata in print_out tree "XMP pdf:Keywords" adobe "Keywords"; - print_out tree "XMP pdf:PDFVersion" adobe "PDFVersion"; print_out tree "XMP pdf:Producer" adobe "Producer"; print_out tree "XMP pdf:Trapped" adobe "Trapped"; print_out tree "XMP pdf:Title" adobe "Title"; @@ -3373,13 +3418,13 @@ let output_xmp_info encoding pdf = _ -> () (* Set XMP info *) -let rec set_xml_field only_when_present kind fieldname value = function +let rec set_xml_field kind fieldname value = function D data -> D data | E (((n, n'), m), [D _]) when n = kind && n' = fieldname -> E (((n, n'), m), [D value]) -| E (x, ts) -> E (x, List.map (set_xml_field only_when_present kind fieldname value) ts) +| E (x, ts) -> E (x, List.map (set_xml_field kind fieldname value) ts) -let set_pdf_info_xml only_when_present kind fieldname value xmldata pdf = +let set_pdf_info_xml kind fieldname value xmldata pdf = let dtd, tree = xmltree_of_bytes xmldata in let str = match value with @@ -3388,14 +3433,14 @@ let set_pdf_info_xml only_when_present kind fieldname value xmldata pdf = | Pdf.Boolean false -> "False" | _ -> failwith "set_pdf_info_xml: not a string" in - let newtree = set_xml_field only_when_present kind fieldname str tree in + let newtree = set_xml_field kind fieldname str tree in bytes_of_xmltree (dtd, newtree) -let set_pdf_info_xml_many only_when_present changes value xmldata pdf = +let set_pdf_info_xml_many changes value xmldata pdf = let xmldata = ref xmldata in List.iter (fun (kind, fieldname) -> - xmldata := set_pdf_info_xml only_when_present kind fieldname value !xmldata pdf) + xmldata := set_pdf_info_xml kind fieldname value !xmldata pdf) changes; !xmldata @@ -3508,7 +3553,7 @@ let xmp_date date = with Exit -> make_xmp_date_from_components d -let set_pdf_info ?(xmp_also=false) ?(xmp_also_when_present=false) ?(xmp_just_set=false) (key, value, version) pdf = +let set_pdf_info ?(xmp_also=false) ?(xmp_just_set=false) (key, value, version) pdf = let infodict = match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with | Some d -> d @@ -3523,7 +3568,7 @@ let set_pdf_info ?(xmp_also=false) ?(xmp_also_when_present=false) ?(xmp_just_set pdf.Pdf.minor <- max pdf.Pdf.minor version end; - if xmp_also || xmp_also_when_present then + if xmp_also then begin match get_metadata pdf with None -> pdf | Some xmldata -> @@ -3543,14 +3588,14 @@ let set_pdf_info ?(xmp_also=false) ?(xmp_also_when_present=false) ?(xmp_just_set in set_metadata_from_bytes true - (set_pdf_info_xml_many xmp_also_when_present changes value xmldata pdf) + (set_pdf_info_xml_many changes value xmldata pdf) pdf end else pdf (* Set metadata date *) -let set_metadata_date pdf date only_when_present = +let set_metadata_date pdf date = match get_metadata pdf with None -> pdf | Some xmldata -> @@ -3558,9 +3603,29 @@ let set_metadata_date pdf date only_when_present = let value = match date with "now" -> xmp_date (expand_date "now") | x -> x in set_metadata_from_bytes true - (set_pdf_info_xml_many only_when_present changes (Pdf.String value) xmldata pdf) + (set_pdf_info_xml_many changes (Pdf.String value) xmldata pdf) pdf +let replacements pdf = + let info = get_info_utf8 pdf in + [("CREATEDATE", xmp_date (info "/CreationDate")); + ("MODDATE", xmp_date (info "/ModDate")); + ("PRODUCER", info "/Producer"); + ("CREATOR", info "/Creator"); + ("TITLE", info "/Title"); + ("SUBJECT", info "/Subject"); + ("AUTHOR", info "/Author"); + ("KEYWORDS", info "/Keywords"); + ("TRAPPED", info "/Trapped"); + ("METADATADATE", xmp_date (expand_date "now"))] + +let create_metadata pdf = + let xmp = ref xmp_template in + List.iter + (fun (s, r) -> xmp := string_replace_all s r !xmp) + (replacements pdf); + set_metadata_from_bytes false (bytes_of_string !xmp) pdf + (* \section{Blacken text} *) (* diff --git a/cpdf.mli b/cpdf.mli index 0b88bd9..b69f771 100644 --- a/cpdf.mli +++ b/cpdf.mli @@ -112,7 +112,7 @@ val copy_id : bool -> Pdf.t -> Pdf.t -> Pdf.t (** [set_pdf_info (key, value, version)] sets the entry [key] in the /Info directory, updating the PDF minor version to [version].*) -val set_pdf_info : ?xmp_also:bool -> ?xmp_also_when_present:bool -> ?xmp_just_set:bool -> (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t +val set_pdf_info : ?xmp_also:bool -> ?xmp_just_set:bool -> (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t (** [set_pdf_info (key, value, version)] sets the entry [key] in the /ViewerPreferences directory, updating the PDF minor version to [version].*) @@ -195,7 +195,11 @@ val get_metadata : Pdf.t -> Pdfio.bytes option (** Print metadate to stdout *) val print_metadata : Pdf.t -> unit -val set_metadata_date : Pdf.t -> string -> bool -> Pdf.t +(** Set the metadata date *) +val set_metadata_date : Pdf.t -> string -> Pdf.t + +(** Create XMP metadata from scratch *) +val create_metadata : Pdf.t -> Pdf.t (** {2 Stamping} *) diff --git a/cpdfcommand.ml b/cpdfcommand.ml index a0d7291..9f1e06c 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -179,6 +179,7 @@ type op = | ListSpotColours | RemoveClipping | SetMetadataDate of string + | CreateMetadata let string_of_op = function | CopyFont _ -> "CopyFont" @@ -291,6 +292,7 @@ let string_of_op = function | RemoveTrim -> "RemoveTrim" | RemoveBleed -> "RemoveBleed" | SetMetadataDate _ -> "SetMetadataDate" + | CreateMetadata -> "CreateMetadata" (* Inputs: filename, pagespec. *) type input_kind = @@ -405,7 +407,6 @@ type args = mutable extract_text_font_size : float option; mutable padwith : string option; mutable alsosetxml : bool; - mutable alsosetxmlwhenpresent : bool; mutable justsetxml : bool; mutable gs_malformed : bool} @@ -495,7 +496,6 @@ let args = extract_text_font_size = None; padwith = None; alsosetxml = false; - alsosetxmlwhenpresent = false; justsetxml = false; gs_malformed = false} @@ -577,7 +577,6 @@ let reset_arguments () = args.extract_text_font_size <- None; args.padwith <- None; args.alsosetxml <- false; - args.alsosetxmlwhenpresent <- false; args.justsetxml <- false (* Do not reset original_filename or cpdflin or was_encrypted or * was_decrypted_with_owner or recrypt or producer or creator or @@ -652,7 +651,7 @@ let banned banlist = function AddText _|ScaleContents _|AttachFile _|CopyAnnotations _|SetMetadata _| ThinLines _|SetAuthor _|SetTitle _|SetSubject _|SetKeywords _|SetCreate _| SetModify _|SetCreator _|SetProducer _|SetVersion _|RemoveDictEntry _ | - RemoveClipping | SetMetadataDate _ -> + RemoveClipping | SetMetadataDate _ | CreateMetadata -> mem Pdfcrypt.NoEdit banlist let operation_allowed pdf banlist op = @@ -1591,9 +1590,6 @@ let sethardbox box = let setalsosetxml () = args.alsosetxml <- true -let setalsosetxmlwhenpresent () = - args.alsosetxmlwhenpresent <- true - let setjustsetxml () = args.justsetxml <- true @@ -2026,12 +2022,12 @@ and specs = ("-also-set-xml", Arg.Unit setalsosetxml, " Also set XML metadata"); - ("-also-set-xml-when-present", - Arg.Unit setalsosetxmlwhenpresent, - " Also set XML metadata, but only if field already present"); ("-just-set-xml", Arg.Unit setjustsetxml, " Just set XML metadata, not old-fashioned metadata"); + ("-create-metadata", + Arg.Unit (setop CreateMetadata), + " Create XML metadata from scratch."); ("-set-page-layout", Arg.String setpagelayout, " Set page layout upon document opening"); @@ -3334,32 +3330,6 @@ let remove_clipping pdf range = in Cpdf.process_pages remove_clipping_page pdf range -let change_font_size_ops (r, g, b) dx dy source_size target_size pdf resources content = - let ops = Pdfops.parse_operators pdf resources content in - let tr = Pdftransform.mktranslate dx dy in - let rec process a = function - Pdfops.Op_Tf (fontname, size)::t when fabs (size -. source_size) < 0.01 -> - process - (Pdfops.Op_rg (r, g, b)::Pdfops.Op_Tf (fontname, target_size)::Pdfops.Op_cm tr::a) - t - | h::t -> process (h::a) t - | [] -> rev a - in - [Pdfops.stream_of_ops (process [] ops)] - -let change_font_size pdf range (r, g, b) dx dy source_size target_size = - let change_font_size_page _ page = - let content' = - change_font_size_ops - (r, g, b) dx dy source_size target_size pdf - page.Pdfpage.resources page.Pdfpage.content - in - Cpdf.process_xobjects - pdf page (change_font_size_ops (r, g, b) dx dy source_size target_size); - {page with Pdfpage.content = content'} - in - Cpdf.process_pages change_font_size_page pdf range - (* Main function *) let go () = match args.op with @@ -3751,11 +3721,10 @@ let go () = write_pdf false (Cpdf.set_pdf_info ~xmp_also:args.alsosetxml - ~xmp_also_when_present:args.alsosetxmlwhenpresent ~xmp_just_set:args.justsetxml (key, value, version) pdf) | Some (SetMetadataDate date) -> - write_pdf false (Cpdf.set_metadata_date (get_single_pdf args.op false) date args.alsosetxmlwhenpresent) + write_pdf false (Cpdf.set_metadata_date (get_single_pdf args.op false) date) | Some ((HideToolbar _ | HideMenubar _ | HideWindowUI _ | FitWindow _ | CenterWindow _ | DisplayDocTitle _) as op) -> begin match args.out with @@ -4200,6 +4169,9 @@ let go () = let pdf = get_single_pdf args.op false in let range = parse_pagespec pdf (get_pagespec ()) in write_pdf false (remove_clipping pdf range) + | Some CreateMetadata -> + let pdf = get_single_pdf args.op false in + write_pdf false (Cpdf.create_metadata pdf) let parse_argv () = if args.debug then diff --git a/cpdfmanual.tex b/cpdfmanual.tex index e422417..b72a22f 100644 --- a/cpdfmanual.tex +++ b/cpdfmanual.tex @@ -10,7 +10,12 @@ %FIXME: Activate documentation for -extract-images (when done) %FIXME: Document new -artbox, -trimbox, -bleedbox and -remove-artbox, -remove-trimbox, -remove-bleedbox %FIXME: Document -cropbox and -remove-cropbox as synonyms of -crop and -remove-crop -%FIXME: Document new XMP metadata stuff including setmetadata date and its format +%FIXME: Document new XMP metadata stuff including setmetadata date and its format +%FIXME: Document new -gs-malformed flag. +%FIXME: Document new -create-metadata +%FIXME: Document -remove-clipping +%FIXME: Document new -list-spot-colours + \documentclass{book} \usepackage{palatino} \usepackage{microtype}