-create-metadata finished

This commit is contained in:
John Whitington
2019-07-01 14:40:22 +01:00
parent 5897b263f4
commit 3d595a14de
4 changed files with 102 additions and 56 deletions

95
cpdf.ml
View File

@@ -4,6 +4,45 @@ open Pdfio
let debug = ref false
(* Skeleton XMP packet used by create_metadata to build metadata from
   scratch. The upper-case words (CREATEDATE, MODDATE, PRODUCER, CREATOR,
   TITLE, SUBJECT, AUTHOR, KEYWORDS, TRAPPED, METADATADATE) are placeholders:
   create_metadata substitutes each of them textually using the pairs
   returned by replacements. The same placeholder may occur more than once
   (for example TITLE appears in both the pdf: and dc: descriptions). *)
let xmp_template =
{|<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?>
<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'
xmlns:iX='http://ns.adobe.com/iX/1.0/'>
<rdf:Description about=''
xmlns='http://ns.adobe.com/pdf/1.3/'
xmlns:pdf='http://ns.adobe.com/pdf/1.3/'>
<pdf:CreationDate>CREATEDATE</pdf:CreationDate>
<pdf:ModDate>MODDATE</pdf:ModDate>
<pdf:Producer>PRODUCER</pdf:Producer>
<pdf:Creator>CREATOR</pdf:Creator>
<pdf:Title>TITLE</pdf:Title>
<pdf:Subject>SUBJECT</pdf:Subject>
<pdf:Author>AUTHOR</pdf:Author>
<pdf:Keywords>KEYWORDS</pdf:Keywords>
<pdf:Trapped>TRAPPED</pdf:Trapped>
</rdf:Description>
<rdf:Description about=''
xmlns='http://ns.adobe.com/xap/1.0/'
xmlns:xap='http://ns.adobe.com/xap/1.0/'>
<xap:CreateDate>CREATEDATE</xap:CreateDate>
<xap:CreatorTool>CREATOR</xap:CreatorTool>
<xap:ModifyDate>MODDATE</xap:ModifyDate>
<xap:MetadataDate>METADATADATE</xap:MetadataDate>
</rdf:Description>
<rdf:Description about=''
xmlns='http://purl.org/dc/elements/1.1/'
xmlns:dc='http://purl.org/dc/elements/1.1/'>
<dc:title>TITLE</dc:title>
</rdf:Description>
</rdf:RDF>
<?xpacket end='r'?>|}
(* For debugging *)
let report_pdf_size pdf =
Pdf.remove_unreferenced pdf;
@@ -1171,6 +1210,8 @@ let set_metadata keepversion filename pdf =
done;
set_metadata_from_bytes keepversion data pdf
(* \section{Remove metadata} *)
let remove_metadata pdf =
match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
@@ -3212,7 +3253,9 @@ let get_info raw pdf =
match Pdf.lookup_direct pdf name infodict with
| Some (Pdf.String s) ->
if raw then s else crude_de_unicode s
| _ -> ""
| Some (Pdf.Boolean false) -> "False"
| Some (Pdf.Boolean true) -> "True"
| _ -> if name = "/Trapped" then "False" else ""
in
getstring
@@ -3225,7 +3268,9 @@ let get_info_utf8 pdf =
(function name ->
match Pdf.lookup_direct pdf name infodict with
| Some (Pdf.String s) -> Pdftext.utf8_of_pdfdocstring s
| _ -> "")
| Some (Pdf.Boolean false) -> "False"
| Some (Pdf.Boolean true) -> "True"
| _ -> if name = "/Trapped" then "False" else "")
let getstring encoding pdf =
match encoding with
@@ -3244,7 +3289,8 @@ let output_info encoding pdf =
Printf.printf "Creator: %s\n" (getstring "/Creator");
Printf.printf "Producer: %s\n" (getstring "/Producer");
Printf.printf "Created: %s\n" (getstring "/CreationDate");
Printf.printf "Modified: %s\n" (getstring "/ModDate")
Printf.printf "Modified: %s\n" (getstring "/ModDate");
Printf.printf "Trapped: %s\n" (getstring "/Trapped")
type xmltree =
E of Xmlm.tag * xmltree list
@@ -3353,7 +3399,6 @@ let output_xmp_info encoding pdf =
try
let dtd, tree = xmltree_of_bytes metadata in
print_out tree "XMP pdf:Keywords" adobe "Keywords";
print_out tree "XMP pdf:PDFVersion" adobe "PDFVersion";
print_out tree "XMP pdf:Producer" adobe "Producer";
print_out tree "XMP pdf:Trapped" adobe "Trapped";
print_out tree "XMP pdf:Title" adobe "Title";
@@ -3373,13 +3418,13 @@ let output_xmp_info encoding pdf =
_ -> ()
(* Set XMP info *)
let rec set_xml_field only_when_present kind fieldname value = function
let rec set_xml_field kind fieldname value = function
D data -> D data
| E (((n, n'), m), [D _]) when n = kind && n' = fieldname ->
E (((n, n'), m), [D value])
| E (x, ts) -> E (x, List.map (set_xml_field only_when_present kind fieldname value) ts)
| E (x, ts) -> E (x, List.map (set_xml_field kind fieldname value) ts)
let set_pdf_info_xml only_when_present kind fieldname value xmldata pdf =
let set_pdf_info_xml kind fieldname value xmldata pdf =
let dtd, tree = xmltree_of_bytes xmldata in
let str =
match value with
@@ -3388,14 +3433,14 @@ let set_pdf_info_xml only_when_present kind fieldname value xmldata pdf =
| Pdf.Boolean false -> "False"
| _ -> failwith "set_pdf_info_xml: not a string"
in
let newtree = set_xml_field only_when_present kind fieldname str tree in
let newtree = set_xml_field kind fieldname str tree in
bytes_of_xmltree (dtd, newtree)
let set_pdf_info_xml_many only_when_present changes value xmldata pdf =
let set_pdf_info_xml_many changes value xmldata pdf =
let xmldata = ref xmldata in
List.iter
(fun (kind, fieldname) ->
xmldata := set_pdf_info_xml only_when_present kind fieldname value !xmldata pdf)
xmldata := set_pdf_info_xml kind fieldname value !xmldata pdf)
changes;
!xmldata
@@ -3508,7 +3553,7 @@ let xmp_date date =
with
Exit -> make_xmp_date_from_components d
let set_pdf_info ?(xmp_also=false) ?(xmp_also_when_present=false) ?(xmp_just_set=false) (key, value, version) pdf =
let set_pdf_info ?(xmp_also=false) ?(xmp_just_set=false) (key, value, version) pdf =
let infodict =
match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with
| Some d -> d
@@ -3523,7 +3568,7 @@ let set_pdf_info ?(xmp_also=false) ?(xmp_also_when_present=false) ?(xmp_just_set
pdf.Pdf.minor <-
max pdf.Pdf.minor version
end;
if xmp_also || xmp_also_when_present then
if xmp_also then
begin match get_metadata pdf with
None -> pdf
| Some xmldata ->
@@ -3543,14 +3588,14 @@ let set_pdf_info ?(xmp_also=false) ?(xmp_also_when_present=false) ?(xmp_just_set
in
set_metadata_from_bytes
true
(set_pdf_info_xml_many xmp_also_when_present changes value xmldata pdf)
(set_pdf_info_xml_many changes value xmldata pdf)
pdf
end
else
pdf
(* Set metadata date *)
let set_metadata_date pdf date only_when_present =
let set_metadata_date pdf date =
match get_metadata pdf with
None -> pdf
| Some xmldata ->
@@ -3558,9 +3603,29 @@ let set_metadata_date pdf date only_when_present =
let value = match date with "now" -> xmp_date (expand_date "now") | x -> x in
set_metadata_from_bytes
true
(set_pdf_info_xml_many only_when_present changes (Pdf.String value) xmldata pdf)
(set_pdf_info_xml_many changes (Pdf.String value) xmldata pdf)
pdf
(* Build the (placeholder, value) pairs used to fill in xmp_template. Text
   fields are read from the /Info dictionary as UTF-8; the two /Info dates are
   passed through xmp_date, and METADATADATE is the current time. *)
let replacements pdf =
  let field = get_info_utf8 pdf in
  let date_field key = xmp_date (field key) in
  [("CREATEDATE", date_field "/CreationDate");
   ("MODDATE", date_field "/ModDate");
   ("PRODUCER", field "/Producer");
   ("CREATOR", field "/Creator");
   ("TITLE", field "/Title");
   ("SUBJECT", field "/Subject");
   ("AUTHOR", field "/Author");
   ("KEYWORDS", field "/Keywords");
   ("TRAPPED", field "/Trapped");
   ("METADATADATE", xmp_date (expand_date "now"))]
(* Create XMP metadata from scratch: fill in the placeholders of xmp_template
   with the values given by replacements, and install the result as the
   metadata stream of the document. Returns the updated document.

   Two points of care:
   - Values are escaped for XML character data before insertion, so that an
     ampersand or angle bracket in, say, the title cannot produce a malformed
     packet.
   - The template is expanded in a single left-to-right pass. Text which has
     been inserted is never rescanned, so a value which happens to contain a
     placeholder word (for example a title containing SUBJECT) is kept
     verbatim. *)
let create_metadata pdf =
  (* Escape the characters which are markup-significant in element content. *)
  let xml_escape s =
    let b = Buffer.create (String.length s + 16) in
      String.iter
        (function
         | '&' -> Buffer.add_string b "&amp;"
         | '<' -> Buffer.add_string b "&lt;"
         | '>' -> Buffer.add_string b "&gt;"
         | c -> Buffer.add_char b c)
        s;
      Buffer.contents b
  in
  let substitutions =
    List.map (fun (key, value) -> (key, xml_escape value)) (replacements pdf)
  in
  let template = xmp_template in
  let tlen = String.length template in
  let out = Buffer.create (tlen * 2) in
  (* Find the first placeholder, if any, which starts at position i. *)
  let rec placeholder_at i = function
    | [] -> None
    | (key, value)::rest ->
        let klen = String.length key in
          if klen > 0 && i + klen <= tlen && String.sub template i klen = key
            then Some (klen, value)
            else placeholder_at i rest
  in
  let rec expand i =
    if i < tlen then
      begin match placeholder_at i substitutions with
      | Some (klen, value) ->
          Buffer.add_string out value;
          expand (i + klen)
      | None ->
          Buffer.add_char out template.[i];
          expand (i + 1)
      end
  in
    expand 0;
    set_metadata_from_bytes false (bytes_of_string (Buffer.contents out)) pdf
(* \section{Blacken text} *)
(*

View File

@@ -112,7 +112,7 @@ val copy_id : bool -> Pdf.t -> Pdf.t -> Pdf.t
(** [set_pdf_info (key, value, version)] sets the entry [key] in the /Info dictionary, updating
the PDF minor version to [version].*)
val set_pdf_info : ?xmp_also:bool -> ?xmp_also_when_present:bool -> ?xmp_just_set:bool -> (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t
val set_pdf_info : ?xmp_also:bool -> ?xmp_just_set:bool -> (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t
(** [set_pdf_info (key, value, version)] sets the entry [key] in the
/ViewerPreferences dictionary, updating the PDF minor version to [version].*)
@@ -195,7 +195,11 @@ val get_metadata : Pdf.t -> Pdfio.bytes option
(** Print metadata to stdout *)
val print_metadata : Pdf.t -> unit
val set_metadata_date : Pdf.t -> string -> bool -> Pdf.t
(** Set the metadata date *)
val set_metadata_date : Pdf.t -> string -> Pdf.t
(** Create XMP metadata from scratch *)
val create_metadata : Pdf.t -> Pdf.t
(** {2 Stamping} *)

View File

@@ -179,6 +179,7 @@ type op =
| ListSpotColours
| RemoveClipping
| SetMetadataDate of string
| CreateMetadata
let string_of_op = function
| CopyFont _ -> "CopyFont"
@@ -291,6 +292,7 @@ let string_of_op = function
| RemoveTrim -> "RemoveTrim"
| RemoveBleed -> "RemoveBleed"
| SetMetadataDate _ -> "SetMetadataDate"
| CreateMetadata -> "CreateMetadata"
(* Inputs: filename, pagespec. *)
type input_kind =
@@ -405,7 +407,6 @@ type args =
mutable extract_text_font_size : float option;
mutable padwith : string option;
mutable alsosetxml : bool;
mutable alsosetxmlwhenpresent : bool;
mutable justsetxml : bool;
mutable gs_malformed : bool}
@@ -495,7 +496,6 @@ let args =
extract_text_font_size = None;
padwith = None;
alsosetxml = false;
alsosetxmlwhenpresent = false;
justsetxml = false;
gs_malformed = false}
@@ -577,7 +577,6 @@ let reset_arguments () =
args.extract_text_font_size <- None;
args.padwith <- None;
args.alsosetxml <- false;
args.alsosetxmlwhenpresent <- false;
args.justsetxml <- false
(* Do not reset original_filename or cpdflin or was_encrypted or
* was_decrypted_with_owner or recrypt or producer or creator or
@@ -652,7 +651,7 @@ let banned banlist = function
AddText _|ScaleContents _|AttachFile _|CopyAnnotations _|SetMetadata _|
ThinLines _|SetAuthor _|SetTitle _|SetSubject _|SetKeywords _|SetCreate _|
SetModify _|SetCreator _|SetProducer _|SetVersion _|RemoveDictEntry _ |
RemoveClipping | SetMetadataDate _ ->
RemoveClipping | SetMetadataDate _ | CreateMetadata ->
mem Pdfcrypt.NoEdit banlist
let operation_allowed pdf banlist op =
@@ -1591,9 +1590,6 @@ let sethardbox box =
let setalsosetxml () =
args.alsosetxml <- true
let setalsosetxmlwhenpresent () =
args.alsosetxmlwhenpresent <- true
let setjustsetxml () =
args.justsetxml <- true
@@ -2026,12 +2022,12 @@ and specs =
("-also-set-xml",
Arg.Unit setalsosetxml,
" Also set XML metadata");
("-also-set-xml-when-present",
Arg.Unit setalsosetxmlwhenpresent,
" Also set XML metadata, but only if field already present");
("-just-set-xml",
Arg.Unit setjustsetxml,
" Just set XML metadata, not old-fashioned metadata");
("-create-metadata",
Arg.Unit (setop CreateMetadata),
" Create XML metadata from scratch.");
("-set-page-layout",
Arg.String setpagelayout,
" Set page layout upon document opening");
@@ -3334,32 +3330,6 @@ let remove_clipping pdf range =
in
Cpdf.process_pages remove_clipping_page pdf range
let change_font_size_ops (r, g, b) dx dy source_size target_size pdf resources content =
let ops = Pdfops.parse_operators pdf resources content in
let tr = Pdftransform.mktranslate dx dy in
let rec process a = function
Pdfops.Op_Tf (fontname, size)::t when fabs (size -. source_size) < 0.01 ->
process
(Pdfops.Op_rg (r, g, b)::Pdfops.Op_Tf (fontname, target_size)::Pdfops.Op_cm tr::a)
t
| h::t -> process (h::a) t
| [] -> rev a
in
[Pdfops.stream_of_ops (process [] ops)]
let change_font_size pdf range (r, g, b) dx dy source_size target_size =
let change_font_size_page _ page =
let content' =
change_font_size_ops
(r, g, b) dx dy source_size target_size pdf
page.Pdfpage.resources page.Pdfpage.content
in
Cpdf.process_xobjects
pdf page (change_font_size_ops (r, g, b) dx dy source_size target_size);
{page with Pdfpage.content = content'}
in
Cpdf.process_pages change_font_size_page pdf range
(* Main function *)
let go () =
match args.op with
@@ -3751,11 +3721,10 @@ let go () =
write_pdf false
(Cpdf.set_pdf_info
~xmp_also:args.alsosetxml
~xmp_also_when_present:args.alsosetxmlwhenpresent
~xmp_just_set:args.justsetxml
(key, value, version) pdf)
| Some (SetMetadataDate date) ->
write_pdf false (Cpdf.set_metadata_date (get_single_pdf args.op false) date args.alsosetxmlwhenpresent)
write_pdf false (Cpdf.set_metadata_date (get_single_pdf args.op false) date)
| Some ((HideToolbar _ | HideMenubar _ | HideWindowUI _
| FitWindow _ | CenterWindow _ | DisplayDocTitle _) as op) ->
begin match args.out with
@@ -4200,6 +4169,9 @@ let go () =
let pdf = get_single_pdf args.op false in
let range = parse_pagespec pdf (get_pagespec ()) in
write_pdf false (remove_clipping pdf range)
| Some CreateMetadata ->
let pdf = get_single_pdf args.op false in
write_pdf false (Cpdf.create_metadata pdf)
let parse_argv () =
if args.debug then

View File

@@ -10,7 +10,12 @@
%FIXME: Activate documentation for -extract-images (when done)
%FIXME: Document new -artbox, -trimbox, -bleedbox and -remove-artbox, -remove-trimbox, -remove-bleedbox
%FIXME: Document -cropbox and -remove-cropbox as synonyms of -crop and -remove-crop
%FIXME: Document new XMP metadata stuff including setmetadata date and its format
%FIXME: Document new XMP metadata stuff including setmetadata date and its format
%FIXME: Document new -gs-malformed flag.
%FIXME: Document new -create-metadata
%FIXME: Document -remove-clipping
%FIXME: Document new -list-spot-colours
\documentclass{book}
\usepackage{palatino}
\usepackage{microtype}