mirror of
https://github.com/johnwhitington/cpdf-source.git
synced 2025-06-05 22:09:39 +02:00
-create-metadata finished
This commit is contained in:
95
cpdf.ml
95
cpdf.ml
@@ -4,6 +4,45 @@ open Pdfio
|
||||
|
||||
let debug = ref false
|
||||
|
||||
(* Skeleton XMP packet used when creating metadata from scratch. The
   upper-case words inside the elements (CREATEDATE, MODDATE, PRODUCER,
   CREATOR, TITLE, SUBJECT, AUTHOR, KEYWORDS, TRAPPED, METADATADATE) are
   placeholders: [create_metadata] substitutes each one with the value paired
   with it by [replacements]. Note that CREATEDATE, MODDATE, CREATOR and TITLE
   each occur more than once. *)
let xmp_template =
|
||||
{|<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?>
|
||||
|
||||
<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'
|
||||
xmlns:iX='http://ns.adobe.com/iX/1.0/'>
|
||||
|
||||
<rdf:Description about=''
|
||||
xmlns='http://ns.adobe.com/pdf/1.3/'
|
||||
xmlns:pdf='http://ns.adobe.com/pdf/1.3/'>
|
||||
<pdf:CreationDate>CREATEDATE</pdf:CreationDate>
|
||||
<pdf:ModDate>MODDATE</pdf:ModDate>
|
||||
<pdf:Producer>PRODUCER</pdf:Producer>
|
||||
<pdf:Creator>CREATOR</pdf:Creator>
|
||||
<pdf:Title>TITLE</pdf:Title>
|
||||
<pdf:Subject>SUBJECT</pdf:Subject>
|
||||
<pdf:Author>AUTHOR</pdf:Author>
|
||||
<pdf:Keywords>KEYWORDS</pdf:Keywords>
|
||||
<pdf:Trapped>TRAPPED</pdf:Trapped>
|
||||
</rdf:Description>
|
||||
|
||||
<rdf:Description about=''
|
||||
xmlns='http://ns.adobe.com/xap/1.0/'
|
||||
xmlns:xap='http://ns.adobe.com/xap/1.0/'>
|
||||
<xap:CreateDate>CREATEDATE</xap:CreateDate>
|
||||
<xap:CreatorTool>CREATOR</xap:CreatorTool>
|
||||
<xap:ModifyDate>MODDATE</xap:ModifyDate>
|
||||
<xap:MetadataDate>METADATADATE</xap:MetadataDate>
|
||||
</rdf:Description>
|
||||
|
||||
<rdf:Description about=''
|
||||
xmlns='http://purl.org/dc/elements/1.1/'
|
||||
xmlns:dc='http://purl.org/dc/elements/1.1/'>
|
||||
<dc:title>TITLE</dc:title>
|
||||
</rdf:Description>
|
||||
|
||||
</rdf:RDF>
|
||||
|
||||
<?xpacket end='r'?>|}
|
||||
|
||||
(* For debugging *)
|
||||
let report_pdf_size pdf =
|
||||
Pdf.remove_unreferenced pdf;
|
||||
@@ -1171,6 +1210,8 @@ let set_metadata keepversion filename pdf =
|
||||
done;
|
||||
set_metadata_from_bytes keepversion data pdf
|
||||
|
||||
|
||||
|
||||
(* \section{Remove metadata} *)
|
||||
let remove_metadata pdf =
|
||||
match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
|
||||
@@ -3212,7 +3253,9 @@ let get_info raw pdf =
|
||||
match Pdf.lookup_direct pdf name infodict with
|
||||
| Some (Pdf.String s) ->
|
||||
if raw then s else crude_de_unicode s
|
||||
| _ -> ""
|
||||
| Some (Pdf.Boolean false) -> "False"
|
||||
| Some (Pdf.Boolean true) -> "True"
|
||||
| _ -> if name = "/Trapped" then "False" else ""
|
||||
in
|
||||
getstring
|
||||
|
||||
@@ -3225,7 +3268,9 @@ let get_info_utf8 pdf =
|
||||
(function name ->
|
||||
match Pdf.lookup_direct pdf name infodict with
|
||||
| Some (Pdf.String s) -> Pdftext.utf8_of_pdfdocstring s
|
||||
| _ -> "")
|
||||
| Some (Pdf.Boolean false) -> "False"
|
||||
| Some (Pdf.Boolean true) -> "True"
|
||||
| _ -> if name = "/Trapped" then "False" else "")
|
||||
|
||||
let getstring encoding pdf =
|
||||
match encoding with
|
||||
@@ -3244,7 +3289,8 @@ let output_info encoding pdf =
|
||||
Printf.printf "Creator: %s\n" (getstring "/Creator");
|
||||
Printf.printf "Producer: %s\n" (getstring "/Producer");
|
||||
Printf.printf "Created: %s\n" (getstring "/CreationDate");
|
||||
Printf.printf "Modified: %s\n" (getstring "/ModDate")
|
||||
Printf.printf "Modified: %s\n" (getstring "/ModDate");
|
||||
Printf.printf "Trapped: %s\n" (getstring "/Trapped")
|
||||
|
||||
type xmltree =
|
||||
E of Xmlm.tag * xmltree list
|
||||
@@ -3353,7 +3399,6 @@ let output_xmp_info encoding pdf =
|
||||
try
|
||||
let dtd, tree = xmltree_of_bytes metadata in
|
||||
print_out tree "XMP pdf:Keywords" adobe "Keywords";
|
||||
print_out tree "XMP pdf:PDFVersion" adobe "PDFVersion";
|
||||
print_out tree "XMP pdf:Producer" adobe "Producer";
|
||||
print_out tree "XMP pdf:Trapped" adobe "Trapped";
|
||||
print_out tree "XMP pdf:Title" adobe "Title";
|
||||
@@ -3373,13 +3418,13 @@ let output_xmp_info encoding pdf =
|
||||
_ -> ()
|
||||
|
||||
(* Set XMP info *)
|
||||
let rec set_xml_field only_when_present kind fieldname value = function
|
||||
let rec set_xml_field kind fieldname value = function
|
||||
D data -> D data
|
||||
| E (((n, n'), m), [D _]) when n = kind && n' = fieldname ->
|
||||
E (((n, n'), m), [D value])
|
||||
| E (x, ts) -> E (x, List.map (set_xml_field only_when_present kind fieldname value) ts)
|
||||
| E (x, ts) -> E (x, List.map (set_xml_field kind fieldname value) ts)
|
||||
|
||||
let set_pdf_info_xml only_when_present kind fieldname value xmldata pdf =
|
||||
let set_pdf_info_xml kind fieldname value xmldata pdf =
|
||||
let dtd, tree = xmltree_of_bytes xmldata in
|
||||
let str =
|
||||
match value with
|
||||
@@ -3388,14 +3433,14 @@ let set_pdf_info_xml only_when_present kind fieldname value xmldata pdf =
|
||||
| Pdf.Boolean false -> "False"
|
||||
| _ -> failwith "set_pdf_info_xml: not a string"
|
||||
in
|
||||
let newtree = set_xml_field only_when_present kind fieldname str tree in
|
||||
let newtree = set_xml_field kind fieldname str tree in
|
||||
bytes_of_xmltree (dtd, newtree)
|
||||
|
||||
let set_pdf_info_xml_many only_when_present changes value xmldata pdf =
|
||||
let set_pdf_info_xml_many changes value xmldata pdf =
|
||||
let xmldata = ref xmldata in
|
||||
List.iter
|
||||
(fun (kind, fieldname) ->
|
||||
xmldata := set_pdf_info_xml only_when_present kind fieldname value !xmldata pdf)
|
||||
xmldata := set_pdf_info_xml kind fieldname value !xmldata pdf)
|
||||
changes;
|
||||
!xmldata
|
||||
|
||||
@@ -3508,7 +3553,7 @@ let xmp_date date =
|
||||
with
|
||||
Exit -> make_xmp_date_from_components d
|
||||
|
||||
let set_pdf_info ?(xmp_also=false) ?(xmp_also_when_present=false) ?(xmp_just_set=false) (key, value, version) pdf =
|
||||
let set_pdf_info ?(xmp_also=false) ?(xmp_just_set=false) (key, value, version) pdf =
|
||||
let infodict =
|
||||
match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with
|
||||
| Some d -> d
|
||||
@@ -3523,7 +3568,7 @@ let set_pdf_info ?(xmp_also=false) ?(xmp_also_when_present=false) ?(xmp_just_set
|
||||
pdf.Pdf.minor <-
|
||||
max pdf.Pdf.minor version
|
||||
end;
|
||||
if xmp_also || xmp_also_when_present then
|
||||
if xmp_also then
|
||||
begin match get_metadata pdf with
|
||||
None -> pdf
|
||||
| Some xmldata ->
|
||||
@@ -3543,14 +3588,14 @@ let set_pdf_info ?(xmp_also=false) ?(xmp_also_when_present=false) ?(xmp_just_set
|
||||
in
|
||||
set_metadata_from_bytes
|
||||
true
|
||||
(set_pdf_info_xml_many xmp_also_when_present changes value xmldata pdf)
|
||||
(set_pdf_info_xml_many changes value xmldata pdf)
|
||||
pdf
|
||||
end
|
||||
else
|
||||
pdf
|
||||
|
||||
(* Set metadata date *)
|
||||
let set_metadata_date pdf date only_when_present =
|
||||
let set_metadata_date pdf date =
|
||||
match get_metadata pdf with
|
||||
None -> pdf
|
||||
| Some xmldata ->
|
||||
@@ -3558,9 +3603,29 @@ let set_metadata_date pdf date only_when_present =
|
||||
let value = match date with "now" -> xmp_date (expand_date "now") | x -> x in
|
||||
set_metadata_from_bytes
|
||||
true
|
||||
(set_pdf_info_xml_many only_when_present changes (Pdf.String value) xmldata pdf)
|
||||
(set_pdf_info_xml_many changes (Pdf.String value) xmldata pdf)
|
||||
pdf
|
||||
|
||||
(* [replacements pdf] pairs each placeholder word of [xmp_template] with the
   text to substitute for it. Values are read from the document information
   of [pdf] as UTF8 via [get_info_utf8]; date-valued entries are passed
   through [xmp_date], and METADATADATE is the current time. *)
let replacements pdf =
  let field = get_info_utf8 pdf in
  (* Date entries are converted with [xmp_date] before substitution. *)
  let date_field key = xmp_date (field key) in
  [("CREATEDATE", date_field "/CreationDate");
   ("MODDATE", date_field "/ModDate");
   ("PRODUCER", field "/Producer");
   ("CREATOR", field "/Creator");
   ("TITLE", field "/Title");
   ("SUBJECT", field "/Subject");
   ("AUTHOR", field "/Author");
   ("KEYWORDS", field "/Keywords");
   ("TRAPPED", field "/Trapped");
   ("METADATADATE", xmp_date (expand_date "now"))]
|
||||
|
||||
(* [create_metadata pdf] builds a fresh XMP packet by filling the placeholders
   of [xmp_template] with the values given by [replacements pdf], and installs
   the result with [set_metadata_from_bytes false].

   The template is filled in a single left-to-right pass, so text which has
   just been inserted is never scanned again: a value which happens to contain
   a placeholder word (for example a producer string containing "TITLE") is
   left intact. Each value is XML-escaped before insertion, so that '&', '<'
   and '>' in the document information cannot produce a malformed packet. *)
let create_metadata pdf =
  (* Escape the characters which are markup in XML character data. *)
  let xml_escape s =
    let b = Buffer.create (String.length s + 16) in
    String.iter
      (function
       | '&' -> Buffer.add_string b "&amp;"
       | '<' -> Buffer.add_string b "&lt;"
       | '>' -> Buffer.add_string b "&gt;"
       | c -> Buffer.add_char b c)
      s;
    Buffer.contents b
  in
  let substitutions =
    List.map
      (fun (placeholder, value) -> (placeholder, xml_escape value))
      (replacements pdf)
  in
  let template = xmp_template in
  let len = String.length template in
  let out = Buffer.create (len * 2) in
  (* Does [placeholder] occur in the template at position [i]? Empty
     placeholders never match, so the scan always advances. *)
  let matches_at i placeholder =
    let pl = String.length placeholder in
    pl > 0 && i + pl <= len && String.sub template i pl = placeholder
  in
  (* First substitution whose placeholder occurs at position [i], if any. *)
  let rec lookup i = function
    | [] -> None
    | (placeholder, value)::rest ->
        if matches_at i placeholder
          then Some (placeholder, value)
          else lookup i rest
  in
  let rec fill i =
    if i < len then
      begin match lookup i substitutions with
      | Some (placeholder, value) ->
          Buffer.add_string out value;
          fill (i + String.length placeholder)
      | None ->
          Buffer.add_char out template.[i];
          fill (i + 1)
      end
  in
  fill 0;
  set_metadata_from_bytes false (bytes_of_string (Buffer.contents out)) pdf
|
||||
|
||||
(* \section{Blacken text} *)
|
||||
|
||||
(*
|
||||
|
8
cpdf.mli
8
cpdf.mli
@@ -112,7 +112,7 @@ val copy_id : bool -> Pdf.t -> Pdf.t -> Pdf.t
|
||||
|
||||
(** [set_pdf_info (key, value, version)] sets the entry [key] in the /Info dictionary, updating
|
||||
the PDF minor version to [version].*)
|
||||
val set_pdf_info : ?xmp_also:bool -> ?xmp_also_when_present:bool -> ?xmp_just_set:bool -> (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t
|
||||
val set_pdf_info : ?xmp_also:bool -> ?xmp_just_set:bool -> (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t
|
||||
|
||||
(** [set_pdf_info (key, value, version)] sets the entry [key] in the
|
||||
/ViewerPreferences dictionary, updating the PDF minor version to [version].*)
|
||||
@@ -195,7 +195,11 @@ val get_metadata : Pdf.t -> Pdfio.bytes option
|
||||
(** Print metadata to stdout *)
|
||||
val print_metadata : Pdf.t -> unit
|
||||
|
||||
val set_metadata_date : Pdf.t -> string -> bool -> Pdf.t
|
||||
(** Set the metadata date *)
|
||||
val set_metadata_date : Pdf.t -> string -> Pdf.t
|
||||
|
||||
(** Create XMP metadata from scratch *)
|
||||
val create_metadata : Pdf.t -> Pdf.t
|
||||
|
||||
(** {2 Stamping} *)
|
||||
|
||||
|
@@ -179,6 +179,7 @@ type op =
|
||||
| ListSpotColours
|
||||
| RemoveClipping
|
||||
| SetMetadataDate of string
|
||||
| CreateMetadata
|
||||
|
||||
let string_of_op = function
|
||||
| CopyFont _ -> "CopyFont"
|
||||
@@ -291,6 +292,7 @@ let string_of_op = function
|
||||
| RemoveTrim -> "RemoveTrim"
|
||||
| RemoveBleed -> "RemoveBleed"
|
||||
| SetMetadataDate _ -> "SetMetadataDate"
|
||||
| CreateMetadata -> "CreateMetadata"
|
||||
|
||||
(* Inputs: filename, pagespec. *)
|
||||
type input_kind =
|
||||
@@ -405,7 +407,6 @@ type args =
|
||||
mutable extract_text_font_size : float option;
|
||||
mutable padwith : string option;
|
||||
mutable alsosetxml : bool;
|
||||
mutable alsosetxmlwhenpresent : bool;
|
||||
mutable justsetxml : bool;
|
||||
mutable gs_malformed : bool}
|
||||
|
||||
@@ -495,7 +496,6 @@ let args =
|
||||
extract_text_font_size = None;
|
||||
padwith = None;
|
||||
alsosetxml = false;
|
||||
alsosetxmlwhenpresent = false;
|
||||
justsetxml = false;
|
||||
gs_malformed = false}
|
||||
|
||||
@@ -577,7 +577,6 @@ let reset_arguments () =
|
||||
args.extract_text_font_size <- None;
|
||||
args.padwith <- None;
|
||||
args.alsosetxml <- false;
|
||||
args.alsosetxmlwhenpresent <- false;
|
||||
args.justsetxml <- false
|
||||
(* Do not reset original_filename or cpdflin or was_encrypted or
|
||||
* was_decrypted_with_owner or recrypt or producer or creator or
|
||||
@@ -652,7 +651,7 @@ let banned banlist = function
|
||||
AddText _|ScaleContents _|AttachFile _|CopyAnnotations _|SetMetadata _|
|
||||
ThinLines _|SetAuthor _|SetTitle _|SetSubject _|SetKeywords _|SetCreate _|
|
||||
SetModify _|SetCreator _|SetProducer _|SetVersion _|RemoveDictEntry _ |
|
||||
RemoveClipping | SetMetadataDate _ ->
|
||||
RemoveClipping | SetMetadataDate _ | CreateMetadata ->
|
||||
mem Pdfcrypt.NoEdit banlist
|
||||
|
||||
let operation_allowed pdf banlist op =
|
||||
@@ -1591,9 +1590,6 @@ let sethardbox box =
|
||||
let setalsosetxml () =
|
||||
args.alsosetxml <- true
|
||||
|
||||
let setalsosetxmlwhenpresent () =
|
||||
args.alsosetxmlwhenpresent <- true
|
||||
|
||||
let setjustsetxml () =
|
||||
args.justsetxml <- true
|
||||
|
||||
@@ -2026,12 +2022,12 @@ and specs =
|
||||
("-also-set-xml",
|
||||
Arg.Unit setalsosetxml,
|
||||
" Also set XML metadata");
|
||||
("-also-set-xml-when-present",
|
||||
Arg.Unit setalsosetxmlwhenpresent,
|
||||
" Also set XML metadata, but only if field already present");
|
||||
("-just-set-xml",
|
||||
Arg.Unit setjustsetxml,
|
||||
" Just set XML metadata, not old-fashioned metadata");
|
||||
("-create-metadata",
|
||||
Arg.Unit (setop CreateMetadata),
|
||||
" Create XML metadata from scratch.");
|
||||
("-set-page-layout",
|
||||
Arg.String setpagelayout,
|
||||
" Set page layout upon document opening");
|
||||
@@ -3334,32 +3330,6 @@ let remove_clipping pdf range =
|
||||
in
|
||||
Cpdf.process_pages remove_clipping_page pdf range
|
||||
|
||||
let change_font_size_ops (r, g, b) dx dy source_size target_size pdf resources content =
|
||||
let ops = Pdfops.parse_operators pdf resources content in
|
||||
let tr = Pdftransform.mktranslate dx dy in
|
||||
let rec process a = function
|
||||
Pdfops.Op_Tf (fontname, size)::t when fabs (size -. source_size) < 0.01 ->
|
||||
process
|
||||
(Pdfops.Op_rg (r, g, b)::Pdfops.Op_Tf (fontname, target_size)::Pdfops.Op_cm tr::a)
|
||||
t
|
||||
| h::t -> process (h::a) t
|
||||
| [] -> rev a
|
||||
in
|
||||
[Pdfops.stream_of_ops (process [] ops)]
|
||||
|
||||
let change_font_size pdf range (r, g, b) dx dy source_size target_size =
|
||||
let change_font_size_page _ page =
|
||||
let content' =
|
||||
change_font_size_ops
|
||||
(r, g, b) dx dy source_size target_size pdf
|
||||
page.Pdfpage.resources page.Pdfpage.content
|
||||
in
|
||||
Cpdf.process_xobjects
|
||||
pdf page (change_font_size_ops (r, g, b) dx dy source_size target_size);
|
||||
{page with Pdfpage.content = content'}
|
||||
in
|
||||
Cpdf.process_pages change_font_size_page pdf range
|
||||
|
||||
(* Main function *)
|
||||
let go () =
|
||||
match args.op with
|
||||
@@ -3751,11 +3721,10 @@ let go () =
|
||||
write_pdf false
|
||||
(Cpdf.set_pdf_info
|
||||
~xmp_also:args.alsosetxml
|
||||
~xmp_also_when_present:args.alsosetxmlwhenpresent
|
||||
~xmp_just_set:args.justsetxml
|
||||
(key, value, version) pdf)
|
||||
| Some (SetMetadataDate date) ->
|
||||
write_pdf false (Cpdf.set_metadata_date (get_single_pdf args.op false) date args.alsosetxmlwhenpresent)
|
||||
write_pdf false (Cpdf.set_metadata_date (get_single_pdf args.op false) date)
|
||||
| Some ((HideToolbar _ | HideMenubar _ | HideWindowUI _
|
||||
| FitWindow _ | CenterWindow _ | DisplayDocTitle _) as op) ->
|
||||
begin match args.out with
|
||||
@@ -4200,6 +4169,9 @@ let go () =
|
||||
let pdf = get_single_pdf args.op false in
|
||||
let range = parse_pagespec pdf (get_pagespec ()) in
|
||||
write_pdf false (remove_clipping pdf range)
|
||||
| Some CreateMetadata ->
|
||||
let pdf = get_single_pdf args.op false in
|
||||
write_pdf false (Cpdf.create_metadata pdf)
|
||||
|
||||
let parse_argv () =
|
||||
if args.debug then
|
||||
|
@@ -10,7 +10,12 @@
|
||||
%FIXME: Activate documentation for -extract-images (when done)
|
||||
%FIXME: Document new -artbox, -trimbox, -bleedbox and -remove-artbox, -remove-trimbox, -remove-bleedbox
|
||||
%FIXME: Document -cropbox and -remove-cropbox as synonyms of -crop and -remove-crop
|
||||
%FIXME: Document new XMP metadata stuff including setmetadata date and its format
|
||||
%FIXME: Document new XMP metadata stuff including setmetadata date and its format
|
||||
%FIXME: Document new -gs-malformed flag.
|
||||
%FIXME: Document new -create-metadata
|
||||
%FIXME: Document -remove-clipping
|
||||
%FIXME: Document new -list-spot-colours
|
||||
|
||||
\documentclass{book}
|
||||
\usepackage{palatino}
|
||||
\usepackage{microtype}
|
||||
|
Reference in New Issue
Block a user