diff --git a/Makefile b/Makefile
index 20e857f..3d9ebbb 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# Build the cpdf command line tools and top level
MODS = cpdfyojson cpdfxmlm \
cpdfunicodedata cpdferror cpdfjson cpdfstrftime cpdfcoord cpdfattach \
- cpdfpagespec cpdfposition cpdf cpdfpresent cpdffont cpdftype \
+ cpdfpagespec cpdfposition cpdfpresent cpdfmetadata cpdf cpdffont cpdftype \
cpdftexttopdf cpdftoc cpdfpad cpdfocg cpdfsqueeze cpdfcommand
SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml
diff --git a/cpdf.ml b/cpdf.ml
index b01d95f..53ea33c 100644
--- a/cpdf.ml
+++ b/cpdf.ml
@@ -10,67 +10,11 @@ type color =
let debug = ref false
-let xmp_template =
-{|
-
-
-
-
- CREATEDATE
- MODDATE
- PRODUCER
- CREATOR
- TITLE
- SUBJECT
- AUTHOR
- KEYWORDS
- TRAPPED
-
-
-
- CREATEDATE
- CREATOR
- MODDATE
- METADATADATE
-
-
-
- TITLE
-
-
-
-
-|}
-
-type encoding =
- | Raw
- | UTF8
- | Stripped
-
-(* Just strip everything which isn't 7 bit ASCII *)
-let crude_de_unicode s =
- implode (map char_of_int (lose (fun x -> x > 127) (Pdftext.codepoints_of_pdfdocstring s)))
-
-let encode_output enc s =
- match enc with
- | Raw -> s
- | UTF8 -> Pdftext.utf8_of_pdfdocstring s
- | Stripped -> crude_de_unicode s
-
(* Get the number of pages in file. Doesn't need decryption. *)
let endpage_io ?revision i user_pw owner_pw =
let pdf = Pdfread.pdf_of_input_lazy ?revision user_pw owner_pw i in
Pdfpage.endpage pdf
-
-
let print_pdf_objs pdf =
Printf.printf "Trailerdict: %s\n" (Pdfwrite.string_of_pdf pdf.Pdf.trailerdict);
Printf.printf "Root: %i\n" pdf.Pdf.root;
@@ -103,14 +47,6 @@ let rec process_text time text m =
| [] -> Cpdfstrftime.strftime ~time text
| (s, r)::t -> process_text time (string_replace_all_lazy s r text) t
-let expand_date = function
- | "now" ->
- begin match Sys.getenv_opt "CPDF_REPRODUCIBLE_DATES" with
- | Some "true" -> Cpdfstrftime.strftime ~time:Cpdfstrftime.dummy "D:%Y%m%d%H%M%S"
- | _ -> Cpdfstrftime.strftime "D:%Y%m%d%H%M%S"
- end
- | x -> x
-
(* For uses of process_pages which don't need to deal with matrices, this
function transforms into one which returns the identity matrix *)
let ppstub f n p = (f n p, n, Pdftransform.i_matrix)
@@ -205,17 +141,6 @@ let combine_pdf_resources pdf a b =
(Pdf.Dictionary [])
(unknown_keys_a @ unknown_keys_b @ combined_known_entries)
-(* \section{Copy an /ID from one file to another} *)
-let copy_id keepversion copyfrom copyto =
- match Pdf.lookup_direct copyfrom "/ID" copyfrom.Pdf.trailerdict with
- | None -> copyto (* error "Source PDF file has no /ID entry to copy from" *)
- | Some id ->
- copyto.Pdf.trailerdict <-
- Pdf.add_dict_entry copyto.Pdf.trailerdict "/ID" id;
- copyto.Pdf.minor <-
- if keepversion then copyto.Pdf.minor else max copyto.Pdf.minor 1;
- copyto
-
(* \section{Remove bookmarks} *)
(* \section{Add bookmarks} *)
@@ -377,155 +302,6 @@ let add_bookmarks ~json verify input pdf =
(*iter (fun b -> flprint (Pdfmarks.string_of_bookmark b); flprint "\n") parsed;*)
Pdfmarks.add_bookmarks parsed pdf
-(* \section{Set page mode} *)
-let set_page_mode pdf s =
- match s with
- | "UseNone" | "UseOutlines" | "UseThumbs"
- | "FullScreen" | "UseOC" | "UseAttachments" ->
- begin match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
- | Some catalog ->
- let catalog' =
- Pdf.add_dict_entry catalog "/PageMode" (Pdf.Name ("/" ^ s))
- in
- let catalognum = Pdf.addobj pdf catalog' in
- let trailerdict' =
- Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum)
- in
- {pdf with
- Pdf.root = catalognum;
- Pdf.trailerdict = trailerdict'}
- | None -> error "bad root"
- end
- | _ -> error "Unknown page mode"
-
-(* Set open action *)
-let set_open_action pdf fit pagenumber =
- if pagenumber > Pdfpage.endpage pdf || pagenumber < 0 then
- raise (error "set_open_action: invalid page number")
- else
- let pageobjectnumber = select pagenumber (Pdf.page_reference_numbers pdf) in
- let destination =
- if fit then
- Pdf.Array [Pdf.Indirect pageobjectnumber; Pdf.Name "/Fit"]
- else
- Pdf.Array [Pdf.Indirect pageobjectnumber; Pdf.Name "/XYZ"; Pdf.Null; Pdf.Null; Pdf.Null]
- in
- let open_action =
- Pdf.Dictionary [("/D", destination); ("/S", Pdf.Name "/GoTo")]
- in
- match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
- | Some catalog ->
- let catalog' =
- Pdf.add_dict_entry catalog "/OpenAction" open_action
- in
- let catalognum = Pdf.addobj pdf catalog' in
- let trailerdict' =
- Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum)
- in
- {pdf with Pdf.root = catalognum; Pdf.trailerdict = trailerdict'}
- | None -> error "bad root"
-
-(* \section{Set viewer preferences} *)
-let set_viewer_preference (key, value, version) pdf =
- match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
- | Some catalog ->
- let viewer_preferences =
- match Pdf.lookup_direct pdf "/ViewerPreferences" catalog with
- | Some d -> d
- | None -> Pdf.Dictionary []
- in
- let viewer_preferences' =
- Pdf.add_dict_entry viewer_preferences key value
- in
- let catalog' =
- Pdf.add_dict_entry catalog "/ViewerPreferences" viewer_preferences'
- in
- let catalognum = Pdf.addobj pdf catalog' in
- let trailerdict' =
- Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum)
- in
- {pdf with
- Pdf.minor = max pdf.Pdf.minor version;
- Pdf.root = catalognum;
- Pdf.trailerdict = trailerdict'}
- | None -> error "bad root"
-
-
-
-(* \section{Set page layout} *)
-let set_page_layout pdf s =
- match s with
- | "SinglePage" | "OneColumn" | "TwoColumnLeft"
- | "TwoColumnRight" | "TwoPageLeft" | "TwoPageRight" ->
- begin match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
- | Some catalog ->
- let catalog' =
- Pdf.add_dict_entry catalog "/PageLayout" (Pdf.Name ("/" ^ s))
- in
- let catalognum = Pdf.addobj pdf catalog' in
- let trailerdict' =
- Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum)
- in
- {pdf with
- Pdf.root = catalognum;
- Pdf.trailerdict = trailerdict'}
- | None -> error "bad root"
- end
- | _ -> error "Unknown page layout"
-
-(* \section{Set or replace metadata} *)
-let set_metadata_from_bytes keepversion data pdf =
- let metadata_stream =
- Pdf.Stream
- {contents =
- (Pdf.Dictionary
- ["/Length", Pdf.Integer (bytes_size data);
- "/Type", Pdf.Name "/Metadata";
- "/Subtype", Pdf.Name "/XML"],
- Pdf.Got data)}
- in
- let objnum = Pdf.addobj pdf metadata_stream in
- let document_catalog =
- match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
- | Some s -> s
- | None -> error "Malformed PDF: No root."
- in
- let document_catalog' =
- Pdf.add_dict_entry document_catalog "/Metadata" (Pdf.Indirect objnum)
- in
- let rootnum = Pdf.addobj pdf document_catalog' in
- let trailerdict =
- Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootnum)
- in
- {pdf with
- Pdf.trailerdict = trailerdict;
- Pdf.root = rootnum;
- Pdf.minor =
- if keepversion then pdf.Pdf.minor else max 4 pdf.Pdf.minor}
-
-let set_metadata keepversion filename pdf =
- let ch = open_in_bin filename in
- let data = mkbytes (in_channel_length ch) in
- for x = 0 to bytes_size data - 1 do
- bset data x (input_byte ch)
- done;
- set_metadata_from_bytes keepversion data pdf
-
-
-
-(* \section{Remove metadata} *)
-let remove_metadata pdf =
- match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
- | None -> error "malformed file"
- | Some root ->
- let root' = Pdf.remove_dict_entry root "/Metadata" in
- let rootnum = Pdf.addobj pdf root' in
- {pdf with
- Pdf.trailerdict =
- Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootnum);
- Pdf.root =
- rootnum}
-
(* List bookmarks *)
let output_string_of_target pdf fastrefnums x =
match Pdfdest.pdfobject_of_destination x with
@@ -584,9 +360,9 @@ let list_bookmarks ~json encoding range pdf output =
replace q bs q (replace nl bs n (replace bs bs bs codepoints))
in
match encoding with
- | UTF8 -> Pdftext.utf8_of_codepoints escaped
- | Stripped -> process_stripped escaped
- | Raw -> s
+ | Cpdfmetadata.UTF8 -> Pdftext.utf8_of_codepoints escaped
+ | Cpdfmetadata.Stripped -> process_stripped escaped
+ | Cpdfmetadata.Raw -> s
in
let bookmarks = Pdfmarks.read_bookmarks pdf in
let refnums = Pdf.page_reference_numbers pdf in
@@ -719,27 +495,6 @@ let hasbox pdf page boxname =
| _ -> false
-(* Print metadata *)
-let get_metadata pdf =
- match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
- | None -> error "malformed file"
- | Some root ->
- match Pdf.lookup_direct pdf "/Metadata" root with
- | Some ((Pdf.Stream _) as s) ->
- Pdfcodec.decode_pdfstream pdf s;
- begin match s with
- | Pdf.Stream {contents = (_, Pdf.Got data)} -> Some data
- | _ -> assert false
- end
- | _ -> None
-
-let print_metadata pdf =
- match get_metadata pdf with
- None -> ()
- | Some data ->
- for x = 0 to bytes_size data - 1 do
- Printf.printf "%c" (char_of_int (bget data x))
- done
(* List fonts *)
let list_font pdf page (name, dict) =
@@ -1728,7 +1483,7 @@ let stamp relative_to_cropbox position topline midline fast scale_to_fit isover
let merged =
{merged with Pdf.saved_encryption = pdf.Pdf.saved_encryption}
in
- let merged = copy_id true pdf merged in
+ let merged = Cpdfmetadata.copy_id true pdf merged in
let merged_pages = Pdfpage.pages_of_pagetree merged in
let under_pages, over_page =
all_but_last merged_pages, last merged_pages
@@ -1819,7 +1574,7 @@ let stamp_as_xobject pdf range over =
let merged =
{merged with Pdf.saved_encryption = pdf.Pdf.saved_encryption}
in
- let merged = copy_id true pdf merged in
+ let merged = Cpdfmetadata.copy_id true pdf merged in
let merged_pages = Pdfpage.pages_of_pagetree merged in
let under_pages, over_page =
all_but_last merged_pages, last merged_pages
@@ -2185,7 +1940,7 @@ let scale_contents ?(fast=false) position scale pdf range =
(* \section{List annotations} *)
let get_annotation_string encoding pdf annot =
match Pdf.lookup_direct pdf "/Contents" annot with
- | Some (Pdf.String s) -> encode_output encoding s
+ | Some (Pdf.String s) -> Cpdfmetadata.encode_output encoding s
| _ -> ""
let print_annotation encoding pdf num s =
@@ -2696,415 +2451,6 @@ let twoup fast pdf =
let pdf = upright all (rotate_pdf ~-90 pdf all) in
scale_to_fit_pdf ~fast Cpdfposition.Diagonal 1. (many (width, height) endpage) () pdf all
-(* \section{Output info} *)
-let get_info raw pdf =
- let infodict =
- match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with
- | Some infodict -> infodict
- | _ -> Pdf.Dictionary []
- in
- let getstring name =
- match Pdf.lookup_direct pdf name infodict with
- | Some (Pdf.String s) ->
- if raw then s else crude_de_unicode s
- | Some (Pdf.Boolean false) -> "False"
- | Some (Pdf.Boolean true) -> "True"
- | _ -> if name = "/Trapped" then "False" else ""
- in
- getstring
-
-let get_info_utf8 pdf =
- let infodict =
- match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with
- | Some infodict -> infodict
- | _ -> Pdf.Dictionary []
- in
- (function name ->
- match Pdf.lookup_direct pdf name infodict with
- | Some (Pdf.String s) -> Pdftext.utf8_of_pdfdocstring s
- | Some (Pdf.Boolean false) -> "False"
- | Some (Pdf.Boolean true) -> "True"
- | _ -> if name = "/Trapped" then "False" else "")
-
-let getstring encoding pdf =
- match encoding with
- | Raw -> get_info true pdf
- | Stripped -> get_info false pdf
- | UTF8 -> get_info_utf8 pdf
-
-let output_info encoding pdf =
- let getstring = getstring encoding pdf in
- Printf.printf "Version: %i.%i\n" pdf.Pdf.major pdf.Pdf.minor;
- Printf.printf "Pages: %i\n" (Pdfpage.endpage pdf);
- Printf.printf "Title: %s\n" (getstring "/Title");
- Printf.printf "Author: %s\n" (getstring "/Author");
- Printf.printf "Subject: %s\n" (getstring "/Subject");
- Printf.printf "Keywords: %s\n" (getstring "/Keywords");
- Printf.printf "Creator: %s\n" (getstring "/Creator");
- Printf.printf "Producer: %s\n" (getstring "/Producer");
- Printf.printf "Created: %s\n" (getstring "/CreationDate");
- Printf.printf "Modified: %s\n" (getstring "/ModDate");
- Printf.printf "Trapped: %s\n" (getstring "/Trapped")
-
-type xmltree =
- E of Cpdfxmlm.tag * xmltree list
- | D of string
-
-let xmltree_of_bytes b =
- let i = Cpdfxmlm.make_input (`String (0, string_of_bytes b)) in
- let el tag childs = E (tag, childs)
- and data d = D d in
- Cpdfxmlm.input_doc_tree ~el ~data i
-
-let bytes_of_xmltree t =
- let buf = Buffer.create 1024 in
- let o = Cpdfxmlm.make_output (`Buffer buf) in
- let frag = function
- E (tag, childs) -> `El (tag, childs)
- | D d -> `Data d
- in
- Cpdfxmlm.output_doc_tree frag o t;
- bytes_of_string (Buffer.contents buf)
-
-let rec string_of_xmltree = function
- D d ->
- Printf.sprintf "DATA {%s}" d
- | E (tag, trees) ->
- Printf.sprintf "ELT (%s, %s)"
- (string_of_tag tag)
- (string_of_xmltrees trees)
-
-and string_of_tag ((n, n'), attributes) =
- Printf.sprintf
- "NAME |%s| |%s|, ATTRIBUTES {%s}" n n'
- (string_of_attributes attributes)
-
-and string_of_attribute ((n, n'), str) =
- Printf.sprintf "ATTRNAME |%s| |%s|, STR {%s}" n n' str
-
-and string_of_attributes attrs =
- fold_left
- (fun a b -> a ^ " " ^ b) "" (map string_of_attribute attrs)
-
-and string_of_xmltrees trees =
- fold_left
- (fun a b -> a ^ " " ^ b) "" (map string_of_xmltree trees)
-
-let adobe = "http://ns.adobe.com/pdf/1.3/"
-
-let xmp = "http://ns.adobe.com/xap/1.0/"
-
-let dc = "http://purl.org/dc/elements/1.1/"
-
-let rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
-
-let combine_with_spaces strs =
- String.trim
- (fold_left (fun x y -> x ^ (if x <> "" then ", " else "") ^ y) "" strs)
-
-(* Collect all
elements inside a seq, bag, or alt. Combine with commas. If
-none found, return empty string instead. *)
-let collect_list_items = function
- E (((n, n'), _), elts) when
- n = rdf && (n' = "Alt" || n' = "Seq" || n' = "Bag")
- ->
- combine_with_spaces
- (option_map
- (function
- E (((n, n'), _), [D d]) when n = rdf && n' = "li" ->
- Some d
- | _ -> None)
- elts)
- | _ -> ""
-
-let collect_list_items_all all =
- match keep (function E _ -> true | _ -> false) all with
- h::_ -> Some (collect_list_items h)
- | [] -> None
-
-let rec get_data_for namespace name = function
- D _ -> None
- | E (((n, n'), _), [D d]) when n = namespace && n' = name ->
- Some d
- | E (((n, n'), _), e) when n = namespace && n' = name ->
- collect_list_items_all e
- | E (_, l) ->
- match option_map (get_data_for namespace name) l with
- x :: _ -> Some x
- | _ -> None
-
-let output_xmp_info encoding pdf =
- let print_out tree title namespace name =
- match get_data_for namespace name tree with
- None -> ()
- | Some data ->
- Printf.printf "%s: " title;
- print_endline data
- in
- match get_metadata pdf with
- None -> ()
- | Some metadata ->
- try
- let dtd, tree = xmltree_of_bytes metadata in
- print_out tree "XMP pdf:Keywords" adobe "Keywords";
- print_out tree "XMP pdf:Producer" adobe "Producer";
- print_out tree "XMP pdf:Trapped" adobe "Trapped";
- print_out tree "XMP pdf:Title" adobe "Title";
- print_out tree "XMP pdf:Creator" adobe "Creator";
- print_out tree "XMP pdf:Subject" adobe "Subject";
- print_out tree "XMP pdf:Author" adobe "Author";
- print_out tree "XMP pdf:CreationDate" adobe "CreationDate";
- print_out tree "XMP pdf:ModDate" adobe "ModDate";
- print_out tree "XMP xmp:CreateDate" xmp "CreateDate";
- print_out tree "XMP xmp:CreatorTool" xmp "CreatorTool";
- print_out tree "XMP xmp:MetadataDate" xmp "MetadataDate";
- print_out tree "XMP xmp:ModifyDate" xmp "ModifyDate";
- print_out tree "XMP dc:title" dc "title";
- print_out tree "XMP dc:creator" dc "creator";
- print_out tree "XMP dc:subject" dc "subject";
- print_out tree "XMP dc:description" dc "description"
- with
- _ -> ()
-
-(* Get XMP info equivalent of an old metadata field *)
-let check = function
- "/Title" -> [(adobe, "Title"); (dc, "title")]
-| "/Author" -> [(adobe, "Author"); (dc, "creator")]
-| "/Subject" -> [(adobe, "Subject"); (dc, "subject")]
-| "/Keywords" -> [(adobe, "Keywords")]
-| "/Creator" -> [(adobe, "Creator"); (xmp, "CreatorTool")]
-| "/Producer" -> [(adobe, "Producer")]
-| "/CreationDate" -> [(adobe, "CreationDate"); (xmp, "CreateDate")]
-| "/ModDate" -> [(adobe, "ModificationDate"); (xmp, "ModifyDate")]
-| _ -> failwith "Cpdf.check_name not known"
-
-let get_xmp_info pdf name =
- let tocheck = check name in
- match get_metadata pdf with
- None -> ""
- | Some metadata ->
- try
- let _, tree = xmltree_of_bytes metadata in
- let results = map (fun (kind, key) -> match get_data_for kind key tree with Some x -> x | None -> "") tocheck in
- match lose (eq "") results with
- x::_ -> x
- | [] -> ""
- with
- _ -> ""
-
-(* Set XMP info *)
-let rec set_xml_field kind fieldname value = function
- D data -> D data
-| E (((n, n'), m), _ (*[D _]*)) when n = kind && n' = fieldname -> (* Replace anything inside, including nothing i.e *)
- E (((n, n'), m), [D value])
-| E (x, ts) -> E (x, map (set_xml_field kind fieldname value) ts)
-
-let set_pdf_info_xml kind fieldname value xmldata pdf =
- let dtd, tree = xmltree_of_bytes xmldata in
- let str =
- match value with
- Pdf.String s -> s
- | Pdf.Boolean true -> "True"
- | Pdf.Boolean false -> "False"
- | _ -> failwith "set_pdf_info_xml: not a string"
- in
- let newtree = set_xml_field kind fieldname str tree in
- bytes_of_xmltree (dtd, newtree)
-
-let set_pdf_info_xml_many changes value xmldata pdf =
- let xmldata = ref xmldata in
- iter
- (fun (kind, fieldname) ->
- xmldata := set_pdf_info_xml kind fieldname value !xmldata pdf)
- changes;
- !xmldata
-
-
-(* \section{Set an entry in the /Info dictionary} *)
-
-(* We must parse the date to get its components, then use strftime to build the
- * new string in XMP format *)
-
-type date =
- {mutable year : int;
- mutable month : int; (* 1 - 12 *)
- mutable day : int; (* 1 - 31 *)
- mutable hour : int; (* 0 - 23 *)
- mutable minute : int; (* 0 - 59 *)
- mutable second : int; (* 0 - 59 *)
- mutable ut_relationship : int; (* -1, 0, +1 *)
- mutable offset_hours : int; (* 0 - 59 *)
- mutable offset_minutes : int (* 0 - 59 *)}
-
-let default_date () =
- {year = 0;
- month = 1;
- day = 1;
- hour = 0;
- minute = 0;
- second = 0;
- ut_relationship = 0;
- offset_hours = 0;
- offset_minutes = 0}
-
-(* XMP date format is YYYY-MM-DDThh:mm:ssTZD *)
-let make_xmp_date_from_components d =
- let tzd =
- if d.ut_relationship = 0 && d.offset_hours = 0 && d.offset_minutes = 0 then "Z" else
- (if d.ut_relationship >=0 then "+" else "-") ^
- Printf.sprintf "%02i" d.offset_hours ^
- ":" ^
- Printf.sprintf "%02i" d.offset_minutes
- in
- Cpdfstrftime.strftime
- ~time:{Cpdfstrftime._tm_sec = d.second;
- Cpdfstrftime._tm_min = d.minute;
- Cpdfstrftime._tm_hour = d.hour;
- Cpdfstrftime._tm_mday = d.day;
- Cpdfstrftime._tm_mon = d.month - 1;
- Cpdfstrftime._tm_year = d.year - 1900;
- Cpdfstrftime._tm_wday = 0;
- Cpdfstrftime._tm_yday = 0;
- Cpdfstrftime._tm_isdst = false}
- "%Y-%m-%dT%H:%M:%S"
- ^
- tzd
-
-let xmp_date date =
- let d = default_date () in
- try
- match explode date with
- 'D'::':'::r ->
- begin match r with
- y1::y2::y3::y4::r ->
- d.year <- int_of_string (implode [y1; y2; y3; y4]);
- begin match r with
- m1::m2::r ->
- d.month <- int_of_string (implode [m1; m2]);
- begin match r with
- d1::d2::r ->
- d.day <- int_of_string (implode [d1; d2]);
- begin match r with
- h1::h2::r ->
- d.hour <- int_of_string (implode [h1; h2]);
- begin match r with
- m1::m2::r ->
- d.minute <- int_of_string (implode [m1; m2]);
- begin match r with
- s1::s2::r ->
- d.second <- int_of_string (implode [s1; s2]);
- begin match r with
- o::r ->
- d.ut_relationship <-
- if o = '+' then 1 else
- if o = '-' then -1 else
- 0;
- begin match r with
- h1::h2::'\''::r ->
- d.offset_hours <- int_of_string (implode [h1; h2]);
- begin match r with
- m1::m2::_ ->
- d.offset_minutes <- int_of_string (implode [m1; m2]);
- raise Exit
- | _ -> raise Exit
- end
- | _ -> raise Exit
- end
- | _ -> raise Exit
- end
- | _ -> raise Exit
- end
- | _ -> raise Exit
- end
- | _ -> raise Exit
- end
- | _ -> raise Exit
- end
- | _ -> raise Exit
- end
- | _ ->
- Printf.eprintf "xmp_date: Malformed date string (no year): %s\n%!" date;
- make_xmp_date_from_components d
- end
- | _ ->
- Printf.eprintf "xmp_date: Malformed date string (no prefix): %s\n%!" date;
- make_xmp_date_from_components d
- with
- Exit -> make_xmp_date_from_components d
-
-let set_pdf_info ?(xmp_also=false) ?(xmp_just_set=false) (key, value, version) pdf =
- let infodict =
- match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with
- | Some d -> d
- | None -> Pdf.Dictionary []
- in
- let infodict' = Pdf.add_dict_entry infodict key value in
- let objnum = Pdf.addobj pdf infodict' in
- if not xmp_just_set then
- begin
- pdf.Pdf.trailerdict <-
- Pdf.add_dict_entry pdf.Pdf.trailerdict "/Info" (Pdf.Indirect objnum);
- pdf.Pdf.minor <-
- max pdf.Pdf.minor version
- end;
- if xmp_also then
- begin match get_metadata pdf with
- None -> pdf
- | Some xmldata ->
- let xmp_date = function Pdf.String s -> Pdf.String (xmp_date s) | _ -> failwith "xmp_date not a string" in
- let changes, value =
- match key with
- | "/Producer" -> [(adobe, "Producer")], value
- | "/Creator" -> [(adobe, "Creator"); (xmp, "CreatorTool"); (dc, "creator")], value
- | "/Author" -> [(adobe, "Author")], value
- | "/Title" -> [(adobe, "Title"); (dc, "title")], value
- | "/Subject" -> [(adobe, "Subject"); (dc, "subject")], value
- | "/Keywords" -> [(adobe, "Keywords")], value
- | "/CreationDate" -> [(adobe, "CreationDate"); (xmp, "CreateDate")], xmp_date value
- | "/ModDate" -> [(adobe, "ModDate"); (xmp, "ModifyDate")], xmp_date value
- | "/Trapped" -> [(adobe, "Trapped")], value
- | _ -> failwith "Unknown call to set_pdf_info"
- in
- set_metadata_from_bytes
- true
- (set_pdf_info_xml_many changes value xmldata pdf)
- pdf
- end
- else
- pdf
-
-(* Set metadata date *)
-let set_metadata_date pdf date =
- match get_metadata pdf with
- None -> pdf
- | Some xmldata ->
- let changes= [(xmp, "MetadataDate")] in
- let value = match date with "now" -> xmp_date (expand_date "now") | x -> x in
- set_metadata_from_bytes
- true
- (set_pdf_info_xml_many changes (Pdf.String value) xmldata pdf)
- pdf
-
-let replacements pdf =
- let info = get_info_utf8 pdf in
- [("CREATEDATE", xmp_date (let i = info "/CreationDate" in if i = "" then expand_date "now" else i));
- ("MODDATE", xmp_date (let i = info "/ModDate" in if i = "" then expand_date "now" else i));
- ("PRODUCER", info "/Producer");
- ("CREATOR", info "/Creator");
- ("TITLE", info "/Title");
- ("SUBJECT", info "/Subject");
- ("AUTHOR", info "/Author");
- ("KEYWORDS", info "/Keywords");
- ("TRAPPED", info "/Trapped");
- ("METADATADATE", xmp_date (expand_date "now"))]
-
-let create_metadata pdf =
- let xmp = ref xmp_template in
- iter
- (fun (s, r) -> xmp := string_replace_all s r !xmp)
- (replacements pdf);
- set_metadata_from_bytes false (bytes_of_string !xmp) pdf
-
(* \section{Blacken text} *)
(*
@@ -3497,9 +2843,6 @@ let draft onlyremove boxes range pdf =
pagenums;
Pdfpage.change_pages true !pdf (rev !pages')
-let set_version v pdf =
- pdf.Pdf.minor <- v
-
let blank_document width height pages =
let pdf_pages =
map (fun () -> Pdfpage.blankpage (Pdfpaper.make Pdfunits.PdfPoint width height)) (many () pages)
@@ -4022,7 +3365,7 @@ let list_spot_colours pdf =
let add_bookmark_title filename use_title pdf =
let title =
if use_title then
- match get_info_utf8 pdf "/Title", get_xmp_info pdf "/Title" with
+ match Cpdfmetadata.get_info_utf8 pdf "/Title", Cpdfmetadata.get_xmp_info pdf "/Title" with
"", x | x, "" | _, x -> x
else
Filename.basename filename
@@ -4062,13 +3405,13 @@ let create_pdf pages pagesize =
(* Remove characters which might not make good filenames. *)
let remove_unsafe_characters encoding s =
- if encoding = Raw then s else
+ if encoding = Cpdfmetadata.Raw then s else
let chars =
lose
(function x ->
match x with
'/' | '?' | '<' | '>' | '\\' | ':' | '*' | '|' | '\"' | '^' | '+' | '=' -> true
- | x when int_of_char x < 32 || (int_of_char x > 126 && encoding <> Stripped) -> true
+ | x when int_of_char x < 32 || (int_of_char x > 126 && encoding <> Cpdfmetadata.Stripped) -> true
| _ -> false)
(explode s)
in
diff --git a/cpdf.mli b/cpdf.mli
index c02a444..5326cac 100644
--- a/cpdf.mli
+++ b/cpdf.mli
@@ -1,13 +1,6 @@
(** Coherent PDF Tools Core Routines *)
open Pdfutil
-(** {2 Types and Exceptions} *)
-
-(** Possible output encodings for some function. [Raw] does no processing at
-all - the PDF string is output as-is. [UTF8] converts loslessly to UTF8.
-[Stripped] extracts the unicode codepoints and returns only those which
-correspond to 7 bit ASCII. *)
-type encoding = Raw | UTF8 | Stripped
type color =
Grey of float
@@ -38,45 +31,6 @@ val map_pages : (int -> Pdfpage.t -> 'a) -> Pdf.t -> int list -> 'a list
val copy_cropbox_to_mediabox : Pdf.t -> int list -> Pdf.t
-(** {2 Metadata and settings} *)
-
-(** [copy_id keepversion copyfrom copyto] copies the ID, if any, from
-[copyfrom] to [copyto]. If [keepversion] is true, the PDF version of [copyto]
-won't be affected. *)
-val copy_id : bool -> Pdf.t -> Pdf.t -> Pdf.t
-
-(** [set_pdf_info (key, value, version)] sets the entry [key] in the /Info directory, updating
-the PDF minor version to [version].*)
-val set_pdf_info : ?xmp_also:bool -> ?xmp_just_set:bool -> (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t
-
-val get_xmp_info : Pdf.t -> string -> string
-
-(** [set_pdf_info (key, value, version)] sets the entry [key] in the
-/ViewerPreferences directory, updating the PDF minor version to [version].*)
-val set_viewer_preference : (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t
-
-(** Set the page layout to the given name (sans slash) e.g SinglePage *)
-val set_page_layout : Pdf.t -> string -> Pdf.t
-
-(** Set the page layout to the given name (sans slash) e.g SinglePage *)
-val set_page_mode : Pdf.t -> string -> Pdf.t
-
-(** Set the open action. If the boolean is true, /Fit will be used, otherwise /XYZ *)
-val set_open_action : Pdf.t -> bool -> int -> Pdf.t
-
-(** Set the PDF version number *)
-val set_version : int -> Pdf.t -> unit
-
-(** Given a PDF, returns a function which can lookup a given dictionary entry
-from the /Info dictionary, returning it as a UTF8 string *)
-val get_info_utf8 : Pdf.t -> string -> string
-
-(** Output to standard output general information about a PDF. *)
-val output_info : encoding -> Pdf.t -> unit
-
-(** Output to standard output information from any XMP metadata stream in a PDF. *)
-val output_xmp_info : encoding -> Pdf.t -> unit
-
(** {2 Bookmarks} *)
(** [parse_bookmark_file verify pdf input] parses the bookmark file in [input].
@@ -90,30 +44,9 @@ val add_bookmarks : json:bool -> bool -> Pdfio.input -> Pdf.t -> Pdf.t
(** [list_bookmarks encoding range pdf output] lists the bookmarks to the given
output in the format specified in cpdfmanual.pdf *)
-val list_bookmarks : json:bool -> encoding -> int list -> Pdf.t -> Pdfio.output -> unit
+val list_bookmarks : json:bool -> Cpdfmetadata.encoding -> int list -> Pdf.t -> Pdfio.output -> unit
-(** {2 XML Metadata} *)
-(** [set_metadata keepversion filename pdf] sets the XML metadata of a PDF to the contents of [filename]. If [keepversion] is true, the PDF version will not be altered. *)
-val set_metadata : bool -> string -> Pdf.t -> Pdf.t
-
-(** The same, but the content comes from [bytes]. *)
-val set_metadata_from_bytes : bool -> Pdfio.bytes -> Pdf.t -> Pdf.t
-
-(** Remove the metadata from a file *)
-val remove_metadata : Pdf.t -> Pdf.t
-
-(** Extract metadata to a [Pdfio.bytes] *)
-val get_metadata : Pdf.t -> Pdfio.bytes option
-
-(** Print metadate to stdout *)
-val print_metadata : Pdf.t -> unit
-
-(** Set the metadata date *)
-val set_metadata_date : Pdf.t -> string -> Pdf.t
-
-(** Create XMP metadata from scratch *)
-val create_metadata : Pdf.t -> Pdf.t
(** {2 Stamping} *)
@@ -145,9 +78,6 @@ val list_fonts : Pdf.t -> int list -> (int * string * string * string * string)
(** {2 Adding text} *)
-(** Expand the string "now" to a PDF date string, ignoring any other string *)
-val expand_date : string -> string
-
(** Justification of multiline text *)
type justification =
| LeftJustify
@@ -281,10 +211,10 @@ val show_boxes : ?fast:bool -> Pdf.t -> int list -> Pdf.t
(** {2 Annotations} *)
(** List the annotations to standard output in a given encoding. See cpdfmanual.pdf for the format details. *)
-val list_annotations : json:bool -> encoding -> Pdf.t -> unit
+val list_annotations : json:bool -> Cpdfmetadata.encoding -> Pdf.t -> unit
(** Return the annotations as a (pagenumber, content) list *)
-val get_annotations : encoding -> Pdf.t -> (int * string) list
+val get_annotations : Cpdfmetadata.encoding -> Pdf.t -> (int * string) list
(** Copy the annotations on a given set of pages from a to b. b is returned. *)
val copy_annotations : int list -> Pdf.t -> Pdf.t -> Pdf.t
@@ -375,12 +305,12 @@ val bookmarks_open_to_level : int -> Pdf.t -> Pdf.t
val create_pdf : int -> Pdfpaper.t -> Pdf.t
-val name_of_spec : encoding ->
+val name_of_spec : Cpdfmetadata.encoding ->
Pdfmarks.t list ->
Pdf.t -> int -> string -> int -> string -> int -> int -> string
val extract_images : string ->
string ->
- encoding -> bool -> bool -> Pdf.t -> int list -> string -> unit
+ Cpdfmetadata.encoding -> bool -> bool -> Pdf.t -> int list -> string -> unit
diff --git a/cpdfcommand.ml b/cpdfcommand.ml
index 3a3fce4..d1c5a90 100644
--- a/cpdfcommand.ml
+++ b/cpdfcommand.ml
@@ -417,7 +417,7 @@ type args =
mutable retain_numbering : bool;
mutable remove_duplicate_fonts : bool;
mutable remove_duplicate_streams : bool;
- mutable encoding : Cpdf.encoding;
+ mutable encoding : Cpdfmetadata.encoding;
mutable scale : float;
mutable copyfontpage : int;
mutable copyfontname : string option;
@@ -536,7 +536,7 @@ let args =
retain_numbering = false;
remove_duplicate_fonts = false;
remove_duplicate_streams = false;
- encoding = Cpdf.Stripped;
+ encoding = Cpdfmetadata.Stripped;
scale = 1.;
copyfontpage = 1;
copyfontname = None;
@@ -655,7 +655,7 @@ let reset_arguments () =
args.retain_numbering <- false;
args.remove_duplicate_fonts <- false;
args.remove_duplicate_streams <- false;
- args.encoding <- Cpdf.Stripped;
+ args.encoding <- Cpdfmetadata.Stripped;
args.scale <- 1.;
args.copyfontpage <- 1;
args.copyfontname <- None;
@@ -1779,13 +1779,13 @@ and specs =
Arg.Unit setrecrypt,
" Keep this file's encryption when writing");
("-raw",
- Arg.Unit (setencoding Cpdf.Raw),
+ Arg.Unit (setencoding Cpdfmetadata.Raw),
" Do not process text");
("-stripped",
- Arg.Unit (setencoding Cpdf.Stripped),
+ Arg.Unit (setencoding Cpdfmetadata.Stripped),
" Process text by simple stripping to ASCII");
("-utf8",
- Arg.Unit (setencoding Cpdf.UTF8),
+ Arg.Unit (setencoding Cpdfmetadata.UTF8),
" Process text by conversion to UTF8 Unicode");
("-fast",
Arg.Unit setfast,
@@ -2724,15 +2724,15 @@ let unescape_octals s =
implode (unescape_octals [] (explode s))
let process s =
- if args.encoding <> Cpdf.Raw
+ if args.encoding <> Cpdfmetadata.Raw
then Pdftext.pdfdocstring_of_utf8 s
else unescape_octals s
let set_producer s pdf =
- ignore (Cpdf.set_pdf_info ("/Producer", Pdf.String (process s), 0) pdf)
+ ignore (Cpdfmetadata.set_pdf_info ("/Producer", Pdf.String (process s), 0) pdf)
let set_creator s pdf =
- ignore (Cpdf.set_pdf_info ("/Creator", Pdf.String (process s), 0) pdf)
+ ignore (Cpdfmetadata.set_pdf_info ("/Creator", Pdf.String (process s), 0) pdf)
let really_write_pdf ?(encryption = None) ?(is_decompress=false) mk_id pdf outname =
if args.producer <> None then set_producer (unopt args.producer) pdf;
@@ -3146,8 +3146,8 @@ let go () =
if inname <> "" then
Printf.printf "Linearized: %b\n" (Pdfread.is_linearized (Pdfio.input_of_channel (open_in_bin inname)));
let pdf = decrypt_if_necessary input (Some Info) pdf in
- Cpdf.output_info args.encoding pdf;
- Cpdf.output_xmp_info args.encoding pdf
+ Cpdfmetadata.output_info args.encoding pdf;
+ Cpdfmetadata.output_xmp_info args.encoding pdf
| Some PageInfo ->
begin match args.inputs, args.out with
| (_, pagespec, _, _, _, _)::_, _ ->
@@ -3157,7 +3157,7 @@ let go () =
| _ -> error "list-bookmarks: bad command line"
end
| Some Metadata ->
- Cpdf.print_metadata (get_single_pdf (Some Metadata) true)
+ Cpdfmetadata.print_metadata (get_single_pdf (Some Metadata) true)
| Some Fonts ->
begin match args.inputs, args.out with
| (_, pagespec, _, _, _, _)::_, _ ->
@@ -3357,14 +3357,14 @@ let go () =
| SetCreate _ | SetModify _ | SetCreator _ | SetProducer _
| SetTrapped | SetUntrapped) as op) ->
let key, value, version =
- let f s = if args.encoding <> Cpdf.Raw then Pdftext.pdfdocstring_of_utf8 s else unescape_octals s in
+ let f s = if args.encoding <> Cpdfmetadata.Raw then Pdftext.pdfdocstring_of_utf8 s else unescape_octals s in
match op with
| SetAuthor s -> "/Author", Pdf.String (f s), 0
| SetTitle s -> "/Title", Pdf.String (f s), 1
| SetSubject s -> "/Subject", Pdf.String (f s), 1
| SetKeywords s -> "/Keywords", Pdf.String (f s), 1
- | SetCreate s -> "/CreationDate", Pdf.String (Cpdf.expand_date s), 0
- | SetModify s -> "/ModDate", Pdf.String (Cpdf.expand_date s), 0
+ | SetCreate s -> "/CreationDate", Pdf.String (Cpdfmetadata.expand_date s), 0
+ | SetModify s -> "/ModDate", Pdf.String (Cpdfmetadata.expand_date s), 0
| SetCreator s -> "/Creator", Pdf.String (f s), 0
| SetProducer s -> "/Producer", Pdf.String (f s), 0
| SetTrapped -> "/Trapped", Pdf.Boolean true, 3
@@ -3374,12 +3374,12 @@ let go () =
let pdf = get_single_pdf args.op false in
let version = if args.keepversion then pdf.Pdf.minor else version in
write_pdf false
- (Cpdf.set_pdf_info
+ (Cpdfmetadata.set_pdf_info
~xmp_also:args.alsosetxml
~xmp_just_set:args.justsetxml
(key, value, version) pdf)
| Some (SetMetadataDate date) ->
- write_pdf false (Cpdf.set_metadata_date (get_single_pdf args.op false) date)
+ write_pdf false (Cpdfmetadata.set_metadata_date (get_single_pdf args.op false) date)
| Some ((HideToolbar _ | HideMenubar _ | HideWindowUI _
| FitWindow _ | CenterWindow _ | DisplayDocTitle _) as op) ->
begin match args.out with
@@ -3396,20 +3396,20 @@ let go () =
in
let pdf = get_single_pdf args.op false in
let version = if args.keepversion then pdf.Pdf.minor else version in
- write_pdf false (Cpdf.set_viewer_preference (key, value, version) pdf)
+ write_pdf false (Cpdfmetadata.set_viewer_preference (key, value, version) pdf)
end
| Some (OpenAtPage str) ->
let pdf = get_single_pdf args.op false in
let range = parse_pagespec_allow_empty pdf str in
let n = match range with [x] -> x | _ -> error "open_at_page: range does not specify single page" in
- write_pdf false (Cpdf.set_open_action pdf false n)
+ write_pdf false (Cpdfmetadata.set_open_action pdf false n)
| Some (OpenAtPageFit str) ->
let pdf = get_single_pdf args.op false in
let range = parse_pagespec_allow_empty pdf str in
let n = match range with [x] -> x | _ -> error "open_at_page: range does not specify single page" in
- write_pdf false (Cpdf.set_open_action pdf true n)
+ write_pdf false (Cpdfmetadata.set_open_action pdf true n)
| Some (SetMetadata metadata_file) ->
- write_pdf false (Cpdf.set_metadata args.keepversion metadata_file (get_single_pdf args.op false))
+ write_pdf false (Cpdfmetadata.set_metadata args.keepversion metadata_file (get_single_pdf args.op false))
| Some (SetVersion v) ->
let pdf = get_single_pdf args.op false in
let pdf =
@@ -3419,9 +3419,9 @@ let go () =
in
write_pdf false pdf
| Some (SetPageLayout s) ->
- write_pdf false (Cpdf.set_page_layout (get_single_pdf args.op false) s)
+ write_pdf false (Cpdfmetadata.set_page_layout (get_single_pdf args.op false) s)
| Some (SetPageMode s) ->
- write_pdf false (Cpdf.set_page_mode (get_single_pdf args.op false) s)
+ write_pdf false (Cpdfmetadata.set_page_mode (get_single_pdf args.op false) s)
| Some Split ->
begin match args.inputs, args.out with
| [(f, ranges, _, _, _, _)], File output_spec ->
@@ -3514,7 +3514,7 @@ let go () =
begin match args.inputs with
| [(k, _, u, o, _, _) as input] ->
let pdf =
- Cpdf.copy_id
+ Cpdfmetadata.copy_id
args.keepversion
(pdfread_pdf_of_file (optstring u) (optstring o) getfrom)
(get_pdf_from_input_kind input args.op k)
@@ -3765,7 +3765,7 @@ let go () =
args.recrypt <- false;
write_pdf false (get_single_pdf args.op false)
| Some RemoveMetadata ->
- write_pdf false (Cpdf.remove_metadata (get_single_pdf args.op false))
+ write_pdf false (Cpdfmetadata.remove_metadata (get_single_pdf args.op false))
| Some ExtractImages ->
let output_spec =
begin match args.out with
@@ -3838,7 +3838,7 @@ let go () =
write_pdf false (Cpdf.remove_clipping pdf range)
| Some CreateMetadata ->
let pdf = get_single_pdf args.op false in
- write_pdf false (Cpdf.create_metadata pdf)
+ write_pdf false (Cpdfmetadata.create_metadata pdf)
| Some EmbedMissingFonts ->
let fi =
match args.inputs with
diff --git a/cpdfmetadata.ml b/cpdfmetadata.ml
new file mode 100644
index 0000000..220c361
--- /dev/null
+++ b/cpdfmetadata.ml
@@ -0,0 +1,661 @@
+open Pdfutil
+open Pdfio
+open Cpdferror
+
+type encoding =
+ | Raw
+ | UTF8
+ | Stripped
+
+(* Just strip everything which isn't 7 bit ASCII *)
+let crude_de_unicode s =
+ implode (map char_of_int (lose (fun x -> x > 127) (Pdftext.codepoints_of_pdfdocstring s)))
+
+let encode_output enc s =
+ match enc with
+ | Raw -> s
+ | UTF8 -> Pdftext.utf8_of_pdfdocstring s
+ | Stripped -> crude_de_unicode s
+
+let xmp_template =
+{|
+
+
+
+
+ CREATEDATE
+ MODDATE
+ PRODUCER
+ CREATOR
+ TITLE
+ SUBJECT
+ AUTHOR
+ KEYWORDS
+ TRAPPED
+
+
+
+ CREATEDATE
+ CREATOR
+ MODDATE
+ METADATADATE
+
+
+
+ TITLE
+
+
+
+
+|}
+
+(* \section{Set or replace metadata} *)
+let set_metadata_from_bytes keepversion data pdf =
+ let metadata_stream =
+ Pdf.Stream
+ {contents =
+ (Pdf.Dictionary
+ ["/Length", Pdf.Integer (bytes_size data);
+ "/Type", Pdf.Name "/Metadata";
+ "/Subtype", Pdf.Name "/XML"],
+ Pdf.Got data)}
+ in
+ let objnum = Pdf.addobj pdf metadata_stream in
+ let document_catalog =
+ match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
+ | Some s -> s
+ | None -> error "Malformed PDF: No root."
+ in
+ let document_catalog' =
+ Pdf.add_dict_entry document_catalog "/Metadata" (Pdf.Indirect objnum)
+ in
+ let rootnum = Pdf.addobj pdf document_catalog' in
+ let trailerdict =
+ Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootnum)
+ in
+ {pdf with
+ Pdf.trailerdict = trailerdict;
+ Pdf.root = rootnum;
+ Pdf.minor =
+ if keepversion then pdf.Pdf.minor else max 4 pdf.Pdf.minor}
+
+let set_metadata keepversion filename pdf =
+ let ch = open_in_bin filename in
+ let data = mkbytes (in_channel_length ch) in
+ for x = 0 to bytes_size data - 1 do
+ bset data x (input_byte ch)
+ done;
+ set_metadata_from_bytes keepversion data pdf
+
+
+
+(* \section{Remove metadata} *)
+let remove_metadata pdf =
+ match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
+ | None -> error "malformed file"
+ | Some root ->
+ let root' = Pdf.remove_dict_entry root "/Metadata" in
+ let rootnum = Pdf.addobj pdf root' in
+ {pdf with
+ Pdf.trailerdict =
+ Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootnum);
+ Pdf.root =
+ rootnum}
+(* Print metadata *)
+let get_metadata pdf =
+ match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
+ | None -> error "malformed file"
+ | Some root ->
+ match Pdf.lookup_direct pdf "/Metadata" root with
+ | Some ((Pdf.Stream _) as s) ->
+ Pdfcodec.decode_pdfstream pdf s;
+ begin match s with
+ | Pdf.Stream {contents = (_, Pdf.Got data)} -> Some data
+ | _ -> assert false
+ end
+ | _ -> None
+
+let print_metadata pdf =
+ match get_metadata pdf with
+ None -> ()
+ | Some data ->
+ for x = 0 to bytes_size data - 1 do
+ Printf.printf "%c" (char_of_int (bget data x))
+ done
+
+
+let get_info raw pdf =
+ let infodict =
+ match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with
+ | Some infodict -> infodict
+ | _ -> Pdf.Dictionary []
+ in
+ let getstring name =
+ match Pdf.lookup_direct pdf name infodict with
+ | Some (Pdf.String s) ->
+ if raw then s else crude_de_unicode s
+ | Some (Pdf.Boolean false) -> "False"
+ | Some (Pdf.Boolean true) -> "True"
+ | _ -> if name = "/Trapped" then "False" else ""
+ in
+ getstring
+
+let get_info_utf8 pdf =
+ let infodict =
+ match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with
+ | Some infodict -> infodict
+ | _ -> Pdf.Dictionary []
+ in
+ (function name ->
+ match Pdf.lookup_direct pdf name infodict with
+ | Some (Pdf.String s) -> Pdftext.utf8_of_pdfdocstring s
+ | Some (Pdf.Boolean false) -> "False"
+ | Some (Pdf.Boolean true) -> "True"
+ | _ -> if name = "/Trapped" then "False" else "")
+
+let getstring encoding pdf =
+ match encoding with
+ | Raw -> get_info true pdf
+ | Stripped -> get_info false pdf
+ | UTF8 -> get_info_utf8 pdf
+
+let output_info encoding pdf =
+ let getstring = getstring encoding pdf in
+ Printf.printf "Version: %i.%i\n" pdf.Pdf.major pdf.Pdf.minor;
+ Printf.printf "Pages: %i\n" (Pdfpage.endpage pdf);
+ Printf.printf "Title: %s\n" (getstring "/Title");
+ Printf.printf "Author: %s\n" (getstring "/Author");
+ Printf.printf "Subject: %s\n" (getstring "/Subject");
+ Printf.printf "Keywords: %s\n" (getstring "/Keywords");
+ Printf.printf "Creator: %s\n" (getstring "/Creator");
+ Printf.printf "Producer: %s\n" (getstring "/Producer");
+ Printf.printf "Created: %s\n" (getstring "/CreationDate");
+ Printf.printf "Modified: %s\n" (getstring "/ModDate");
+ Printf.printf "Trapped: %s\n" (getstring "/Trapped")
+
+type xmltree =
+ E of Cpdfxmlm.tag * xmltree list
+ | D of string
+
+let xmltree_of_bytes b =
+ let i = Cpdfxmlm.make_input (`String (0, string_of_bytes b)) in
+ let el tag childs = E (tag, childs)
+ and data d = D d in
+ Cpdfxmlm.input_doc_tree ~el ~data i
+
+let bytes_of_xmltree t =
+ let buf = Buffer.create 1024 in
+ let o = Cpdfxmlm.make_output (`Buffer buf) in
+ let frag = function
+ E (tag, childs) -> `El (tag, childs)
+ | D d -> `Data d
+ in
+ Cpdfxmlm.output_doc_tree frag o t;
+ bytes_of_string (Buffer.contents buf)
+
+let rec string_of_xmltree = function
+ D d ->
+ Printf.sprintf "DATA {%s}" d
+ | E (tag, trees) ->
+ Printf.sprintf "ELT (%s, %s)"
+ (string_of_tag tag)
+ (string_of_xmltrees trees)
+
+and string_of_tag ((n, n'), attributes) =
+ Printf.sprintf
+ "NAME |%s| |%s|, ATTRIBUTES {%s}" n n'
+ (string_of_attributes attributes)
+
+and string_of_attribute ((n, n'), str) =
+ Printf.sprintf "ATTRNAME |%s| |%s|, STR {%s}" n n' str
+
+and string_of_attributes attrs =
+ fold_left
+ (fun a b -> a ^ " " ^ b) "" (map string_of_attribute attrs)
+
+and string_of_xmltrees trees =
+ fold_left
+ (fun a b -> a ^ " " ^ b) "" (map string_of_xmltree trees)
+
+let adobe = "http://ns.adobe.com/pdf/1.3/"
+
+let xmp = "http://ns.adobe.com/xap/1.0/"
+
+let dc = "http://purl.org/dc/elements/1.1/"
+
+let rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+
+let combine_with_spaces strs =
+ String.trim
+ (fold_left (fun x y -> x ^ (if x <> "" then ", " else "") ^ y) "" strs)
+
+(* Collect all elements inside a seq, bag, or alt. Combine with commas. If
+none found, return empty string instead. *)
+let collect_list_items = function
+ E (((n, n'), _), elts) when
+ n = rdf && (n' = "Alt" || n' = "Seq" || n' = "Bag")
+ ->
+ combine_with_spaces
+ (option_map
+ (function
+ E (((n, n'), _), [D d]) when n = rdf && n' = "li" ->
+ Some d
+ | _ -> None)
+ elts)
+ | _ -> ""
+
+let collect_list_items_all all =
+ match keep (function E _ -> true | _ -> false) all with
+ h::_ -> Some (collect_list_items h)
+ | [] -> None
+
+let rec get_data_for namespace name = function
+ D _ -> None
+ | E (((n, n'), _), [D d]) when n = namespace && n' = name ->
+ Some d
+ | E (((n, n'), _), e) when n = namespace && n' = name ->
+ collect_list_items_all e
+ | E (_, l) ->
+ match option_map (get_data_for namespace name) l with
+ x :: _ -> Some x
+ | _ -> None
+
+let output_xmp_info encoding pdf =
+ let print_out tree title namespace name =
+ match get_data_for namespace name tree with
+ None -> ()
+ | Some data ->
+ Printf.printf "%s: " title;
+ print_endline data
+ in
+ match get_metadata pdf with
+ None -> ()
+ | Some metadata ->
+ try
+ let dtd, tree = xmltree_of_bytes metadata in
+ print_out tree "XMP pdf:Keywords" adobe "Keywords";
+ print_out tree "XMP pdf:Producer" adobe "Producer";
+ print_out tree "XMP pdf:Trapped" adobe "Trapped";
+ print_out tree "XMP pdf:Title" adobe "Title";
+ print_out tree "XMP pdf:Creator" adobe "Creator";
+ print_out tree "XMP pdf:Subject" adobe "Subject";
+ print_out tree "XMP pdf:Author" adobe "Author";
+ print_out tree "XMP pdf:CreationDate" adobe "CreationDate";
+ print_out tree "XMP pdf:ModDate" adobe "ModDate";
+ print_out tree "XMP xmp:CreateDate" xmp "CreateDate";
+ print_out tree "XMP xmp:CreatorTool" xmp "CreatorTool";
+ print_out tree "XMP xmp:MetadataDate" xmp "MetadataDate";
+ print_out tree "XMP xmp:ModifyDate" xmp "ModifyDate";
+ print_out tree "XMP dc:title" dc "title";
+ print_out tree "XMP dc:creator" dc "creator";
+ print_out tree "XMP dc:subject" dc "subject";
+ print_out tree "XMP dc:description" dc "description"
+ with
+ _ -> ()
+
+(* Get XMP info equivalent of an old metadata field *)
+let check = function
+ "/Title" -> [(adobe, "Title"); (dc, "title")]
+| "/Author" -> [(adobe, "Author"); (dc, "creator")]
+| "/Subject" -> [(adobe, "Subject"); (dc, "subject")]
+| "/Keywords" -> [(adobe, "Keywords")]
+| "/Creator" -> [(adobe, "Creator"); (xmp, "CreatorTool")]
+| "/Producer" -> [(adobe, "Producer")]
+| "/CreationDate" -> [(adobe, "CreationDate"); (xmp, "CreateDate")]
+| "/ModDate" -> [(adobe, "ModificationDate"); (xmp, "ModifyDate")]
+| _ -> failwith "Cpdf.check_name not known"
+
+let get_xmp_info pdf name =
+ let tocheck = check name in
+ match get_metadata pdf with
+ None -> ""
+ | Some metadata ->
+ try
+ let _, tree = xmltree_of_bytes metadata in
+ let results = map (fun (kind, key) -> match get_data_for kind key tree with Some x -> x | None -> "") tocheck in
+ match lose (eq "") results with
+ x::_ -> x
+ | [] -> ""
+ with
+ _ -> ""
+
+(* Set XMP info *)
+let rec set_xml_field kind fieldname value = function
+ D data -> D data
+| E (((n, n'), m), _ (*[D _]*)) when n = kind && n' = fieldname -> (* Replace anything inside, including nothing i.e *)
+ E (((n, n'), m), [D value])
+| E (x, ts) -> E (x, map (set_xml_field kind fieldname value) ts)
+
+let set_pdf_info_xml kind fieldname value xmldata pdf =
+ let dtd, tree = xmltree_of_bytes xmldata in
+ let str =
+ match value with
+ Pdf.String s -> s
+ | Pdf.Boolean true -> "True"
+ | Pdf.Boolean false -> "False"
+ | _ -> failwith "set_pdf_info_xml: not a string"
+ in
+ let newtree = set_xml_field kind fieldname str tree in
+ bytes_of_xmltree (dtd, newtree)
+
+let set_pdf_info_xml_many changes value xmldata pdf =
+ let xmldata = ref xmldata in
+ iter
+ (fun (kind, fieldname) ->
+ xmldata := set_pdf_info_xml kind fieldname value !xmldata pdf)
+ changes;
+ !xmldata
+
+
+(* \section{Set an entry in the /Info dictionary} *)
+
+(* We must parse the date to get its components, then use strftime to build the
+ * new string in XMP format *)
+
+type date =
+ {mutable year : int;
+ mutable month : int; (* 1 - 12 *)
+ mutable day : int; (* 1 - 31 *)
+ mutable hour : int; (* 0 - 23 *)
+ mutable minute : int; (* 0 - 59 *)
+ mutable second : int; (* 0 - 59 *)
+ mutable ut_relationship : int; (* -1, 0, +1 *)
+ mutable offset_hours : int; (* 0 - 59 *)
+ mutable offset_minutes : int (* 0 - 59 *)}
+
+let default_date () =
+ {year = 0;
+ month = 1;
+ day = 1;
+ hour = 0;
+ minute = 0;
+ second = 0;
+ ut_relationship = 0;
+ offset_hours = 0;
+ offset_minutes = 0}
+
+(* XMP date format is YYYY-MM-DDThh:mm:ssTZD *)
+let make_xmp_date_from_components d =
+ let tzd =
+ if d.ut_relationship = 0 && d.offset_hours = 0 && d.offset_minutes = 0 then "Z" else
+ (if d.ut_relationship >=0 then "+" else "-") ^
+ Printf.sprintf "%02i" d.offset_hours ^
+ ":" ^
+ Printf.sprintf "%02i" d.offset_minutes
+ in
+ Cpdfstrftime.strftime
+ ~time:{Cpdfstrftime._tm_sec = d.second;
+ Cpdfstrftime._tm_min = d.minute;
+ Cpdfstrftime._tm_hour = d.hour;
+ Cpdfstrftime._tm_mday = d.day;
+ Cpdfstrftime._tm_mon = d.month - 1;
+ Cpdfstrftime._tm_year = d.year - 1900;
+ Cpdfstrftime._tm_wday = 0;
+ Cpdfstrftime._tm_yday = 0;
+ Cpdfstrftime._tm_isdst = false}
+ "%Y-%m-%dT%H:%M:%S"
+ ^
+ tzd
+
+let xmp_date date =
+ let d = default_date () in
+ try
+ match explode date with
+ 'D'::':'::r ->
+ begin match r with
+ y1::y2::y3::y4::r ->
+ d.year <- int_of_string (implode [y1; y2; y3; y4]);
+ begin match r with
+ m1::m2::r ->
+ d.month <- int_of_string (implode [m1; m2]);
+ begin match r with
+ d1::d2::r ->
+ d.day <- int_of_string (implode [d1; d2]);
+ begin match r with
+ h1::h2::r ->
+ d.hour <- int_of_string (implode [h1; h2]);
+ begin match r with
+ m1::m2::r ->
+ d.minute <- int_of_string (implode [m1; m2]);
+ begin match r with
+ s1::s2::r ->
+ d.second <- int_of_string (implode [s1; s2]);
+ begin match r with
+ o::r ->
+ d.ut_relationship <-
+ if o = '+' then 1 else
+ if o = '-' then -1 else
+ 0;
+ begin match r with
+ h1::h2::'\''::r ->
+ d.offset_hours <- int_of_string (implode [h1; h2]);
+ begin match r with
+ m1::m2::_ ->
+ d.offset_minutes <- int_of_string (implode [m1; m2]);
+ raise Exit
+ | _ -> raise Exit
+ end
+ | _ -> raise Exit
+ end
+ | _ -> raise Exit
+ end
+ | _ -> raise Exit
+ end
+ | _ -> raise Exit
+ end
+ | _ -> raise Exit
+ end
+ | _ -> raise Exit
+ end
+ | _ -> raise Exit
+ end
+ | _ ->
+ Printf.eprintf "xmp_date: Malformed date string (no year): %s\n%!" date;
+ make_xmp_date_from_components d
+ end
+ | _ ->
+ Printf.eprintf "xmp_date: Malformed date string (no prefix): %s\n%!" date;
+ make_xmp_date_from_components d
+ with
+ Exit -> make_xmp_date_from_components d
+
+let set_pdf_info ?(xmp_also=false) ?(xmp_just_set=false) (key, value, version) pdf =
+ let infodict =
+ match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with
+ | Some d -> d
+ | None -> Pdf.Dictionary []
+ in
+ let infodict' = Pdf.add_dict_entry infodict key value in
+ let objnum = Pdf.addobj pdf infodict' in
+ if not xmp_just_set then
+ begin
+ pdf.Pdf.trailerdict <-
+ Pdf.add_dict_entry pdf.Pdf.trailerdict "/Info" (Pdf.Indirect objnum);
+ pdf.Pdf.minor <-
+ max pdf.Pdf.minor version
+ end;
+ if xmp_also then
+ begin match get_metadata pdf with
+ None -> pdf
+ | Some xmldata ->
+ let xmp_date = function Pdf.String s -> Pdf.String (xmp_date s) | _ -> failwith "xmp_date not a string" in
+ let changes, value =
+ match key with
+ | "/Producer" -> [(adobe, "Producer")], value
+ | "/Creator" -> [(adobe, "Creator"); (xmp, "CreatorTool"); (dc, "creator")], value
+ | "/Author" -> [(adobe, "Author")], value
+ | "/Title" -> [(adobe, "Title"); (dc, "title")], value
+ | "/Subject" -> [(adobe, "Subject"); (dc, "subject")], value
+ | "/Keywords" -> [(adobe, "Keywords")], value
+ | "/CreationDate" -> [(adobe, "CreationDate"); (xmp, "CreateDate")], xmp_date value
+ | "/ModDate" -> [(adobe, "ModDate"); (xmp, "ModifyDate")], xmp_date value
+ | "/Trapped" -> [(adobe, "Trapped")], value
+ | _ -> failwith "Unknown call to set_pdf_info"
+ in
+ set_metadata_from_bytes
+ true
+ (set_pdf_info_xml_many changes value xmldata pdf)
+ pdf
+ end
+ else
+ pdf
+
+let expand_date = function
+ | "now" ->
+ begin match Sys.getenv_opt "CPDF_REPRODUCIBLE_DATES" with
+ | Some "true" -> Cpdfstrftime.strftime ~time:Cpdfstrftime.dummy "D:%Y%m%d%H%M%S"
+ | _ -> Cpdfstrftime.strftime "D:%Y%m%d%H%M%S"
+ end
+ | x -> x
+
+(* Set metadata date *)
+let set_metadata_date pdf date =
+ match get_metadata pdf with
+ None -> pdf
+ | Some xmldata ->
+ let changes= [(xmp, "MetadataDate")] in
+ let value = match date with "now" -> xmp_date (expand_date "now") | x -> x in
+ set_metadata_from_bytes
+ true
+ (set_pdf_info_xml_many changes (Pdf.String value) xmldata pdf)
+ pdf
+
+
+(* \section{Copy an /ID from one file to another} *)
+let copy_id keepversion copyfrom copyto =
+ match Pdf.lookup_direct copyfrom "/ID" copyfrom.Pdf.trailerdict with
+ | None -> copyto (* error "Source PDF file has no /ID entry to copy from" *)
+ | Some id ->
+ copyto.Pdf.trailerdict <-
+ Pdf.add_dict_entry copyto.Pdf.trailerdict "/ID" id;
+ copyto.Pdf.minor <-
+ if keepversion then copyto.Pdf.minor else max copyto.Pdf.minor 1;
+ copyto
+
+let replacements pdf =
+ let info = get_info_utf8 pdf in
+ [("CREATEDATE", xmp_date (let i = info "/CreationDate" in if i = "" then expand_date "now" else i));
+ ("MODDATE", xmp_date (let i = info "/ModDate" in if i = "" then expand_date "now" else i));
+ ("PRODUCER", info "/Producer");
+ ("CREATOR", info "/Creator");
+ ("TITLE", info "/Title");
+ ("SUBJECT", info "/Subject");
+ ("AUTHOR", info "/Author");
+ ("KEYWORDS", info "/Keywords");
+ ("TRAPPED", info "/Trapped");
+ ("METADATADATE", xmp_date (expand_date "now"))]
+
+let create_metadata pdf =
+ let xmp = ref xmp_template in
+ iter
+ (fun (s, r) -> xmp := string_replace_all s r !xmp)
+ (replacements pdf);
+ set_metadata_from_bytes false (bytes_of_string !xmp) pdf
+
+(* \section{Set viewer preferences} *)
+let set_viewer_preference (key, value, version) pdf =
+ match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
+ | Some catalog ->
+ let viewer_preferences =
+ match Pdf.lookup_direct pdf "/ViewerPreferences" catalog with
+ | Some d -> d
+ | None -> Pdf.Dictionary []
+ in
+ let viewer_preferences' =
+ Pdf.add_dict_entry viewer_preferences key value
+ in
+ let catalog' =
+ Pdf.add_dict_entry catalog "/ViewerPreferences" viewer_preferences'
+ in
+ let catalognum = Pdf.addobj pdf catalog' in
+ let trailerdict' =
+ Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum)
+ in
+ {pdf with
+ Pdf.minor = max pdf.Pdf.minor version;
+ Pdf.root = catalognum;
+ Pdf.trailerdict = trailerdict'}
+ | None -> error "bad root"
+
+
+
+(* \section{Set page layout} *)
+let set_page_layout pdf s =
+ match s with
+ | "SinglePage" | "OneColumn" | "TwoColumnLeft"
+ | "TwoColumnRight" | "TwoPageLeft" | "TwoPageRight" ->
+ begin match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
+ | Some catalog ->
+ let catalog' =
+ Pdf.add_dict_entry catalog "/PageLayout" (Pdf.Name ("/" ^ s))
+ in
+ let catalognum = Pdf.addobj pdf catalog' in
+ let trailerdict' =
+ Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum)
+ in
+ {pdf with
+ Pdf.root = catalognum;
+ Pdf.trailerdict = trailerdict'}
+ | None -> error "bad root"
+ end
+ | _ -> error "Unknown page layout"
+
+
+(* \section{Set page mode} *)
+let set_page_mode pdf s =
+ match s with
+ | "UseNone" | "UseOutlines" | "UseThumbs"
+ | "FullScreen" | "UseOC" | "UseAttachments" ->
+ begin match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
+ | Some catalog ->
+ let catalog' =
+ Pdf.add_dict_entry catalog "/PageMode" (Pdf.Name ("/" ^ s))
+ in
+ let catalognum = Pdf.addobj pdf catalog' in
+ let trailerdict' =
+ Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum)
+ in
+ {pdf with
+ Pdf.root = catalognum;
+ Pdf.trailerdict = trailerdict'}
+ | None -> error "bad root"
+ end
+ | _ -> error "Unknown page mode"
+
+(* Set open action *)
+let set_open_action pdf fit pagenumber =
+ if pagenumber > Pdfpage.endpage pdf || pagenumber < 0 then
+ raise (error "set_open_action: invalid page number")
+ else
+ let pageobjectnumber = select pagenumber (Pdf.page_reference_numbers pdf) in
+ let destination =
+ if fit then
+ Pdf.Array [Pdf.Indirect pageobjectnumber; Pdf.Name "/Fit"]
+ else
+ Pdf.Array [Pdf.Indirect pageobjectnumber; Pdf.Name "/XYZ"; Pdf.Null; Pdf.Null; Pdf.Null]
+ in
+ let open_action =
+ Pdf.Dictionary [("/D", destination); ("/S", Pdf.Name "/GoTo")]
+ in
+ match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
+ | Some catalog ->
+ let catalog' =
+ Pdf.add_dict_entry catalog "/OpenAction" open_action
+ in
+ let catalognum = Pdf.addobj pdf catalog' in
+ let trailerdict' =
+ Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum)
+ in
+ {pdf with Pdf.root = catalognum; Pdf.trailerdict = trailerdict'}
+ | None -> error "bad root"
+
+
+let set_version v pdf =
+ pdf.Pdf.minor <- v
diff --git a/cpdfmetadata.mli b/cpdfmetadata.mli
new file mode 100644
index 0000000..c1ed9ec
--- /dev/null
+++ b/cpdfmetadata.mli
@@ -0,0 +1,74 @@
+
+(** {2 Types and Exceptions} *)
+
+(** Possible output encodings for some function. [Raw] does no processing at
+all - the PDF string is output as-is. [UTF8] converts loslessly to UTF8.
+[Stripped] extracts the unicode codepoints and returns only those which
+correspond to 7 bit ASCII. *)
+type encoding = Raw | UTF8 | Stripped
+
+val encode_output : encoding -> string -> string
+
+(** {2 Metadata and settings} *)
+
+(** [copy_id keepversion copyfrom copyto] copies the ID, if any, from
+[copyfrom] to [copyto]. If [keepversion] is true, the PDF version of [copyto]
+won't be affected. *)
+val copy_id : bool -> Pdf.t -> Pdf.t -> Pdf.t
+
+(** [set_pdf_info (key, value, version)] sets the entry [key] in the /Info directory, updating
+the PDF minor version to [version].*)
+val set_pdf_info : ?xmp_also:bool -> ?xmp_just_set:bool -> (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t
+
+val get_xmp_info : Pdf.t -> string -> string
+
+(** [set_pdf_info (key, value, version)] sets the entry [key] in the
+/ViewerPreferences directory, updating the PDF minor version to [version].*)
+val set_viewer_preference : (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t
+
+(** Set the page layout to the given name (sans slash) e.g SinglePage *)
+val set_page_layout : Pdf.t -> string -> Pdf.t
+
+(** Set the page layout to the given name (sans slash) e.g SinglePage *)
+val set_page_mode : Pdf.t -> string -> Pdf.t
+
+(** Set the open action. If the boolean is true, /Fit will be used, otherwise /XYZ *)
+val set_open_action : Pdf.t -> bool -> int -> Pdf.t
+
+(** Set the PDF version number *)
+val set_version : int -> Pdf.t -> unit
+
+(** Given a PDF, returns a function which can lookup a given dictionary entry
+from the /Info dictionary, returning it as a UTF8 string *)
+val get_info_utf8 : Pdf.t -> string -> string
+
+(** Output to standard output general information about a PDF. *)
+val output_info : encoding -> Pdf.t -> unit
+
+(** Output to standard output information from any XMP metadata stream in a PDF. *)
+val output_xmp_info : encoding -> Pdf.t -> unit
+
+(** Create XMP metadata from scratch *)
+val create_metadata : Pdf.t -> Pdf.t
+
+(** {2 XML Metadata} *)
+
+(** [set_metadata keepversion filename pdf] sets the XML metadata of a PDF to the contents of [filename]. If [keepversion] is true, the PDF version will not be altered. *)
+val set_metadata : bool -> string -> Pdf.t -> Pdf.t
+
+(** The same, but the content comes from [bytes]. *)
+val set_metadata_from_bytes : bool -> Pdfio.bytes -> Pdf.t -> Pdf.t
+
+(** Remove the metadata from a file *)
+val remove_metadata : Pdf.t -> Pdf.t
+
+(** Extract metadata to a [Pdfio.bytes] *)
+val get_metadata : Pdf.t -> Pdfio.bytes option
+
+(** Print metadate to stdout *)
+val print_metadata : Pdf.t -> unit
+
+(** Set the metadata date *)
+val set_metadata_date : Pdf.t -> string -> Pdf.t
+
+val expand_date : string -> string