open Pdfutil open Pdfio open Cpdferror type encoding = | Raw | UTF8 | Stripped (* Just strip everything which isn't 7 bit ASCII *) let crude_de_unicode s = implode (map char_of_int (lose (fun x -> x > 127) (Pdftext.codepoints_of_pdfdocstring s))) let encode_output enc s = match enc with | Raw -> s | UTF8 -> Pdftext.utf8_of_pdfdocstring s | Stripped -> crude_de_unicode s let xmp_template = {| CREATEDATE MODDATE PRODUCER CREATOR TITLE SUBJECT AUTHOR KEYWORDS TRAPPED CREATEDATE CREATOR MODDATE METADATADATE TITLE |} (* Set or replace metadata *) let set_metadata_from_bytes keepversion data pdf = let metadata_stream = Pdf.Stream {contents = (Pdf.Dictionary ["/Length", Pdf.Integer (bytes_size data); "/Type", Pdf.Name "/Metadata"; "/Subtype", Pdf.Name "/XML"], Pdf.Got data)} in let objnum = Pdf.addobj pdf metadata_stream in let document_catalog = match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with | Some s -> s | None -> error "Malformed PDF: No root." in let document_catalog' = Pdf.add_dict_entry document_catalog "/Metadata" (Pdf.Indirect objnum) in let rootnum = Pdf.addobj pdf document_catalog' in let trailerdict = Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootnum) in {pdf with Pdf.trailerdict = trailerdict; Pdf.root = rootnum; Pdf.minor = if pdf.Pdf.major > 1 || keepversion then pdf.Pdf.minor else max 4 pdf.Pdf.minor} let set_metadata keepversion filename pdf = let ch = open_in_bin filename in let data = mkbytes (in_channel_length ch) in for x = 0 to bytes_size data - 1 do bset data x (input_byte ch) done; set_metadata_from_bytes keepversion data pdf (* Remove metadata *) let remove_metadata pdf = match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with | None -> error "malformed file" | Some root -> let root' = Pdf.remove_dict_entry root "/Metadata" in let rootnum = Pdf.addobj pdf root' in {pdf with Pdf.trailerdict = Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootnum); Pdf.root = rootnum} (* Print metadata *) let get_metadata pdf = match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with | None -> error "malformed file" | Some root -> match Pdf.lookup_direct pdf "/Metadata" root with | Some ((Pdf.Stream _) as s) -> Pdfcodec.decode_pdfstream pdf s; begin match s with | Pdf.Stream {contents = (_, Pdf.Got data)} -> Some data | _ -> assert false end | _ -> None let print_metadata pdf = match get_metadata pdf with None -> () | Some data -> for x = 0 to bytes_size data - 1 do Printf.printf "%c" (char_of_int (bget data x)) done let get_info raw pdf = let infodict = match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with | Some infodict -> infodict | _ -> Pdf.Dictionary [] in let getstring name = match Pdf.lookup_direct pdf name infodict with | Some (Pdf.String s) -> if raw then s else crude_de_unicode s | Some (Pdf.Boolean false) -> "False" | Some (Pdf.Boolean true) -> "True" | _ -> if name = "/Trapped" then "False" else "" in getstring let get_info_utf8 pdf = let infodict = match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with | Some infodict -> infodict | _ -> Pdf.Dictionary [] in (function name -> match Pdf.lookup_direct pdf name infodict with | Some (Pdf.String s) -> Pdftext.utf8_of_pdfdocstring s | Some (Pdf.Boolean false) -> "False" | Some (Pdf.Boolean true) -> "True" | _ -> if name = "/Trapped" then "False" else "") let getstring encoding pdf = match encoding with | Raw -> get_info true pdf | Stripped -> get_info false pdf | UTF8 -> get_info_utf8 pdf let get_catalog_item name pdf = match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with | Some catalog -> begin match Pdf.lookup_direct pdf name catalog with | Some (Pdf.Name x) when x <> "" -> implode (tl (explode x)) | Some (Pdf.Dictionary _) -> "True" | _ -> "" end | _ -> "" let get_viewer_pref_item name pdf = match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with | Some catalog -> begin match Pdf.lookup_direct pdf "/ViewerPreferences" catalog with | Some d -> begin match Pdf.lookup_direct pdf name d with | Some (Pdf.Name x) when x <> "" -> implode (tl (explode x)) | Some (Pdf.Boolean b) -> string_of_bool b | _ -> "" end | None -> "" end | _ -> "" let get_markinfo_item name pdf = match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with | Some catalog -> begin match Pdf.lookup_direct pdf "/MarkInfo" catalog with | Some d -> begin match Pdf.lookup_direct pdf name d with | Some (Pdf.Boolean b) -> b | _ -> false end | None -> false end | _ -> false (* We fetch the open action, which is either: 1. A destination in array form. Change to a page number; 2. A /GoTo action with a /D destination - return the destination with number changed; 3. Something else - return the whole thing with no changes; 4. Not there - return `Null. *) let get_open_action pdf = let rewrite_page a = match a with | Pdf.Array (Pdf.Indirect i::more) as x-> begin match position_1 i (Pdf.page_reference_numbers pdf) with | Some i' -> Pdf.Array (Pdf.Integer i'::more) | None -> x end | x -> x in let convert = Cpdfjson.json_of_object ~utf8:true pdf (fun _ -> ()) ~no_stream_data:true ~parse_content:false in match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/OpenAction"] with | Some (Pdf.Array a) -> convert (rewrite_page (Pdf.Array a)) | Some (Pdf.Dictionary d) -> begin match Pdf.lookup_direct pdf "/D" (Pdf.Dictionary d) with | Some (Pdf.Array a) -> convert (rewrite_page (Pdf.Array a)) | _ -> convert (Pdf.Dictionary d) end | Some x -> convert x | None -> `Null let get_open_action_string pdf = match get_open_action pdf with | `Null -> "" | j -> Pdfwrite.string_of_pdf (Cpdfjson.object_of_json j) let is_xfa pdf = Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/AcroForm"; "/XFA"] <> None let output_info ?(json=ref [("none", `Null)]) encoding pdf = let notjson = !json = [("none", `Null)] in let getstring = getstring encoding pdf in let pages = Pdfpage.pages_of_pagetree pdf in let mediaboxes = map (fun page -> page.Pdfpage.mediabox) pages in let trimboxes = map (fun page -> Pdf.lookup_direct pdf "/TrimBox" page.Pdfpage.rest) pages in let artboxes = map (fun page -> Pdf.lookup_direct pdf "/ArtBox" page.Pdfpage.rest) pages in let cropboxes = map (fun page -> Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest) pages in let bleedboxes = map (fun page -> Pdf.lookup_direct pdf "/BleedBox" page.Pdfpage.rest) pages in if notjson then Printf.printf "Version: %i.%i\n" pdf.Pdf.major pdf.Pdf.minor; json =| ("Version", `List [`Int pdf.Pdf.major; `Int pdf.Pdf.minor]); if notjson then Printf.printf "Pages: %i\n" (Pdfpage.endpage pdf); json =| ("Pages", `Int (Pdfpage.endpage pdf)); if notjson then Printf.printf "Title: %s\n" (getstring "/Title"); json =| ("Title", if getstring "/Title" = "" then `Null else `String (getstring "/Title")); if notjson then Printf.printf "Author: %s\n" (getstring "/Author"); json =| ("Author", if getstring "/Author" = "" then `Null else `String (getstring "/Author")); if notjson then Printf.printf "Subject: %s\n" (getstring "/Subject"); json =| ("Subject", if getstring "/Subject" = "" then `Null else `String (getstring "/Subject")); if notjson then Printf.printf "Keywords: %s\n" (getstring "/Keywords"); json =| ("Keywords", if getstring "/Keywords" = "" then `Null else `String (getstring "/Keywords")); if notjson then Printf.printf "Creator: %s\n" (getstring "/Creator"); json =| ("Creator", if getstring "/Creator" = "" then `Null else `String (getstring "/Creator")); if notjson then Printf.printf "Producer: %s\n" (getstring "/Producer"); json =| ("Producer", if getstring "/Producer" = "" then `Null else `String (getstring "/Producer")); if notjson then Printf.printf "Created: %s\n" (getstring "/CreationDate"); json =| ("Created", if getstring "/CreationDate" = "" then `Null else `String (getstring "/CreationDate")); if notjson then Printf.printf "Modified: %s\n" (getstring "/ModDate"); json =| ("Modified", if getstring "/ModDate" = "" then `Null else `String (getstring "/ModDate")); if notjson then Printf.printf "Trapped: %s\n" (getstring "/Trapped"); json =| ("Trapped", `Bool (bool_of_string (String.lowercase_ascii (getstring "/Trapped")))); if notjson then Printf.printf "PageMode: %s\n" (get_catalog_item "/PageMode" pdf); json =| ("PageMode", match (get_catalog_item "/PageMode" pdf) with "" -> `Null | x -> `String x); if notjson then Printf.printf "PageLayout: %s\n" (get_catalog_item "/PageLayout" pdf); json =| ("PageLayout", match (get_catalog_item "/PageLayout" pdf) with "" -> `Null | x -> `String x); if notjson then Printf.printf "OpenAction: %s\n" (get_open_action_string pdf); json =| ("OpenAction", get_open_action pdf); if notjson then Printf.printf "HideToolbar: %s\n" (get_viewer_pref_item "/HideToolbar" pdf); json =| ("HideToolbar", match get_viewer_pref_item "/HideToolbar" pdf with "" -> `Null | s -> `Bool (bool_of_string s)); if notjson then Printf.printf "HideMenubar: %s\n" (get_viewer_pref_item "/HideMenubar" pdf); json =| ("HideMenubar", match get_viewer_pref_item "/HideMenubar" pdf with "" -> `Null | s -> `Bool (bool_of_string s)); if notjson then Printf.printf "HideWindowUI: %s\n" (get_viewer_pref_item "/HideWindowUI" pdf); json =| ("HideWindowUI", match get_viewer_pref_item "/HideWindowUI" pdf with "" -> `Null | s -> `Bool (bool_of_string s)); if notjson then Printf.printf "FitWindow: %s\n" (get_viewer_pref_item "/FitWindow" pdf); json =| ("FitWindow", match get_viewer_pref_item "/FitWindow" pdf with "" -> `Null | s -> `Bool (bool_of_string s)); if notjson then Printf.printf "CenterWindow: %s\n" (get_viewer_pref_item "/CenterWindow" pdf); json =| ("CenterWindow", match get_viewer_pref_item "/CenterWindow" pdf with "" -> `Null | s -> `Bool (bool_of_string s)); if notjson then Printf.printf "DisplayDocTitle: %s\n" (get_viewer_pref_item "/DisplayDocTitle" pdf); json =| ("DisplayDocTitle", match get_viewer_pref_item "/DisplayDocTitle" pdf with "" -> `Null | s -> `Bool (bool_of_string s)); if notjson then Printf.printf "NonFullScreenPageMode: %s\n" (get_viewer_pref_item "/NonFullScreenPageMode" pdf); json =| ("NonFullPageScreenMode", match (get_viewer_pref_item "/NonFullPageScreenMode" pdf) with "" -> `Null | x -> `String x); if notjson then Printf.printf "AcroForm: %s\n" (match get_catalog_item "/AcroForm" pdf with "" -> "False" | x -> x); json =| ("AcroForm", match (get_catalog_item "/AcroForm" pdf) with "" -> `Bool false | x -> `Bool true); if notjson then Printf.printf "XFA: %s\n" (if is_xfa pdf then "True" else "False"); json =| ("XFA", `Bool (is_xfa pdf)); if notjson then Printf.printf "Marked: %s\n" (match get_markinfo_item "/Marked" pdf with true -> "True" | _ -> "False"); json =| ("Marked", `Bool (get_markinfo_item "/Marked" pdf)); if notjson then Printf.printf "UserProperties: %s\n" (match get_markinfo_item "/UserProperties" pdf with true -> "True" | _ -> "False"); json =| ("UserProperties", `Bool (get_markinfo_item "/UserProperties" pdf)); if notjson then Printf.printf "Suspects: %s\n" (match get_markinfo_item "/Suspects" pdf with true -> "True" | _ -> "False"); json =| ("Suspects", `Bool (get_markinfo_item "/Suspects" pdf)); if notjson then begin Printf.printf "MediaBox: "; begin if length (setify mediaboxes) = 1 then match hd mediaboxes with | Pdf.Array [a; b; c; d] -> Printf.printf "%f %f %f %f\n" (Pdf.getnum pdf a) (Pdf.getnum pdf b) (Pdf.getnum pdf c) (Pdf.getnum pdf d) | _ -> Printf.printf "\n" else Printf.printf "various\n" end; let printbox boxes s = Printf.printf s; Printf.printf ": "; if length (setify boxes) = 1 then begin match hd boxes with | Some (Pdf.Array [a; b; c; d]) -> Printf.printf "%f %f %f %f\n" (Pdf.getnum pdf a) (Pdf.getnum pdf b) (Pdf.getnum pdf c) (Pdf.getnum pdf d) | _ -> Printf.printf "\n" end else Printf.printf "various\n" in printbox cropboxes "CropBox"; printbox bleedboxes "BleedBox"; printbox trimboxes "TrimBox"; printbox artboxes "ArtBox" end else begin let mediabox = if length (setify mediaboxes) = 1 then match hd mediaboxes with | Pdf.Array [a; b; c; d] -> `List [`Float (Pdf.getnum pdf a); `Float (Pdf.getnum pdf b); `Float (Pdf.getnum pdf c); `Float (Pdf.getnum pdf d)] | _ -> `Null else `String "various" in let jsonbox boxes = if length (setify boxes) = 1 then match hd boxes with | Some (Pdf.Array [a; b; c; d]) -> `List [`Float (Pdf.getnum pdf a); `Float (Pdf.getnum pdf b); `Float (Pdf.getnum pdf c); `Float (Pdf.getnum pdf d)] | _ -> `Null else `String "various" in json =| ("MediaBox", mediabox); json =| ("CropBox", jsonbox cropboxes); json =| ("BleedBox", jsonbox bleedboxes); json =| ("TrimBox", jsonbox trimboxes); json =| ("ArtBox", jsonbox artboxes); end type xmltree = E of Cpdfxmlm.tag * xmltree list | D of string let xmltree_of_bytes b = let i = Cpdfxmlm.make_input ~strip:true (`String (0, string_of_bytes b)) in let el tag childs = E (tag, childs) and data d = D d in Cpdfxmlm.input_doc_tree ~el ~data i let bytes_of_xmltree t = let buf = Buffer.create 1024 in let o = Cpdfxmlm.make_output ~indent:None (`Buffer buf) in let frag = function E (tag, childs) -> `El (tag, childs) | D d -> `Data d in Cpdfxmlm.output_doc_tree frag o t; (* Delete the ?xml declaration, per XMP spec. *) let s = Buffer.contents buf in bytes_of_string (String.sub s 39 (String.length s - 39)) let rec string_of_xmltree = function D d -> Printf.sprintf "D {%s}" d | E (tag, trees) -> Printf.sprintf "E (%s, %s)" (string_of_tag tag) (string_of_xmltrees trees) and string_of_tag ((n, n'), attributes) = Printf.sprintf "NAME |%s| |%s|, ATTRIBUTES {%s}" n n' (string_of_attributes attributes) and string_of_attribute ((n, n'), str) = Printf.sprintf "ATTRNAME |%s| |%s|, STR {%s}" n n' str and string_of_attributes attrs = fold_left (fun a b -> a ^ " " ^ b) "" (map string_of_attribute attrs) and string_of_xmltrees trees = fold_left (fun a b -> a ^ " " ^ b) "" (map string_of_xmltree trees) let adobe = "http://ns.adobe.com/pdf/1.3/" let xmp = "http://ns.adobe.com/xap/1.0/" let dc = "http://purl.org/dc/elements/1.1/" let rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" let pdfaid = "http://www.aiim.org/pdfa/ns/id/" let pdfxid = "http://www.npes.org/pdfx/ns/id/" let pdfe = "http://www.aiim.org/pdfe/ns/id/" let pdfuaid = "http://www.aiim.org/pdfua/ns/id/" let pdfvtid = "http://www.npes.org/pdfvt/ns/id/" let combine_with_commas strs = String.trim (fold_left (fun x y -> x ^ (if x <> "" then ", " else "") ^ y) "" strs) (* Collect all
  • elements inside a seq, bag, or alt. Combine with commas. If none found, return empty string instead. *) let collect_list_items = function E (((n, n'), _), elts) when n = rdf && (n' = "Alt" || n' = "Seq" || n' = "Bag") -> combine_with_commas (option_map (function E (((n, n'), _), [D d]) when n = rdf && n' = "li" -> Some d | _ -> None) elts) | _ -> "" let collect_list_items_all all = match keep (function E _ -> true | _ -> false) all with h::_ -> Some (collect_list_items h) | [] -> None let rec get_data_for namespace name = function D _ -> None | E (((n, n'), _), [D d]) when n = namespace && n' = name -> Some d | E (((n, n'), _), e) when n = namespace && n' = name -> collect_list_items_all e | E (_, l) -> match option_map (get_data_for namespace name) l with x :: _ -> Some x | _ -> None (* PDF/A: 2 B PDF/E: PDF/E-1 PDF/VT: PDF/VT-1 PDF/UA: 1 PDF/X: PDF/X-4 (Fallback DID /GTS_PDFXVersion) *) let determine_subformats pdf = let formats = ref [] in let fallback_pdfx () = match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with | Some d -> begin match Pdf.lookup_direct pdf "/GTS_PDFXVersion" d with | Some (Pdf.String s) -> formats =| s | _ -> () end | None -> () in match get_metadata pdf with | None -> fallback_pdfx (); !formats | Some metadata -> let _, tree = xmltree_of_bytes metadata in (* PDF/E *) begin match get_data_for pdfe "ISO_PDFEVersion" tree with | Some s -> formats =| s | None -> () end; (* PDF/UA *) begin match get_data_for pdfuaid "part" tree with | Some s -> formats =| "PDF/UA-" ^ s | None -> () end; (* PDF/A *) begin match get_data_for pdfaid "part" tree with | Some part -> let conformance = match get_data_for pdfaid "conformance" tree with | Some s -> String.lowercase_ascii s | None -> "" in formats =| "PDF/A-" ^ part ^ conformance | None -> () end; (* PDF/X *) begin match get_data_for pdfxid "GTS_PDFXVersion" tree with | Some s -> formats =| s | None -> fallback_pdfx () end; (* PDF/VT *) begin match get_data_for pdfvtid "GTS_PDFVTVersion" tree with | Some s -> formats =| s | None -> () end; !formats let language pdf = match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/Lang"] with | Some (Pdf.String x) -> Some x | _ -> None let set_language pdf s = Pdf.replace_chain pdf ["/Root"; "/Lang"] (Pdf.String s) let output_xmp_info ?(json=ref [("none", `Null)]) encoding pdf = let notjson = !json = [("none", `Null)] in let print_out tree title namespace name = match get_data_for namespace name tree with None -> () | Some data -> if notjson then begin Printf.printf "%s: " title; print_endline data end else json =| (title, `String data) in if notjson then Printf.printf "Subformats: %s\n" (combine_with_commas (determine_subformats pdf)) else json =| ("Subformats", `List (map (fun x -> `String x) (determine_subformats pdf))); if notjson then Printf.printf "Language: %s\n" (match language pdf with None -> "" | Some x -> "\"" ^ x ^ "\"") else json =| ("Language", match language pdf with None -> `Null | Some x -> `String x); match get_metadata pdf with None -> () | Some metadata -> try let dtd, tree = xmltree_of_bytes metadata in print_out tree "XMP pdf:Keywords" adobe "Keywords"; print_out tree "XMP pdf:Producer" adobe "Producer"; print_out tree "XMP pdf:Trapped" adobe "Trapped"; print_out tree "XMP pdf:Title" adobe "Title"; print_out tree "XMP pdf:Creator" adobe "Creator"; print_out tree "XMP pdf:Subject" adobe "Subject"; print_out tree "XMP pdf:Author" adobe "Author"; print_out tree "XMP pdf:CreationDate" adobe "CreationDate"; print_out tree "XMP pdf:ModDate" adobe "ModDate"; print_out tree "XMP xmp:CreateDate" xmp "CreateDate"; print_out tree "XMP xmp:CreatorTool" xmp "CreatorTool"; print_out tree "XMP xmp:MetadataDate" xmp "MetadataDate"; print_out tree "XMP xmp:ModifyDate" xmp "ModifyDate"; print_out tree "XMP dc:title" dc "title"; print_out tree "XMP dc:creator" dc "creator"; print_out tree "XMP dc:subject" dc "subject"; print_out tree "XMP dc:description" dc "description"; print_out tree "PDF/UA pdfuaid:amd" pdfuaid "amd"; print_out tree "PDF/UA pdfuaid:corr" pdfuaid "corr" with _ -> () (* Get XMP info equivalent of an old metadata field *) let check = function "/Title" -> [(adobe, "Title"); (dc, "title")] | "/Author" -> [(adobe, "Author"); (dc, "creator")] | "/Subject" -> [(adobe, "Subject"); (dc, "subject")] | "/Keywords" -> [(adobe, "Keywords")] | "/Creator" -> [(adobe, "Creator"); (xmp, "CreatorTool")] | "/Producer" -> [(adobe, "Producer")] | "/CreationDate" -> [(adobe, "CreationDate"); (xmp, "CreateDate")] | "/ModDate" -> [(adobe, "ModificationDate"); (xmp, "ModifyDate")] | _ -> failwith "Cpdf.check_name not known" let get_xmp_info pdf name = let tocheck = check name in match get_metadata pdf with None -> "" | Some metadata -> try let _, tree = xmltree_of_bytes metadata in let results = map (fun (kind, key) -> match get_data_for kind key tree with Some x -> x | None -> "") tocheck in match lose (eq "") results with x::_ -> x | [] -> "" with _ -> "" (* Set XMP info *) let rec set_xml_field kind fieldname value = function D data -> D data | E (((n, n'), m), _ (*[D _]*)) when n = kind && n' = fieldname -> (* Replace anything inside, including nothing i.e *) E (((n, n'), m), [D value]) | E (x, ts) -> E (x, map (set_xml_field kind fieldname value) ts) let set_pdf_info_xml kind fieldname value xmldata pdf = let dtd, tree = xmltree_of_bytes xmldata in let str = match value with Pdf.String s -> Pdftext.utf8_of_pdfdocstring s | Pdf.Boolean true -> "True" | Pdf.Boolean false -> "False" | _ -> failwith "set_pdf_info_xml: not a string" in let newtree = set_xml_field kind fieldname str tree in bytes_of_xmltree (dtd, newtree) let set_pdf_info_xml_many changes value xmldata pdf = let xmldata = ref xmldata in iter (fun (kind, fieldname) -> xmldata := set_pdf_info_xml kind fieldname value !xmldata pdf) changes; !xmldata (* Set an entry in the /Info dictionary *) (* We must parse the date to get its components, then use strftime to build the * new string in XMP format *) type date = {mutable year : int; mutable month : int; (* 1 - 12 *) mutable day : int; (* 1 - 31 *) mutable hour : int; (* 0 - 23 *) mutable minute : int; (* 0 - 59 *) mutable second : int; (* 0 - 59 *) mutable ut_relationship : int; (* -1, 0, +1 *) mutable offset_hours : int; (* 0 - 59 *) mutable offset_minutes : int (* 0 - 59 *)} let default_date () = {year = 0; month = 1; day = 1; hour = 0; minute = 0; second = 0; ut_relationship = 0; offset_hours = 0; offset_minutes = 0} (* XMP date format is YYYY-MM-DDThh:mm:ssTZD *) let make_xmp_date_from_components d = let tzd = if d.ut_relationship = 0 && d.offset_hours = 0 && d.offset_minutes = 0 then "Z" else (if d.ut_relationship >=0 then "+" else "-") ^ Printf.sprintf "%02i" d.offset_hours ^ ":" ^ Printf.sprintf "%02i" d.offset_minutes in Cpdfstrftime.strftime ~time:{Cpdfstrftime._tm_sec = d.second; Cpdfstrftime._tm_min = d.minute; Cpdfstrftime._tm_hour = d.hour; Cpdfstrftime._tm_mday = d.day; Cpdfstrftime._tm_mon = d.month - 1; Cpdfstrftime._tm_year = d.year - 1900; Cpdfstrftime._tm_wday = 0; Cpdfstrftime._tm_yday = 0; Cpdfstrftime._tm_isdst = false} "%Y-%m-%dT%H:%M:%S" ^ tzd let xmp_date date = let d = default_date () in try match explode date with 'D'::':'::r -> begin match r with y1::y2::y3::y4::r -> d.year <- int_of_string (implode [y1; y2; y3; y4]); begin match r with m1::m2::r -> d.month <- int_of_string (implode [m1; m2]); begin match r with d1::d2::r -> d.day <- int_of_string (implode [d1; d2]); begin match r with h1::h2::r -> d.hour <- int_of_string (implode [h1; h2]); begin match r with m1::m2::r -> d.minute <- int_of_string (implode [m1; m2]); begin match r with s1::s2::r -> d.second <- int_of_string (implode [s1; s2]); begin match r with o::r -> d.ut_relationship <- if o = '+' then 1 else if o = '-' then -1 else 0; begin match r with h1::h2::'\''::r -> d.offset_hours <- int_of_string (implode [h1; h2]); begin match r with m1::m2::_ -> d.offset_minutes <- int_of_string (implode [m1; m2]); raise Exit | _ -> raise Exit end | _ -> raise Exit end | _ -> raise Exit end | _ -> raise Exit end | _ -> raise Exit end | _ -> raise Exit end | _ -> raise Exit end | _ -> raise Exit end | _ -> Pdfe.log (Printf.sprintf "xmp_date: Malformed date string (no year): %s\n" date); make_xmp_date_from_components d end | _ -> Pdfe.log (Printf.sprintf "xmp_date: Malformed date string (no prefix): %s\n" date); make_xmp_date_from_components d with Exit -> make_xmp_date_from_components d let set_pdf_info ?(xmp_also=false) ?(xmp_just_set=false) (key, value, version) pdf = let infodict = match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with | Some d -> d | None -> Pdf.Dictionary [] in let infodict' = Pdf.add_dict_entry infodict key value in let objnum = Pdf.addobj pdf infodict' in if not xmp_just_set then begin pdf.Pdf.trailerdict <- Pdf.add_dict_entry pdf.Pdf.trailerdict "/Info" (Pdf.Indirect objnum); if pdf.Pdf.major = 1 then pdf.Pdf.minor <- max pdf.Pdf.minor version end; if xmp_also then begin match get_metadata pdf with None -> pdf | Some xmldata -> let xmp_date = function Pdf.String s -> Pdf.String (xmp_date s) | _ -> failwith "xmp_date not a string" in let changes, value = match key with | "/Producer" -> [(adobe, "Producer")], value | "/Creator" -> [(adobe, "Creator"); (xmp, "CreatorTool"); (dc, "creator")], value | "/Author" -> [(adobe, "Author")], value | "/Title" -> [(adobe, "Title"); (dc, "title")], value | "/Subject" -> [(adobe, "Subject"); (dc, "subject")], value | "/Keywords" -> [(adobe, "Keywords")], value | "/CreationDate" -> [(adobe, "CreationDate"); (xmp, "CreateDate")], xmp_date value | "/ModDate" -> [(adobe, "ModDate"); (xmp, "ModifyDate")], xmp_date value | "/Trapped" -> [(adobe, "Trapped")], value | _ -> failwith "Unknown call to set_pdf_info" in set_metadata_from_bytes true (set_pdf_info_xml_many changes value xmldata pdf) pdf end else pdf let expand_date = function | "now" -> begin match Sys.getenv_opt "CPDF_REPRODUCIBLE_DATES" with | Some "true" -> Cpdfstrftime.strftime ~time:Cpdfstrftime.dummy "D:%Y%m%d%H%M%S" | _ -> Cpdfstrftime.strftime "D:%Y%m%d%H%M%S" end | x -> x (* Set metadata date *) let set_metadata_date pdf date = match get_metadata pdf with None -> pdf | Some xmldata -> let changes= [(xmp, "MetadataDate")] in let value = match date with "now" -> xmp_date (expand_date "now") | x -> x in set_metadata_from_bytes true (set_pdf_info_xml_many changes (Pdf.String value) xmldata pdf) pdf (* Copy an /ID from one file to another *) let copy_id keepversion copyfrom copyto = match Pdf.lookup_direct copyfrom "/ID" copyfrom.Pdf.trailerdict with | None -> copyto (* error "Source PDF file has no /ID entry to copy from" *) | Some id -> copyto.Pdf.trailerdict <- Pdf.add_dict_entry copyto.Pdf.trailerdict "/ID" id; copyto.Pdf.minor <- if copyto.Pdf.major > 1 || keepversion then copyto.Pdf.minor else max copyto.Pdf.minor 1; copyto let replacements pdf = let info = get_info_utf8 pdf in [("CREATEDATE", xmp_date (let i = info "/CreationDate" in if i = "" then expand_date "now" else i)); ("MODDATE", xmp_date (let i = info "/ModDate" in if i = "" then expand_date "now" else i)); ("PRODUCER", info "/Producer"); ("CREATOR", info "/Creator"); ("TITLE", info "/Title"); ("SUBJECT", info "/Subject"); ("AUTHOR", info "/Author"); ("KEYWORDS", info "/Keywords"); ("TRAPPED", info "/Trapped"); ("METADATADATE", xmp_date (expand_date "now"))] let create_metadata pdf = let xmp = ref xmp_template in iter (fun (s, r) -> xmp := string_replace_all s r !xmp) (replacements pdf); set_metadata_from_bytes false (bytes_of_string !xmp) pdf (* Set viewer preferences *) let set_viewer_preference (key, value, version) pdf = match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with | Some catalog -> let viewer_preferences = match Pdf.lookup_direct pdf "/ViewerPreferences" catalog with | Some d -> d | None -> Pdf.Dictionary [] in let viewer_preferences' = Pdf.add_dict_entry viewer_preferences key value in let catalog' = Pdf.add_dict_entry catalog "/ViewerPreferences" viewer_preferences' in let catalognum = Pdf.addobj pdf catalog' in let trailerdict' = Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum) in {pdf with Pdf.minor = max pdf.Pdf.minor version; Pdf.root = catalognum; Pdf.trailerdict = trailerdict'} | None -> error "bad root" (* Set page layout *) let set_page_layout pdf s = match s with | "SinglePage" | "OneColumn" | "TwoColumnLeft" | "TwoColumnRight" | "TwoPageLeft" | "TwoPageRight" -> begin match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with | Some catalog -> let catalog' = Pdf.add_dict_entry catalog "/PageLayout" (Pdf.Name ("/" ^ s)) in let catalognum = Pdf.addobj pdf catalog' in let trailerdict' = Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum) in {pdf with Pdf.root = catalognum; Pdf.trailerdict = trailerdict'} | None -> error "bad root" end | _ -> error "Unknown page layout" (* Set page mode *) let set_page_mode pdf s = match s with | "UseNone" | "UseOutlines" | "UseThumbs" | "FullScreen" | "UseOC" | "UseAttachments" -> begin match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with | Some catalog -> let catalog' = Pdf.add_dict_entry catalog "/PageMode" (Pdf.Name ("/" ^ s)) in let catalognum = Pdf.addobj pdf catalog' in let trailerdict' = Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum) in {pdf with Pdf.root = catalognum; Pdf.trailerdict = trailerdict'} | None -> error "bad root" end | _ -> error "Unknown page mode" let set_non_full_screen_page_mode pdf s = match s with | "UseNone" | "UseOutlines" | "UseThumbs" | "UseOC" | "UseAttachments" -> begin match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with | Some catalog -> let viewerprefs = match Pdf.lookup_direct pdf "/ViewerPreferences" catalog with | Some d -> d | None -> Pdf.Dictionary [] in let viewerprefsnum = Pdf.addobj pdf (Pdf.add_dict_entry viewerprefs "/NonFullScreenPageMode" (Pdf.Name ("/" ^ s))) in let catalog' = Pdf.add_dict_entry catalog "/ViewerPreferences" (Pdf.Indirect viewerprefsnum)in let catalognum = Pdf.addobj pdf catalog' in let trailerdict' = Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum) in {pdf with Pdf.root = catalognum; Pdf.trailerdict = trailerdict'} | None -> error "bad root" end | _ -> error "Unknown non full screen page mode" (* Set open action *) let set_open_action pdf ?dest fit pagenumber = if pagenumber > Pdfpage.endpage pdf || pagenumber < 0 then raise (error "set_open_action: invalid page number") else let pageobjectnumber = select pagenumber (Pdf.page_reference_numbers pdf) in let destination = match dest with | Some s -> begin match Pdfread.parse_single_object s with | Pdf.Array (Pdf.Integer pagenum::more) -> begin try let pageobjectnumber = select pagenum (Pdf.page_reference_numbers pdf) in Pdf.Array (Pdf.Indirect pageobjectnumber::more) with _ -> raise (Pdf.PDFError "bad page number in custom destination") end | _ | exception _ -> raise (Pdf.PDFError "Bad destination syntax") end | None -> if fit then Pdf.Array [Pdf.Indirect pageobjectnumber; Pdf.Name "/Fit"] else Pdf.Array [Pdf.Indirect pageobjectnumber; Pdf.Name "/XYZ"; Pdf.Null; Pdf.Null; Pdf.Null] in let open_action = Pdf.Dictionary [("/D", destination); ("/S", Pdf.Name "/GoTo")] in match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with | Some catalog -> let catalog' = Pdf.add_dict_entry catalog "/OpenAction" open_action in let catalognum = Pdf.addobj pdf catalog' in let trailerdict' = Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum) in {pdf with Pdf.root = catalognum; Pdf.trailerdict = trailerdict'} | None -> error "bad root" let set_version v pdf = pdf.Pdf.minor <- v