mirror of
				https://github.com/johnwhitington/cpdf-source.git
				synced 2025-06-05 22:09:39 +02:00 
			
		
		
		
	more
This commit is contained in:
		
							
								
								
									
										2
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								Makefile
									
									
									
									
									
								
							@@ -1,7 +1,7 @@
 | 
			
		||||
# Build the cpdf command line tools and top level
 | 
			
		||||
MODS = cpdfyojson cpdfxmlm \
 | 
			
		||||
       cpdfunicodedata cpdferror cpdfjson cpdfstrftime cpdfcoord cpdfattach \
 | 
			
		||||
       cpdfpagespec cpdfposition cpdf cpdfpresent cpdffont cpdftype \
 | 
			
		||||
       cpdfpagespec cpdfposition cpdfpresent cpdfmetadata cpdf cpdffont cpdftype \
 | 
			
		||||
       cpdftexttopdf cpdftoc cpdfpad cpdfocg cpdfsqueeze cpdfcommand
 | 
			
		||||
 | 
			
		||||
SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										675
									
								
								cpdf.ml
									
									
									
									
									
								
							
							
						
						
									
										675
									
								
								cpdf.ml
									
									
									
									
									
								
							@@ -10,67 +10,11 @@ type color =
 | 
			
		||||
 | 
			
		||||
let debug = ref false
 | 
			
		||||
 | 
			
		||||
let xmp_template =
 | 
			
		||||
{|<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?>
 | 
			
		||||
 | 
			
		||||
<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'
 | 
			
		||||
 xmlns:iX='http://ns.adobe.com/iX/1.0/'>
 | 
			
		||||
 | 
			
		||||
 <rdf:Description about=''
 | 
			
		||||
  xmlns='http://ns.adobe.com/pdf/1.3/'
 | 
			
		||||
  xmlns:pdf='http://ns.adobe.com/pdf/1.3/'>
 | 
			
		||||
  <pdf:CreationDate>CREATEDATE</pdf:CreationDate>
 | 
			
		||||
  <pdf:ModDate>MODDATE</pdf:ModDate>
 | 
			
		||||
  <pdf:Producer>PRODUCER</pdf:Producer>
 | 
			
		||||
  <pdf:Creator>CREATOR</pdf:Creator>
 | 
			
		||||
  <pdf:Title>TITLE</pdf:Title>
 | 
			
		||||
  <pdf:Subject>SUBJECT</pdf:Subject>
 | 
			
		||||
  <pdf:Author>AUTHOR</pdf:Author>
 | 
			
		||||
  <pdf:Keywords>KEYWORDS</pdf:Keywords>
 | 
			
		||||
  <pdf:Trapped>TRAPPED</pdf:Trapped>
 | 
			
		||||
 </rdf:Description>
 | 
			
		||||
 | 
			
		||||
 <rdf:Description about=''
 | 
			
		||||
  xmlns='http://ns.adobe.com/xap/1.0/'
 | 
			
		||||
  xmlns:xap='http://ns.adobe.com/xap/1.0/'>
 | 
			
		||||
   <xap:CreateDate>CREATEDATE</xap:CreateDate>
 | 
			
		||||
   <xap:CreatorTool>CREATOR</xap:CreatorTool>
 | 
			
		||||
   <xap:ModifyDate>MODDATE</xap:ModifyDate>
 | 
			
		||||
   <xap:MetadataDate>METADATADATE</xap:MetadataDate>
 | 
			
		||||
 </rdf:Description>
 | 
			
		||||
 | 
			
		||||
 <rdf:Description about=''
 | 
			
		||||
  xmlns='http://purl.org/dc/elements/1.1/'
 | 
			
		||||
  xmlns:dc='http://purl.org/dc/elements/1.1/'>
 | 
			
		||||
   <dc:title>TITLE</dc:title>
 | 
			
		||||
 </rdf:Description>
 | 
			
		||||
 | 
			
		||||
</rdf:RDF>
 | 
			
		||||
 | 
			
		||||
<?xpacket end='r'?>|}
 | 
			
		||||
 | 
			
		||||
type encoding =
 | 
			
		||||
  | Raw
 | 
			
		||||
  | UTF8
 | 
			
		||||
  | Stripped
 | 
			
		||||
 | 
			
		||||
(* Just strip everything which isn't 7 bit ASCII *)
 | 
			
		||||
let crude_de_unicode s =
 | 
			
		||||
  implode (map char_of_int (lose (fun x -> x > 127) (Pdftext.codepoints_of_pdfdocstring s)))
 | 
			
		||||
 | 
			
		||||
let encode_output enc s =
 | 
			
		||||
  match enc with
 | 
			
		||||
  | Raw -> s
 | 
			
		||||
  | UTF8 -> Pdftext.utf8_of_pdfdocstring s
 | 
			
		||||
  | Stripped -> crude_de_unicode s
 | 
			
		||||
 | 
			
		||||
(* Get the number of pages in file. Doesn't need decryption. *)
 | 
			
		||||
let endpage_io ?revision i user_pw owner_pw =
 | 
			
		||||
  let pdf = Pdfread.pdf_of_input_lazy ?revision user_pw owner_pw i in
 | 
			
		||||
    Pdfpage.endpage pdf
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
let print_pdf_objs pdf =
 | 
			
		||||
  Printf.printf "Trailerdict: %s\n" (Pdfwrite.string_of_pdf pdf.Pdf.trailerdict);
 | 
			
		||||
  Printf.printf "Root: %i\n" pdf.Pdf.root;
 | 
			
		||||
@@ -103,14 +47,6 @@ let rec process_text time text m =
 | 
			
		||||
  | [] -> Cpdfstrftime.strftime ~time text
 | 
			
		||||
  | (s, r)::t -> process_text time (string_replace_all_lazy s r text) t
 | 
			
		||||
 | 
			
		||||
let expand_date = function
 | 
			
		||||
  | "now" ->
 | 
			
		||||
      begin match Sys.getenv_opt "CPDF_REPRODUCIBLE_DATES" with
 | 
			
		||||
      | Some "true" -> Cpdfstrftime.strftime ~time:Cpdfstrftime.dummy "D:%Y%m%d%H%M%S" 
 | 
			
		||||
      | _ -> Cpdfstrftime.strftime "D:%Y%m%d%H%M%S"
 | 
			
		||||
      end
 | 
			
		||||
  | x -> x
 | 
			
		||||
 | 
			
		||||
(* For uses of process_pages which don't need to deal with matrices, this
 | 
			
		||||
   function transforms into one which returns the identity matrix *)
 | 
			
		||||
let ppstub f n p = (f n p, n, Pdftransform.i_matrix)
 | 
			
		||||
@@ -205,17 +141,6 @@ let combine_pdf_resources pdf a b =
 | 
			
		||||
            (Pdf.Dictionary [])
 | 
			
		||||
            (unknown_keys_a @ unknown_keys_b @ combined_known_entries)
 | 
			
		||||
 | 
			
		||||
(* \section{Copy an /ID from one file to another} *)
 | 
			
		||||
let copy_id keepversion copyfrom copyto =
 | 
			
		||||
  match Pdf.lookup_direct copyfrom "/ID" copyfrom.Pdf.trailerdict with
 | 
			
		||||
  | None -> copyto (* error "Source PDF file has no /ID entry to copy from" *)
 | 
			
		||||
  | Some id ->
 | 
			
		||||
      copyto.Pdf.trailerdict <-
 | 
			
		||||
        Pdf.add_dict_entry copyto.Pdf.trailerdict "/ID" id;
 | 
			
		||||
      copyto.Pdf.minor <-
 | 
			
		||||
        if keepversion then copyto.Pdf.minor else max copyto.Pdf.minor 1;
 | 
			
		||||
      copyto
 | 
			
		||||
 | 
			
		||||
(* \section{Remove bookmarks} *)
 | 
			
		||||
 | 
			
		||||
(* \section{Add bookmarks} *)
 | 
			
		||||
@@ -377,155 +302,6 @@ let add_bookmarks ~json verify input pdf =
 | 
			
		||||
    (*iter (fun b -> flprint (Pdfmarks.string_of_bookmark b); flprint "\n") parsed;*)
 | 
			
		||||
    Pdfmarks.add_bookmarks parsed pdf 
 | 
			
		||||
 | 
			
		||||
(* \section{Set page mode} *)
 | 
			
		||||
let set_page_mode pdf s =
 | 
			
		||||
  match s with
 | 
			
		||||
  | "UseNone" | "UseOutlines" | "UseThumbs"
 | 
			
		||||
  | "FullScreen" | "UseOC" | "UseAttachments" ->
 | 
			
		||||
      begin match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
 | 
			
		||||
      | Some catalog ->
 | 
			
		||||
          let catalog' =
 | 
			
		||||
            Pdf.add_dict_entry catalog "/PageMode" (Pdf.Name ("/" ^ s))
 | 
			
		||||
          in
 | 
			
		||||
            let catalognum = Pdf.addobj pdf catalog' in
 | 
			
		||||
              let trailerdict' =
 | 
			
		||||
                Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum)
 | 
			
		||||
              in
 | 
			
		||||
                {pdf with
 | 
			
		||||
                  Pdf.root = catalognum;
 | 
			
		||||
                  Pdf.trailerdict = trailerdict'}
 | 
			
		||||
      | None -> error "bad root"
 | 
			
		||||
      end
 | 
			
		||||
  | _ -> error "Unknown page mode"
 | 
			
		||||
 | 
			
		||||
(* Set open action *)
 | 
			
		||||
let set_open_action pdf fit pagenumber =
 | 
			
		||||
  if pagenumber > Pdfpage.endpage pdf || pagenumber < 0 then
 | 
			
		||||
    raise (error "set_open_action: invalid page number")
 | 
			
		||||
  else
 | 
			
		||||
    let pageobjectnumber = select pagenumber (Pdf.page_reference_numbers pdf) in
 | 
			
		||||
      let destination =
 | 
			
		||||
        if fit then
 | 
			
		||||
          Pdf.Array [Pdf.Indirect pageobjectnumber; Pdf.Name "/Fit"]
 | 
			
		||||
        else
 | 
			
		||||
          Pdf.Array [Pdf.Indirect pageobjectnumber; Pdf.Name "/XYZ"; Pdf.Null; Pdf.Null; Pdf.Null]
 | 
			
		||||
      in
 | 
			
		||||
        let open_action =
 | 
			
		||||
          Pdf.Dictionary [("/D", destination); ("/S", Pdf.Name "/GoTo")]
 | 
			
		||||
        in
 | 
			
		||||
          match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
 | 
			
		||||
          | Some catalog ->
 | 
			
		||||
              let catalog' =
 | 
			
		||||
                Pdf.add_dict_entry catalog "/OpenAction" open_action
 | 
			
		||||
              in
 | 
			
		||||
                let catalognum = Pdf.addobj pdf catalog' in
 | 
			
		||||
                  let trailerdict' =
 | 
			
		||||
                    Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum)
 | 
			
		||||
                  in
 | 
			
		||||
                    {pdf with Pdf.root = catalognum; Pdf.trailerdict = trailerdict'}
 | 
			
		||||
          | None -> error "bad root"
 | 
			
		||||
 | 
			
		||||
(* \section{Set viewer preferences} *)
 | 
			
		||||
let set_viewer_preference (key, value, version) pdf =
 | 
			
		||||
  match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
 | 
			
		||||
  | Some catalog ->
 | 
			
		||||
      let viewer_preferences =
 | 
			
		||||
        match Pdf.lookup_direct pdf "/ViewerPreferences" catalog with
 | 
			
		||||
        | Some d -> d
 | 
			
		||||
        | None -> Pdf.Dictionary []
 | 
			
		||||
      in
 | 
			
		||||
        let viewer_preferences' =
 | 
			
		||||
          Pdf.add_dict_entry viewer_preferences key value
 | 
			
		||||
        in
 | 
			
		||||
          let catalog' =
 | 
			
		||||
            Pdf.add_dict_entry catalog "/ViewerPreferences" viewer_preferences'
 | 
			
		||||
          in
 | 
			
		||||
            let catalognum = Pdf.addobj pdf catalog' in
 | 
			
		||||
              let trailerdict' =
 | 
			
		||||
                Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum)
 | 
			
		||||
              in
 | 
			
		||||
                {pdf with
 | 
			
		||||
                  Pdf.minor = max pdf.Pdf.minor version;
 | 
			
		||||
                  Pdf.root = catalognum;
 | 
			
		||||
                  Pdf.trailerdict = trailerdict'}
 | 
			
		||||
  | None -> error "bad root"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
(* \section{Set page layout} *)
 | 
			
		||||
let set_page_layout pdf s =
 | 
			
		||||
  match s with
 | 
			
		||||
  | "SinglePage" | "OneColumn" | "TwoColumnLeft"
 | 
			
		||||
  | "TwoColumnRight" | "TwoPageLeft" | "TwoPageRight" ->
 | 
			
		||||
      begin match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
 | 
			
		||||
      | Some catalog ->
 | 
			
		||||
          let catalog' =
 | 
			
		||||
            Pdf.add_dict_entry catalog "/PageLayout" (Pdf.Name ("/" ^ s))
 | 
			
		||||
          in
 | 
			
		||||
            let catalognum = Pdf.addobj pdf catalog' in
 | 
			
		||||
              let trailerdict' =
 | 
			
		||||
                Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum)
 | 
			
		||||
              in
 | 
			
		||||
                {pdf with
 | 
			
		||||
                  Pdf.root = catalognum;
 | 
			
		||||
                  Pdf.trailerdict = trailerdict'}
 | 
			
		||||
      | None -> error "bad root"
 | 
			
		||||
      end
 | 
			
		||||
  | _ -> error "Unknown page layout"
 | 
			
		||||
 | 
			
		||||
(* \section{Set or replace metadata} *)
 | 
			
		||||
let set_metadata_from_bytes keepversion data pdf =
 | 
			
		||||
  let metadata_stream =
 | 
			
		||||
    Pdf.Stream
 | 
			
		||||
      {contents =
 | 
			
		||||
        (Pdf.Dictionary
 | 
			
		||||
         ["/Length", Pdf.Integer (bytes_size data);
 | 
			
		||||
          "/Type", Pdf.Name "/Metadata";
 | 
			
		||||
          "/Subtype", Pdf.Name "/XML"],
 | 
			
		||||
         Pdf.Got data)}
 | 
			
		||||
  in
 | 
			
		||||
    let objnum = Pdf.addobj pdf metadata_stream in 
 | 
			
		||||
      let document_catalog =
 | 
			
		||||
        match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
 | 
			
		||||
        | Some s -> s
 | 
			
		||||
        | None -> error "Malformed PDF: No root."
 | 
			
		||||
      in
 | 
			
		||||
        let document_catalog' =
 | 
			
		||||
          Pdf.add_dict_entry document_catalog "/Metadata" (Pdf.Indirect objnum)
 | 
			
		||||
        in
 | 
			
		||||
          let rootnum = Pdf.addobj pdf document_catalog' in
 | 
			
		||||
            let trailerdict =
 | 
			
		||||
              Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootnum)
 | 
			
		||||
            in
 | 
			
		||||
            {pdf with
 | 
			
		||||
               Pdf.trailerdict = trailerdict;
 | 
			
		||||
               Pdf.root = rootnum;
 | 
			
		||||
               Pdf.minor =
 | 
			
		||||
                 if keepversion then pdf.Pdf.minor else max 4 pdf.Pdf.minor}
 | 
			
		||||
 | 
			
		||||
let set_metadata keepversion filename pdf =
 | 
			
		||||
  let ch = open_in_bin filename in
 | 
			
		||||
    let data = mkbytes (in_channel_length ch) in
 | 
			
		||||
      for x = 0 to bytes_size data - 1 do
 | 
			
		||||
        bset data x (input_byte ch)
 | 
			
		||||
      done;
 | 
			
		||||
      set_metadata_from_bytes keepversion data pdf
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
(* \section{Remove metadata} *)
 | 
			
		||||
let remove_metadata pdf =
 | 
			
		||||
  match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
 | 
			
		||||
  | None -> error "malformed file" 
 | 
			
		||||
  | Some root ->
 | 
			
		||||
      let root' = Pdf.remove_dict_entry root "/Metadata" in
 | 
			
		||||
        let rootnum = Pdf.addobj pdf root' in
 | 
			
		||||
          {pdf with
 | 
			
		||||
             Pdf.trailerdict =
 | 
			
		||||
               Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootnum);
 | 
			
		||||
             Pdf.root =
 | 
			
		||||
               rootnum}
 | 
			
		||||
 | 
			
		||||
(* List bookmarks *)
 | 
			
		||||
let output_string_of_target pdf fastrefnums x =
 | 
			
		||||
  match Pdfdest.pdfobject_of_destination x with
 | 
			
		||||
@@ -584,9 +360,9 @@ let list_bookmarks ~json encoding range pdf output =
 | 
			
		||||
            replace q bs q (replace nl bs n (replace bs bs bs codepoints))
 | 
			
		||||
        in
 | 
			
		||||
          match encoding with
 | 
			
		||||
          | UTF8 -> Pdftext.utf8_of_codepoints escaped
 | 
			
		||||
          | Stripped -> process_stripped escaped
 | 
			
		||||
          | Raw -> s
 | 
			
		||||
          | Cpdfmetadata.UTF8 -> Pdftext.utf8_of_codepoints escaped
 | 
			
		||||
          | Cpdfmetadata.Stripped -> process_stripped escaped
 | 
			
		||||
          | Cpdfmetadata.Raw -> s
 | 
			
		||||
    in
 | 
			
		||||
      let bookmarks = Pdfmarks.read_bookmarks pdf in
 | 
			
		||||
      let refnums = Pdf.page_reference_numbers pdf in
 | 
			
		||||
@@ -719,27 +495,6 @@ let hasbox pdf page boxname =
 | 
			
		||||
        | _ -> false
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
(* Print metadata *)
 | 
			
		||||
let get_metadata pdf =
 | 
			
		||||
  match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
 | 
			
		||||
  | None -> error "malformed file"
 | 
			
		||||
  | Some root ->
 | 
			
		||||
      match Pdf.lookup_direct pdf "/Metadata" root with
 | 
			
		||||
      | Some ((Pdf.Stream _) as s) ->
 | 
			
		||||
          Pdfcodec.decode_pdfstream pdf s;
 | 
			
		||||
          begin match s with
 | 
			
		||||
          | Pdf.Stream {contents = (_, Pdf.Got data)} -> Some data 
 | 
			
		||||
          | _ -> assert false
 | 
			
		||||
          end
 | 
			
		||||
      | _ -> None
 | 
			
		||||
 | 
			
		||||
let print_metadata pdf =
 | 
			
		||||
  match get_metadata pdf with
 | 
			
		||||
    None -> ()
 | 
			
		||||
  | Some data ->
 | 
			
		||||
      for x = 0 to bytes_size data - 1 do
 | 
			
		||||
        Printf.printf "%c" (char_of_int (bget data x))
 | 
			
		||||
      done
 | 
			
		||||
 | 
			
		||||
(* List fonts *)
 | 
			
		||||
let list_font pdf page (name, dict) =
 | 
			
		||||
@@ -1728,7 +1483,7 @@ let stamp relative_to_cropbox position topline midline fast scale_to_fit isover
 | 
			
		||||
        let merged =
 | 
			
		||||
          {merged with Pdf.saved_encryption = pdf.Pdf.saved_encryption}
 | 
			
		||||
        in
 | 
			
		||||
          let merged = copy_id true pdf merged in
 | 
			
		||||
          let merged = Cpdfmetadata.copy_id true pdf merged in
 | 
			
		||||
            let merged_pages = Pdfpage.pages_of_pagetree merged in
 | 
			
		||||
              let under_pages, over_page =
 | 
			
		||||
                all_but_last merged_pages, last merged_pages
 | 
			
		||||
@@ -1819,7 +1574,7 @@ let stamp_as_xobject pdf range over =
 | 
			
		||||
        let merged =
 | 
			
		||||
          {merged with Pdf.saved_encryption = pdf.Pdf.saved_encryption}
 | 
			
		||||
        in
 | 
			
		||||
          let merged = copy_id true pdf merged in
 | 
			
		||||
          let merged = Cpdfmetadata.copy_id true pdf merged in
 | 
			
		||||
            let merged_pages = Pdfpage.pages_of_pagetree merged in
 | 
			
		||||
              let under_pages, over_page =
 | 
			
		||||
                all_but_last merged_pages, last merged_pages
 | 
			
		||||
@@ -2185,7 +1940,7 @@ let scale_contents ?(fast=false) position scale pdf range =
 | 
			
		||||
(* \section{List annotations} *)
 | 
			
		||||
let get_annotation_string encoding pdf annot =
 | 
			
		||||
  match Pdf.lookup_direct pdf "/Contents" annot with
 | 
			
		||||
  | Some (Pdf.String s) -> encode_output encoding s
 | 
			
		||||
  | Some (Pdf.String s) -> Cpdfmetadata.encode_output encoding s
 | 
			
		||||
  | _ -> ""
 | 
			
		||||
 | 
			
		||||
let print_annotation encoding pdf num s =
 | 
			
		||||
@@ -2696,415 +2451,6 @@ let twoup fast pdf =
 | 
			
		||||
        let pdf = upright all (rotate_pdf ~-90 pdf all) in
 | 
			
		||||
          scale_to_fit_pdf ~fast Cpdfposition.Diagonal 1. (many (width, height) endpage) () pdf all
 | 
			
		||||
 | 
			
		||||
(* \section{Output info} *)
 | 
			
		||||
let get_info raw pdf =
 | 
			
		||||
  let infodict =
 | 
			
		||||
    match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with
 | 
			
		||||
    | Some infodict -> infodict
 | 
			
		||||
    | _ -> Pdf.Dictionary []
 | 
			
		||||
  in
 | 
			
		||||
    let getstring name =
 | 
			
		||||
      match Pdf.lookup_direct pdf name infodict with
 | 
			
		||||
      | Some (Pdf.String s) ->
 | 
			
		||||
          if raw then s else crude_de_unicode s
 | 
			
		||||
      | Some (Pdf.Boolean false) -> "False"
 | 
			
		||||
      | Some (Pdf.Boolean true) -> "True"
 | 
			
		||||
      | _ -> if name = "/Trapped" then "False" else ""
 | 
			
		||||
    in
 | 
			
		||||
      getstring
 | 
			
		||||
       
 | 
			
		||||
let get_info_utf8 pdf =
 | 
			
		||||
  let infodict =
 | 
			
		||||
    match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with
 | 
			
		||||
    | Some infodict -> infodict
 | 
			
		||||
    | _ -> Pdf.Dictionary []
 | 
			
		||||
  in
 | 
			
		||||
    (function name ->
 | 
			
		||||
      match Pdf.lookup_direct pdf name infodict with
 | 
			
		||||
      | Some (Pdf.String s) -> Pdftext.utf8_of_pdfdocstring s
 | 
			
		||||
      | Some (Pdf.Boolean false) -> "False"
 | 
			
		||||
      | Some (Pdf.Boolean true) -> "True"
 | 
			
		||||
      | _ -> if name = "/Trapped" then "False" else "")
 | 
			
		||||
 | 
			
		||||
let getstring encoding pdf =
 | 
			
		||||
  match encoding with
 | 
			
		||||
  | Raw -> get_info true pdf
 | 
			
		||||
  | Stripped -> get_info false pdf
 | 
			
		||||
  | UTF8 -> get_info_utf8 pdf
 | 
			
		||||
 | 
			
		||||
let output_info encoding pdf =
 | 
			
		||||
  let getstring = getstring encoding pdf in
 | 
			
		||||
    Printf.printf "Version: %i.%i\n" pdf.Pdf.major pdf.Pdf.minor;
 | 
			
		||||
    Printf.printf "Pages: %i\n" (Pdfpage.endpage pdf);
 | 
			
		||||
    Printf.printf "Title: %s\n" (getstring "/Title");
 | 
			
		||||
    Printf.printf "Author: %s\n" (getstring "/Author");
 | 
			
		||||
    Printf.printf "Subject: %s\n" (getstring "/Subject");
 | 
			
		||||
    Printf.printf "Keywords: %s\n" (getstring "/Keywords");
 | 
			
		||||
    Printf.printf "Creator: %s\n" (getstring "/Creator");
 | 
			
		||||
    Printf.printf "Producer: %s\n" (getstring "/Producer");
 | 
			
		||||
    Printf.printf "Created: %s\n" (getstring "/CreationDate");
 | 
			
		||||
    Printf.printf "Modified: %s\n" (getstring "/ModDate");
 | 
			
		||||
    Printf.printf "Trapped: %s\n" (getstring "/Trapped")
 | 
			
		||||
 | 
			
		||||
type xmltree =
 | 
			
		||||
    E of Cpdfxmlm.tag * xmltree list
 | 
			
		||||
  | D of string
 | 
			
		||||
 | 
			
		||||
let xmltree_of_bytes b =
 | 
			
		||||
  let i = Cpdfxmlm.make_input (`String (0, string_of_bytes b)) in
 | 
			
		||||
    let el tag childs = E (tag, childs)
 | 
			
		||||
    and data d = D d in
 | 
			
		||||
      Cpdfxmlm.input_doc_tree ~el ~data i
 | 
			
		||||
 | 
			
		||||
let bytes_of_xmltree t =
 | 
			
		||||
  let buf = Buffer.create 1024 in
 | 
			
		||||
  let o = Cpdfxmlm.make_output (`Buffer buf) in
 | 
			
		||||
  let frag = function
 | 
			
		||||
      E (tag, childs) -> `El (tag, childs)
 | 
			
		||||
    | D d -> `Data d
 | 
			
		||||
  in
 | 
			
		||||
    Cpdfxmlm.output_doc_tree frag o t;
 | 
			
		||||
    bytes_of_string (Buffer.contents buf)
 | 
			
		||||
 | 
			
		||||
let rec string_of_xmltree = function
 | 
			
		||||
   D d ->
 | 
			
		||||
     Printf.sprintf "DATA {%s}" d
 | 
			
		||||
 | E (tag, trees) ->
 | 
			
		||||
     Printf.sprintf "ELT (%s, %s)"
 | 
			
		||||
       (string_of_tag tag)
 | 
			
		||||
       (string_of_xmltrees trees)
 | 
			
		||||
 | 
			
		||||
and string_of_tag ((n, n'), attributes) =
 | 
			
		||||
  Printf.sprintf
 | 
			
		||||
    "NAME |%s| |%s|, ATTRIBUTES {%s}" n n'
 | 
			
		||||
    (string_of_attributes attributes)
 | 
			
		||||
 | 
			
		||||
and string_of_attribute ((n, n'), str) =
 | 
			
		||||
  Printf.sprintf "ATTRNAME |%s| |%s|, STR {%s}" n n' str
 | 
			
		||||
 | 
			
		||||
and string_of_attributes attrs =
 | 
			
		||||
  fold_left
 | 
			
		||||
    (fun a b -> a ^ " " ^ b) "" (map string_of_attribute attrs)
 | 
			
		||||
 | 
			
		||||
and string_of_xmltrees trees =
 | 
			
		||||
  fold_left
 | 
			
		||||
    (fun a b -> a ^ " " ^ b) "" (map string_of_xmltree trees)
 | 
			
		||||
 | 
			
		||||
let adobe = "http://ns.adobe.com/pdf/1.3/"
 | 
			
		||||
 | 
			
		||||
let xmp = "http://ns.adobe.com/xap/1.0/"
 | 
			
		||||
 | 
			
		||||
let dc = "http://purl.org/dc/elements/1.1/"
 | 
			
		||||
 | 
			
		||||
let rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
 | 
			
		||||
 | 
			
		||||
let combine_with_spaces strs =
 | 
			
		||||
  String.trim
 | 
			
		||||
    (fold_left (fun x y -> x ^ (if x <> "" then ", " else "") ^ y) "" strs)
 | 
			
		||||
 | 
			
		||||
(* Collect all <li> elements inside a seq, bag, or alt. Combine with commas. If
 | 
			
		||||
none found, return empty string instead. *)
 | 
			
		||||
let collect_list_items = function
 | 
			
		||||
   E (((n, n'), _), elts) when
 | 
			
		||||
     n = rdf && (n' = "Alt" || n' = "Seq" || n' = "Bag")
 | 
			
		||||
   ->
 | 
			
		||||
     combine_with_spaces
 | 
			
		||||
       (option_map
 | 
			
		||||
         (function
 | 
			
		||||
             E (((n, n'), _), [D d]) when n = rdf && n' = "li" ->
 | 
			
		||||
               Some d
 | 
			
		||||
           | _ -> None)
 | 
			
		||||
         elts)
 | 
			
		||||
 | _ -> ""
 | 
			
		||||
 | 
			
		||||
let collect_list_items_all all =
 | 
			
		||||
  match keep (function E _ -> true | _ -> false) all with
 | 
			
		||||
    h::_ -> Some (collect_list_items h)
 | 
			
		||||
  | [] -> None
 | 
			
		||||
 | 
			
		||||
let rec get_data_for namespace name = function
 | 
			
		||||
   D _ -> None
 | 
			
		||||
 | E (((n, n'), _), [D d]) when n = namespace && n' = name ->
 | 
			
		||||
     Some d
 | 
			
		||||
 | E (((n, n'), _), e) when n = namespace && n' = name ->
 | 
			
		||||
     collect_list_items_all e
 | 
			
		||||
 | E (_, l) ->
 | 
			
		||||
     match option_map (get_data_for namespace name) l with
 | 
			
		||||
       x :: _ -> Some x
 | 
			
		||||
     | _ -> None
 | 
			
		||||
 | 
			
		||||
let output_xmp_info encoding pdf =
 | 
			
		||||
  let print_out tree title namespace name =
 | 
			
		||||
    match get_data_for namespace name tree with
 | 
			
		||||
      None -> ()
 | 
			
		||||
    | Some data ->
 | 
			
		||||
        Printf.printf "%s: " title;
 | 
			
		||||
        print_endline data
 | 
			
		||||
  in
 | 
			
		||||
    match get_metadata pdf with
 | 
			
		||||
      None -> ()
 | 
			
		||||
    | Some metadata ->
 | 
			
		||||
        try
 | 
			
		||||
          let dtd, tree = xmltree_of_bytes metadata in
 | 
			
		||||
            print_out tree "XMP pdf:Keywords" adobe "Keywords";
 | 
			
		||||
            print_out tree "XMP pdf:Producer" adobe "Producer";
 | 
			
		||||
            print_out tree "XMP pdf:Trapped" adobe "Trapped";
 | 
			
		||||
            print_out tree "XMP pdf:Title" adobe "Title";
 | 
			
		||||
            print_out tree "XMP pdf:Creator" adobe "Creator";
 | 
			
		||||
            print_out tree "XMP pdf:Subject" adobe "Subject";
 | 
			
		||||
            print_out tree "XMP pdf:Author" adobe "Author";
 | 
			
		||||
            print_out tree "XMP pdf:CreationDate" adobe "CreationDate";
 | 
			
		||||
            print_out tree "XMP pdf:ModDate" adobe "ModDate";
 | 
			
		||||
            print_out tree "XMP xmp:CreateDate" xmp "CreateDate";
 | 
			
		||||
            print_out tree "XMP xmp:CreatorTool" xmp "CreatorTool";
 | 
			
		||||
            print_out tree "XMP xmp:MetadataDate" xmp "MetadataDate";
 | 
			
		||||
            print_out tree "XMP xmp:ModifyDate" xmp "ModifyDate";
 | 
			
		||||
            print_out tree "XMP dc:title" dc "title";
 | 
			
		||||
            print_out tree "XMP dc:creator" dc "creator";
 | 
			
		||||
            print_out tree "XMP dc:subject" dc "subject";
 | 
			
		||||
            print_out tree "XMP dc:description" dc "description"
 | 
			
		||||
        with
 | 
			
		||||
          _ -> ()
 | 
			
		||||
 | 
			
		||||
(* Get XMP info equivalent of an old metadata field *)
 | 
			
		||||
let check = function
 | 
			
		||||
  "/Title" -> [(adobe, "Title"); (dc, "title")]
 | 
			
		||||
| "/Author" -> [(adobe, "Author"); (dc, "creator")]
 | 
			
		||||
| "/Subject" -> [(adobe, "Subject"); (dc, "subject")]
 | 
			
		||||
| "/Keywords" -> [(adobe, "Keywords")]
 | 
			
		||||
| "/Creator" -> [(adobe, "Creator"); (xmp, "CreatorTool")]
 | 
			
		||||
| "/Producer" -> [(adobe, "Producer")]
 | 
			
		||||
| "/CreationDate" -> [(adobe, "CreationDate"); (xmp, "CreateDate")]
 | 
			
		||||
| "/ModDate" -> [(adobe, "ModificationDate"); (xmp, "ModifyDate")]
 | 
			
		||||
| _ -> failwith "Cpdf.check_name not known"
 | 
			
		||||
 | 
			
		||||
let get_xmp_info pdf name =
 | 
			
		||||
  let tocheck = check name in
 | 
			
		||||
  match get_metadata pdf with
 | 
			
		||||
    None -> ""
 | 
			
		||||
  | Some metadata ->
 | 
			
		||||
      try
 | 
			
		||||
        let _, tree = xmltree_of_bytes metadata in
 | 
			
		||||
          let results = map (fun (kind, key) -> match get_data_for kind key tree with Some x -> x | None -> "") tocheck in
 | 
			
		||||
            match lose (eq "") results with
 | 
			
		||||
             x::_ -> x
 | 
			
		||||
           | [] -> ""
 | 
			
		||||
      with
 | 
			
		||||
        _ -> ""
 | 
			
		||||
 | 
			
		||||
(* Set XMP info *)
 | 
			
		||||
let rec set_xml_field kind fieldname value = function
 | 
			
		||||
  D data -> D data
 | 
			
		||||
| E (((n, n'), m), _ (*[D _]*)) when n = kind && n' = fieldname -> (* Replace anything inside, including nothing i.e <tag/> *)
 | 
			
		||||
    E (((n, n'), m), [D value])
 | 
			
		||||
| E (x, ts) -> E (x, map (set_xml_field kind fieldname value) ts)
 | 
			
		||||
 | 
			
		||||
let set_pdf_info_xml kind fieldname value xmldata pdf =
 | 
			
		||||
  let dtd, tree = xmltree_of_bytes xmldata in
 | 
			
		||||
  let str =
 | 
			
		||||
    match value with
 | 
			
		||||
      Pdf.String s -> s
 | 
			
		||||
    | Pdf.Boolean true -> "True"
 | 
			
		||||
    | Pdf.Boolean false -> "False"
 | 
			
		||||
    | _ -> failwith "set_pdf_info_xml: not a string"
 | 
			
		||||
  in
 | 
			
		||||
  let newtree = set_xml_field kind fieldname str tree in
 | 
			
		||||
    bytes_of_xmltree (dtd, newtree)
 | 
			
		||||
 | 
			
		||||
let set_pdf_info_xml_many changes value xmldata pdf =
 | 
			
		||||
  let xmldata = ref xmldata in
 | 
			
		||||
    iter
 | 
			
		||||
      (fun (kind, fieldname) ->
 | 
			
		||||
         xmldata := set_pdf_info_xml kind fieldname value !xmldata pdf)
 | 
			
		||||
      changes;
 | 
			
		||||
    !xmldata
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
(* \section{Set an entry in the /Info dictionary} *)
 | 
			
		||||
 | 
			
		||||
(* We must parse the date to get its components, then use strftime to build the
 | 
			
		||||
 * new string in XMP format *)
 | 
			
		||||
 | 
			
		||||
type date =
 | 
			
		||||
  {mutable year : int;
 | 
			
		||||
   mutable month : int; (* 1 - 12 *)
 | 
			
		||||
   mutable day : int; (* 1 - 31 *)
 | 
			
		||||
   mutable hour : int; (* 0 - 23 *)
 | 
			
		||||
   mutable minute : int; (* 0 - 59 *)
 | 
			
		||||
   mutable second : int; (* 0 - 59 *)
 | 
			
		||||
   mutable ut_relationship : int; (* -1, 0, +1 *)
 | 
			
		||||
   mutable offset_hours : int; (* 0 - 59 *)
 | 
			
		||||
   mutable offset_minutes : int (* 0 - 59 *)}
 | 
			
		||||
 | 
			
		||||
let default_date () =
 | 
			
		||||
  {year = 0;
 | 
			
		||||
   month = 1;
 | 
			
		||||
   day = 1;
 | 
			
		||||
   hour = 0;
 | 
			
		||||
   minute = 0;
 | 
			
		||||
   second = 0;
 | 
			
		||||
   ut_relationship = 0;
 | 
			
		||||
   offset_hours = 0;
 | 
			
		||||
   offset_minutes = 0}
 | 
			
		||||
 | 
			
		||||
(* XMP date format is YYYY-MM-DDThh:mm:ssTZD *)
 | 
			
		||||
let make_xmp_date_from_components d =
 | 
			
		||||
  let tzd =
 | 
			
		||||
    if d.ut_relationship = 0 && d.offset_hours = 0 && d.offset_minutes = 0 then "Z" else
 | 
			
		||||
    (if d.ut_relationship >=0 then "+" else "-") ^
 | 
			
		||||
    Printf.sprintf "%02i" d.offset_hours ^
 | 
			
		||||
    ":" ^
 | 
			
		||||
    Printf.sprintf "%02i" d.offset_minutes
 | 
			
		||||
  in 
 | 
			
		||||
    Cpdfstrftime.strftime
 | 
			
		||||
      ~time:{Cpdfstrftime._tm_sec = d.second;
 | 
			
		||||
             Cpdfstrftime._tm_min = d.minute;
 | 
			
		||||
             Cpdfstrftime._tm_hour = d.hour;
 | 
			
		||||
             Cpdfstrftime._tm_mday = d.day;
 | 
			
		||||
             Cpdfstrftime._tm_mon = d.month - 1;
 | 
			
		||||
             Cpdfstrftime._tm_year = d.year - 1900;
 | 
			
		||||
             Cpdfstrftime._tm_wday = 0;
 | 
			
		||||
             Cpdfstrftime._tm_yday = 0;
 | 
			
		||||
             Cpdfstrftime._tm_isdst = false}
 | 
			
		||||
      "%Y-%m-%dT%H:%M:%S"
 | 
			
		||||
  ^
 | 
			
		||||
    tzd
 | 
			
		||||
 | 
			
		||||
let xmp_date date =
 | 
			
		||||
  let d = default_date () in
 | 
			
		||||
  try
 | 
			
		||||
    match explode date with
 | 
			
		||||
      'D'::':'::r ->
 | 
			
		||||
        begin match r with
 | 
			
		||||
          y1::y2::y3::y4::r ->
 | 
			
		||||
            d.year <- int_of_string (implode [y1; y2; y3; y4]);
 | 
			
		||||
            begin match r with
 | 
			
		||||
              m1::m2::r ->
 | 
			
		||||
                d.month <- int_of_string (implode [m1; m2]);
 | 
			
		||||
                begin match r with
 | 
			
		||||
                  d1::d2::r ->
 | 
			
		||||
                  d.day <- int_of_string (implode [d1; d2]);
 | 
			
		||||
                  begin match r with
 | 
			
		||||
                    h1::h2::r ->
 | 
			
		||||
                    d.hour <- int_of_string (implode [h1; h2]);
 | 
			
		||||
                    begin match r with
 | 
			
		||||
                      m1::m2::r ->
 | 
			
		||||
                      d.minute <- int_of_string (implode [m1; m2]);
 | 
			
		||||
                      begin match r with
 | 
			
		||||
                       s1::s2::r ->
 | 
			
		||||
                       d.second <- int_of_string (implode [s1; s2]);
 | 
			
		||||
                         begin match r with
 | 
			
		||||
                           o::r ->
 | 
			
		||||
                           d.ut_relationship <-
 | 
			
		||||
                             if o = '+' then 1 else
 | 
			
		||||
                             if o = '-' then -1 else
 | 
			
		||||
                             0;
 | 
			
		||||
                           begin match r with
 | 
			
		||||
                             h1::h2::'\''::r ->
 | 
			
		||||
                             d.offset_hours <- int_of_string (implode [h1; h2]);
 | 
			
		||||
                             begin match r with
 | 
			
		||||
                               m1::m2::_ ->
 | 
			
		||||
                               d.offset_minutes <- int_of_string (implode [m1; m2]);
 | 
			
		||||
                               raise Exit
 | 
			
		||||
                             | _ -> raise Exit
 | 
			
		||||
                             end
 | 
			
		||||
                           | _ -> raise Exit
 | 
			
		||||
                           end
 | 
			
		||||
                         | _ -> raise Exit
 | 
			
		||||
                         end
 | 
			
		||||
                      | _ -> raise Exit
 | 
			
		||||
                      end
 | 
			
		||||
                    | _ -> raise Exit
 | 
			
		||||
                    end
 | 
			
		||||
                  | _ -> raise Exit
 | 
			
		||||
                  end
 | 
			
		||||
                | _ -> raise Exit
 | 
			
		||||
                end
 | 
			
		||||
            | _ -> raise Exit  
 | 
			
		||||
            end
 | 
			
		||||
        | _ ->
 | 
			
		||||
          Printf.eprintf "xmp_date: Malformed date string (no year): %s\n%!" date;
 | 
			
		||||
          make_xmp_date_from_components d
 | 
			
		||||
        end
 | 
			
		||||
    | _ ->
 | 
			
		||||
        Printf.eprintf "xmp_date: Malformed date string (no prefix): %s\n%!" date;
 | 
			
		||||
        make_xmp_date_from_components d
 | 
			
		||||
  with
 | 
			
		||||
    Exit -> make_xmp_date_from_components d
 | 
			
		||||
 | 
			
		||||
let set_pdf_info ?(xmp_also=false) ?(xmp_just_set=false) (key, value, version) pdf =
 | 
			
		||||
  let infodict =
 | 
			
		||||
    match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with
 | 
			
		||||
    | Some d -> d
 | 
			
		||||
    | None -> Pdf.Dictionary []
 | 
			
		||||
  in
 | 
			
		||||
    let infodict' = Pdf.add_dict_entry infodict key value in
 | 
			
		||||
      let objnum = Pdf.addobj pdf infodict' in
 | 
			
		||||
        if not xmp_just_set then
 | 
			
		||||
          begin
 | 
			
		||||
            pdf.Pdf.trailerdict <-
 | 
			
		||||
              Pdf.add_dict_entry pdf.Pdf.trailerdict "/Info" (Pdf.Indirect objnum);
 | 
			
		||||
            pdf.Pdf.minor <-
 | 
			
		||||
              max pdf.Pdf.minor version
 | 
			
		||||
          end;
 | 
			
		||||
        if xmp_also then
 | 
			
		||||
          begin match get_metadata pdf with
 | 
			
		||||
            None -> pdf
 | 
			
		||||
          | Some xmldata ->
 | 
			
		||||
              let xmp_date = function Pdf.String s -> Pdf.String (xmp_date s) | _ -> failwith "xmp_date not a string" in
 | 
			
		||||
              let changes, value =
 | 
			
		||||
                match key with
 | 
			
		||||
                | "/Producer" -> [(adobe, "Producer")], value
 | 
			
		||||
                | "/Creator" -> [(adobe, "Creator"); (xmp, "CreatorTool"); (dc, "creator")], value
 | 
			
		||||
                | "/Author" -> [(adobe, "Author")], value
 | 
			
		||||
                | "/Title" -> [(adobe, "Title"); (dc, "title")], value
 | 
			
		||||
                | "/Subject" -> [(adobe, "Subject"); (dc, "subject")], value
 | 
			
		||||
                | "/Keywords" -> [(adobe, "Keywords")], value
 | 
			
		||||
                | "/CreationDate" -> [(adobe, "CreationDate"); (xmp, "CreateDate")], xmp_date value
 | 
			
		||||
                | "/ModDate" -> [(adobe, "ModDate"); (xmp, "ModifyDate")], xmp_date value
 | 
			
		||||
                | "/Trapped" -> [(adobe, "Trapped")], value
 | 
			
		||||
                | _ -> failwith "Unknown call to set_pdf_info"
 | 
			
		||||
              in
 | 
			
		||||
                set_metadata_from_bytes
 | 
			
		||||
                  true
 | 
			
		||||
                  (set_pdf_info_xml_many changes value xmldata pdf)
 | 
			
		||||
                  pdf
 | 
			
		||||
          end
 | 
			
		||||
       else
 | 
			
		||||
         pdf
 | 
			
		||||
 | 
			
		||||
(* Set metadata date *)
 | 
			
		||||
let set_metadata_date pdf date =
 | 
			
		||||
  match get_metadata pdf with
 | 
			
		||||
    None -> pdf
 | 
			
		||||
  | Some xmldata ->
 | 
			
		||||
      let changes= [(xmp, "MetadataDate")] in
 | 
			
		||||
      let value = match date with "now" -> xmp_date (expand_date "now") | x -> x in
 | 
			
		||||
        set_metadata_from_bytes
 | 
			
		||||
          true
 | 
			
		||||
          (set_pdf_info_xml_many changes (Pdf.String value) xmldata pdf)
 | 
			
		||||
          pdf
 | 
			
		||||
 | 
			
		||||
let replacements pdf =
 | 
			
		||||
  let info = get_info_utf8 pdf in
 | 
			
		||||
    [("CREATEDATE", xmp_date (let i = info "/CreationDate" in if i = "" then expand_date "now" else i));
 | 
			
		||||
     ("MODDATE", xmp_date (let i = info "/ModDate" in if i = "" then expand_date "now" else i));
 | 
			
		||||
     ("PRODUCER", info "/Producer");
 | 
			
		||||
     ("CREATOR", info "/Creator");
 | 
			
		||||
     ("TITLE", info "/Title");
 | 
			
		||||
     ("SUBJECT", info "/Subject");
 | 
			
		||||
     ("AUTHOR", info "/Author");
 | 
			
		||||
     ("KEYWORDS", info "/Keywords");
 | 
			
		||||
     ("TRAPPED", info "/Trapped");
 | 
			
		||||
     ("METADATADATE", xmp_date (expand_date "now"))]
 | 
			
		||||
 | 
			
		||||
let create_metadata pdf =
 | 
			
		||||
  let xmp = ref xmp_template in
 | 
			
		||||
  iter
 | 
			
		||||
    (fun (s, r) -> xmp := string_replace_all s r !xmp)
 | 
			
		||||
    (replacements pdf);
 | 
			
		||||
  set_metadata_from_bytes false (bytes_of_string !xmp) pdf
 | 
			
		||||
 | 
			
		||||
(* \section{Blacken text} *)
 | 
			
		||||
 | 
			
		||||
(*
 | 
			
		||||
@@ -3497,9 +2843,6 @@ let draft onlyremove boxes range pdf =
 | 
			
		||||
       pagenums;
 | 
			
		||||
      Pdfpage.change_pages true !pdf (rev !pages')
 | 
			
		||||
 | 
			
		||||
let set_version v pdf =
 | 
			
		||||
  pdf.Pdf.minor <- v
 | 
			
		||||
        
 | 
			
		||||
let blank_document width height pages =
 | 
			
		||||
  let pdf_pages =
 | 
			
		||||
    map (fun () -> Pdfpage.blankpage (Pdfpaper.make Pdfunits.PdfPoint width height)) (many () pages)
 | 
			
		||||
@@ -4022,7 +3365,7 @@ let list_spot_colours pdf =
 | 
			
		||||
let add_bookmark_title filename use_title pdf =
 | 
			
		||||
  let title =
 | 
			
		||||
    if use_title then
 | 
			
		||||
      match get_info_utf8 pdf "/Title", get_xmp_info pdf "/Title" with
 | 
			
		||||
      match Cpdfmetadata.get_info_utf8 pdf "/Title", Cpdfmetadata.get_xmp_info pdf "/Title" with
 | 
			
		||||
        "", x | x, "" | _, x -> x
 | 
			
		||||
    else
 | 
			
		||||
      Filename.basename filename
 | 
			
		||||
@@ -4062,13 +3405,13 @@ let create_pdf pages pagesize =
 | 
			
		||||
 | 
			
		||||
(* Remove characters which might not make good filenames. *)
 | 
			
		||||
let remove_unsafe_characters encoding s =
 | 
			
		||||
  if encoding = Raw then s else
 | 
			
		||||
  if encoding = Cpdfmetadata.Raw then s else
 | 
			
		||||
    let chars =
 | 
			
		||||
      lose
 | 
			
		||||
        (function x ->
 | 
			
		||||
           match x with
 | 
			
		||||
           '/' | '?' | '<' | '>' | '\\' | ':' | '*' | '|' | '\"' | '^' | '+' | '=' -> true
 | 
			
		||||
           | x when int_of_char x < 32 || (int_of_char x > 126 && encoding <> Stripped) -> true
 | 
			
		||||
           | x when int_of_char x < 32 || (int_of_char x > 126 && encoding <> Cpdfmetadata.Stripped) -> true
 | 
			
		||||
           | _ -> false)
 | 
			
		||||
        (explode s)
 | 
			
		||||
    in
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										80
									
								
								cpdf.mli
									
									
									
									
									
								
							
							
						
						
									
										80
									
								
								cpdf.mli
									
									
									
									
									
								
							@@ -1,13 +1,6 @@
 | 
			
		||||
(** Coherent PDF Tools Core Routines *)
 | 
			
		||||
open Pdfutil
 | 
			
		||||
 | 
			
		||||
(** {2 Types and Exceptions} *)
 | 
			
		||||
 | 
			
		||||
(** Possible output encodings for some function. [Raw] does no processing at
 | 
			
		||||
all - the PDF string is output as-is. [UTF8] converts loslessly to UTF8.
 | 
			
		||||
[Stripped] extracts the unicode codepoints and returns only those which
 | 
			
		||||
correspond to 7 bit ASCII. *)
 | 
			
		||||
type encoding = Raw | UTF8 | Stripped
 | 
			
		||||
 | 
			
		||||
type color =
 | 
			
		||||
  Grey of float
 | 
			
		||||
@@ -38,45 +31,6 @@ val map_pages : (int -> Pdfpage.t -> 'a) -> Pdf.t -> int list -> 'a list
 | 
			
		||||
 | 
			
		||||
val copy_cropbox_to_mediabox : Pdf.t -> int list -> Pdf.t
 | 
			
		||||
 | 
			
		||||
(** {2 Metadata and settings} *)
 | 
			
		||||
 | 
			
		||||
(** [copy_id keepversion copyfrom copyto] copies the ID, if any, from
 | 
			
		||||
[copyfrom] to [copyto]. If [keepversion] is true, the PDF version of [copyto]
 | 
			
		||||
won't be affected. *)
 | 
			
		||||
val copy_id : bool -> Pdf.t -> Pdf.t -> Pdf.t
 | 
			
		||||
 | 
			
		||||
(** [set_pdf_info (key, value, version)] sets the entry [key] in the /Info directory, updating
 | 
			
		||||
the PDF minor version to [version].*)
 | 
			
		||||
val set_pdf_info : ?xmp_also:bool -> ?xmp_just_set:bool -> (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t
 | 
			
		||||
 | 
			
		||||
val get_xmp_info : Pdf.t -> string -> string
 | 
			
		||||
 | 
			
		||||
(** [set_pdf_info (key, value, version)] sets the entry [key] in the
 | 
			
		||||
/ViewerPreferences directory, updating the PDF minor version to [version].*)
 | 
			
		||||
val set_viewer_preference : (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t
 | 
			
		||||
 | 
			
		||||
(** Set the page layout to the given name (sans slash) e.g SinglePage *)
 | 
			
		||||
val set_page_layout : Pdf.t -> string -> Pdf.t
 | 
			
		||||
 | 
			
		||||
(** Set the page layout to the given name (sans slash) e.g SinglePage *)
 | 
			
		||||
val set_page_mode : Pdf.t -> string -> Pdf.t
 | 
			
		||||
 | 
			
		||||
(** Set the open action. If the boolean is true, /Fit will be used, otherwise /XYZ *)
 | 
			
		||||
val set_open_action : Pdf.t -> bool -> int -> Pdf.t
 | 
			
		||||
 | 
			
		||||
(** Set the PDF version number *)
 | 
			
		||||
val set_version : int -> Pdf.t -> unit
 | 
			
		||||
 | 
			
		||||
(** Given a PDF, returns a function which can lookup a given dictionary entry
 | 
			
		||||
from the /Info dictionary, returning it as a UTF8 string *)
 | 
			
		||||
val get_info_utf8 : Pdf.t -> string -> string
 | 
			
		||||
 | 
			
		||||
(** Output to standard output general information about a PDF. *)
 | 
			
		||||
val output_info : encoding -> Pdf.t -> unit
 | 
			
		||||
 | 
			
		||||
(** Output to standard output information from any XMP metadata stream in a PDF. *)
 | 
			
		||||
val output_xmp_info : encoding -> Pdf.t -> unit
 | 
			
		||||
 | 
			
		||||
(** {2 Bookmarks} *)
 | 
			
		||||
 | 
			
		||||
(** [parse_bookmark_file verify pdf input] parses the bookmark file in [input].
 | 
			
		||||
@@ -90,30 +44,9 @@ val add_bookmarks : json:bool -> bool -> Pdfio.input -> Pdf.t -> Pdf.t
 | 
			
		||||
 | 
			
		||||
(** [list_bookmarks encoding range pdf output] lists the bookmarks to the given
 | 
			
		||||
output in the format specified in cpdfmanual.pdf *)
 | 
			
		||||
val list_bookmarks : json:bool -> encoding -> int list -> Pdf.t -> Pdfio.output -> unit
 | 
			
		||||
val list_bookmarks : json:bool -> Cpdfmetadata.encoding -> int list -> Pdf.t -> Pdfio.output -> unit
 | 
			
		||||
 | 
			
		||||
(** {2 XML Metadata} *)
 | 
			
		||||
 | 
			
		||||
(** [set_metadata keepversion filename pdf] sets the XML metadata of a PDF to the contents of [filename]. If [keepversion] is true, the PDF version will not be altered. *) 
 | 
			
		||||
val set_metadata : bool -> string -> Pdf.t -> Pdf.t
 | 
			
		||||
 | 
			
		||||
(** The same, but the content comes from [bytes]. *)
 | 
			
		||||
val set_metadata_from_bytes : bool -> Pdfio.bytes -> Pdf.t -> Pdf.t
 | 
			
		||||
 | 
			
		||||
(** Remove the metadata from a file *)
 | 
			
		||||
val remove_metadata : Pdf.t -> Pdf.t
 | 
			
		||||
 | 
			
		||||
(** Extract metadata to a [Pdfio.bytes] *)
 | 
			
		||||
val get_metadata : Pdf.t -> Pdfio.bytes option
 | 
			
		||||
 | 
			
		||||
(** Print metadate to stdout *)
 | 
			
		||||
val print_metadata : Pdf.t -> unit
 | 
			
		||||
 | 
			
		||||
(** Set the metadata date *)
 | 
			
		||||
val set_metadata_date : Pdf.t -> string -> Pdf.t
 | 
			
		||||
 | 
			
		||||
(** Create XMP metadata from scratch *)
 | 
			
		||||
val create_metadata : Pdf.t -> Pdf.t
 | 
			
		||||
 | 
			
		||||
(** {2 Stamping} *)
 | 
			
		||||
 | 
			
		||||
@@ -145,9 +78,6 @@ val list_fonts : Pdf.t -> int list -> (int * string * string * string * string)
 | 
			
		||||
 | 
			
		||||
(** {2 Adding text} *)
 | 
			
		||||
 | 
			
		||||
(** Expand the string "now" to a PDF date string, ignoring any other string *)
 | 
			
		||||
val expand_date : string -> string
 | 
			
		||||
 | 
			
		||||
(** Justification of multiline text *)
 | 
			
		||||
type justification =
 | 
			
		||||
  | LeftJustify
 | 
			
		||||
@@ -281,10 +211,10 @@ val show_boxes : ?fast:bool -> Pdf.t -> int list -> Pdf.t
 | 
			
		||||
(** {2 Annotations} *)
 | 
			
		||||
 | 
			
		||||
(** List the annotations to standard output in a given encoding. See cpdfmanual.pdf for the format details. *)
 | 
			
		||||
val list_annotations : json:bool -> encoding -> Pdf.t -> unit
 | 
			
		||||
val list_annotations : json:bool -> Cpdfmetadata.encoding -> Pdf.t -> unit
 | 
			
		||||
 | 
			
		||||
(** Return the annotations as a (pagenumber, content) list *)
 | 
			
		||||
val get_annotations : encoding -> Pdf.t -> (int * string) list
 | 
			
		||||
val get_annotations : Cpdfmetadata.encoding -> Pdf.t -> (int * string) list
 | 
			
		||||
 | 
			
		||||
(** Copy the annotations on a given set of pages from a to b. b is returned. *)
 | 
			
		||||
val copy_annotations : int list -> Pdf.t -> Pdf.t -> Pdf.t
 | 
			
		||||
@@ -375,12 +305,12 @@ val bookmarks_open_to_level : int -> Pdf.t -> Pdf.t
 | 
			
		||||
 | 
			
		||||
val create_pdf : int -> Pdfpaper.t -> Pdf.t
 | 
			
		||||
 | 
			
		||||
val name_of_spec : encoding ->
 | 
			
		||||
val name_of_spec : Cpdfmetadata.encoding ->
 | 
			
		||||
           Pdfmarks.t list ->
 | 
			
		||||
           Pdf.t -> int -> string -> int -> string -> int -> int -> string
 | 
			
		||||
 | 
			
		||||
val extract_images : string ->
 | 
			
		||||
           string ->
 | 
			
		||||
           encoding -> bool -> bool -> Pdf.t -> int list -> string -> unit
 | 
			
		||||
           Cpdfmetadata.encoding -> bool -> bool -> Pdf.t -> int list -> string -> unit
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -417,7 +417,7 @@ type args =
 | 
			
		||||
   mutable retain_numbering : bool;
 | 
			
		||||
   mutable remove_duplicate_fonts : bool;
 | 
			
		||||
   mutable remove_duplicate_streams : bool;
 | 
			
		||||
   mutable encoding : Cpdf.encoding;
 | 
			
		||||
   mutable encoding : Cpdfmetadata.encoding;
 | 
			
		||||
   mutable scale : float;
 | 
			
		||||
   mutable copyfontpage : int;
 | 
			
		||||
   mutable copyfontname : string option;
 | 
			
		||||
@@ -536,7 +536,7 @@ let args =
 | 
			
		||||
   retain_numbering = false;
 | 
			
		||||
   remove_duplicate_fonts = false;
 | 
			
		||||
   remove_duplicate_streams = false;
 | 
			
		||||
   encoding = Cpdf.Stripped;
 | 
			
		||||
   encoding = Cpdfmetadata.Stripped;
 | 
			
		||||
   scale = 1.;
 | 
			
		||||
   copyfontpage = 1;
 | 
			
		||||
   copyfontname = None;
 | 
			
		||||
@@ -655,7 +655,7 @@ let reset_arguments () =
 | 
			
		||||
  args.retain_numbering <- false;
 | 
			
		||||
  args.remove_duplicate_fonts <- false;
 | 
			
		||||
  args.remove_duplicate_streams <- false;
 | 
			
		||||
  args.encoding <- Cpdf.Stripped;
 | 
			
		||||
  args.encoding <- Cpdfmetadata.Stripped;
 | 
			
		||||
  args.scale <- 1.;
 | 
			
		||||
  args.copyfontpage <- 1;
 | 
			
		||||
  args.copyfontname <- None;
 | 
			
		||||
@@ -1779,13 +1779,13 @@ and specs =
 | 
			
		||||
       Arg.Unit setrecrypt,
 | 
			
		||||
       " Keep this file's encryption when writing");
 | 
			
		||||
   ("-raw",
 | 
			
		||||
      Arg.Unit (setencoding Cpdf.Raw),
 | 
			
		||||
      Arg.Unit (setencoding Cpdfmetadata.Raw),
 | 
			
		||||
      " Do not process text");
 | 
			
		||||
   ("-stripped",
 | 
			
		||||
      Arg.Unit (setencoding Cpdf.Stripped),
 | 
			
		||||
      Arg.Unit (setencoding Cpdfmetadata.Stripped),
 | 
			
		||||
      " Process text by simple stripping to ASCII");
 | 
			
		||||
   ("-utf8",
 | 
			
		||||
      Arg.Unit (setencoding Cpdf.UTF8),
 | 
			
		||||
      Arg.Unit (setencoding Cpdfmetadata.UTF8),
 | 
			
		||||
      " Process text by conversion to UTF8 Unicode");
 | 
			
		||||
   ("-fast",
 | 
			
		||||
      Arg.Unit setfast,
 | 
			
		||||
@@ -2724,15 +2724,15 @@ let unescape_octals s =
 | 
			
		||||
  implode (unescape_octals [] (explode s))
 | 
			
		||||
 | 
			
		||||
let process s = 
 | 
			
		||||
  if args.encoding <> Cpdf.Raw
 | 
			
		||||
  if args.encoding <> Cpdfmetadata.Raw
 | 
			
		||||
    then Pdftext.pdfdocstring_of_utf8 s
 | 
			
		||||
    else unescape_octals s
 | 
			
		||||
 | 
			
		||||
let set_producer s pdf =
 | 
			
		||||
  ignore (Cpdf.set_pdf_info ("/Producer", Pdf.String (process s), 0) pdf)
 | 
			
		||||
  ignore (Cpdfmetadata.set_pdf_info ("/Producer", Pdf.String (process s), 0) pdf)
 | 
			
		||||
 | 
			
		||||
let set_creator s pdf =
 | 
			
		||||
  ignore (Cpdf.set_pdf_info ("/Creator", Pdf.String (process s), 0) pdf)
 | 
			
		||||
  ignore (Cpdfmetadata.set_pdf_info ("/Creator", Pdf.String (process s), 0) pdf)
 | 
			
		||||
 | 
			
		||||
let really_write_pdf ?(encryption = None) ?(is_decompress=false) mk_id pdf outname =
 | 
			
		||||
  if args.producer <> None then set_producer (unopt args.producer) pdf;
 | 
			
		||||
@@ -3146,8 +3146,8 @@ let go () =
 | 
			
		||||
        if inname <> "" then
 | 
			
		||||
          Printf.printf "Linearized: %b\n" (Pdfread.is_linearized (Pdfio.input_of_channel (open_in_bin inname)));
 | 
			
		||||
        let pdf = decrypt_if_necessary input (Some Info) pdf in
 | 
			
		||||
          Cpdf.output_info args.encoding pdf;
 | 
			
		||||
          Cpdf.output_xmp_info args.encoding pdf
 | 
			
		||||
          Cpdfmetadata.output_info args.encoding pdf;
 | 
			
		||||
          Cpdfmetadata.output_xmp_info args.encoding pdf
 | 
			
		||||
  | Some PageInfo ->
 | 
			
		||||
      begin match args.inputs, args.out with
 | 
			
		||||
      | (_, pagespec, _, _, _, _)::_, _ ->
 | 
			
		||||
@@ -3157,7 +3157,7 @@ let go () =
 | 
			
		||||
      | _ -> error "list-bookmarks: bad command line"
 | 
			
		||||
      end
 | 
			
		||||
  | Some Metadata ->
 | 
			
		||||
      Cpdf.print_metadata (get_single_pdf (Some Metadata) true)
 | 
			
		||||
      Cpdfmetadata.print_metadata (get_single_pdf (Some Metadata) true)
 | 
			
		||||
  | Some Fonts ->
 | 
			
		||||
      begin match args.inputs, args.out with
 | 
			
		||||
      | (_, pagespec, _, _, _, _)::_, _ ->
 | 
			
		||||
@@ -3357,14 +3357,14 @@ let go () =
 | 
			
		||||
          | SetCreate _ | SetModify _ | SetCreator _ | SetProducer _
 | 
			
		||||
          | SetTrapped | SetUntrapped) as op) ->
 | 
			
		||||
      let key, value, version  =
 | 
			
		||||
        let f s = if args.encoding <> Cpdf.Raw then Pdftext.pdfdocstring_of_utf8 s else unescape_octals s in
 | 
			
		||||
        let f s = if args.encoding <> Cpdfmetadata.Raw then Pdftext.pdfdocstring_of_utf8 s else unescape_octals s in
 | 
			
		||||
          match op with
 | 
			
		||||
          | SetAuthor s -> "/Author", Pdf.String (f s), 0
 | 
			
		||||
          | SetTitle s -> "/Title", Pdf.String (f s), 1
 | 
			
		||||
          | SetSubject s -> "/Subject", Pdf.String (f s), 1
 | 
			
		||||
          | SetKeywords s -> "/Keywords", Pdf.String (f s), 1
 | 
			
		||||
          | SetCreate s -> "/CreationDate", Pdf.String (Cpdf.expand_date s), 0
 | 
			
		||||
          | SetModify s -> "/ModDate", Pdf.String (Cpdf.expand_date s), 0
 | 
			
		||||
          | SetCreate s -> "/CreationDate", Pdf.String (Cpdfmetadata.expand_date s), 0
 | 
			
		||||
          | SetModify s -> "/ModDate", Pdf.String (Cpdfmetadata.expand_date s), 0
 | 
			
		||||
          | SetCreator s -> "/Creator", Pdf.String (f s), 0
 | 
			
		||||
          | SetProducer s -> "/Producer", Pdf.String (f s), 0
 | 
			
		||||
          | SetTrapped -> "/Trapped", Pdf.Boolean true, 3
 | 
			
		||||
@@ -3374,12 +3374,12 @@ let go () =
 | 
			
		||||
        let pdf = get_single_pdf args.op false in
 | 
			
		||||
          let version = if args.keepversion then pdf.Pdf.minor else version in
 | 
			
		||||
            write_pdf false
 | 
			
		||||
              (Cpdf.set_pdf_info 
 | 
			
		||||
              (Cpdfmetadata.set_pdf_info 
 | 
			
		||||
                 ~xmp_also:args.alsosetxml
 | 
			
		||||
                 ~xmp_just_set:args.justsetxml
 | 
			
		||||
                 (key, value, version) pdf)
 | 
			
		||||
  | Some (SetMetadataDate date) ->
 | 
			
		||||
      write_pdf false (Cpdf.set_metadata_date (get_single_pdf args.op false) date)
 | 
			
		||||
      write_pdf false (Cpdfmetadata.set_metadata_date (get_single_pdf args.op false) date)
 | 
			
		||||
  | Some ((HideToolbar _ | HideMenubar _ | HideWindowUI _
 | 
			
		||||
          | FitWindow _ | CenterWindow _ | DisplayDocTitle _) as op) ->
 | 
			
		||||
      begin match args.out with
 | 
			
		||||
@@ -3396,20 +3396,20 @@ let go () =
 | 
			
		||||
        in
 | 
			
		||||
      let pdf = get_single_pdf args.op false in
 | 
			
		||||
     let version = if args.keepversion then pdf.Pdf.minor else version in
 | 
			
		||||
          write_pdf false (Cpdf.set_viewer_preference (key, value, version) pdf)
 | 
			
		||||
          write_pdf false (Cpdfmetadata.set_viewer_preference (key, value, version) pdf)
 | 
			
		||||
      end
 | 
			
		||||
  | Some (OpenAtPage str) ->
 | 
			
		||||
      let pdf = get_single_pdf args.op false in
 | 
			
		||||
      let range = parse_pagespec_allow_empty pdf str in
 | 
			
		||||
      let n = match range with [x] -> x | _ -> error "open_at_page: range does not specify single page" in
 | 
			
		||||
        write_pdf false (Cpdf.set_open_action pdf false n)
 | 
			
		||||
        write_pdf false (Cpdfmetadata.set_open_action pdf false n)
 | 
			
		||||
  | Some (OpenAtPageFit str) ->
 | 
			
		||||
      let pdf = get_single_pdf args.op false in
 | 
			
		||||
      let range = parse_pagespec_allow_empty pdf str in
 | 
			
		||||
      let n = match range with [x] -> x | _ -> error "open_at_page: range does not specify single page" in
 | 
			
		||||
        write_pdf false (Cpdf.set_open_action pdf true n)
 | 
			
		||||
        write_pdf false (Cpdfmetadata.set_open_action pdf true n)
 | 
			
		||||
  | Some (SetMetadata metadata_file) ->
 | 
			
		||||
      write_pdf false (Cpdf.set_metadata args.keepversion metadata_file (get_single_pdf args.op false))
 | 
			
		||||
      write_pdf false (Cpdfmetadata.set_metadata args.keepversion metadata_file (get_single_pdf args.op false))
 | 
			
		||||
  | Some (SetVersion v) ->
 | 
			
		||||
      let pdf = get_single_pdf args.op false in
 | 
			
		||||
      let pdf =
 | 
			
		||||
@@ -3419,9 +3419,9 @@ let go () =
 | 
			
		||||
      in
 | 
			
		||||
         write_pdf false pdf
 | 
			
		||||
  | Some (SetPageLayout s) ->
 | 
			
		||||
      write_pdf false (Cpdf.set_page_layout (get_single_pdf args.op false) s)
 | 
			
		||||
      write_pdf false (Cpdfmetadata.set_page_layout (get_single_pdf args.op false) s)
 | 
			
		||||
  | Some (SetPageMode s) ->
 | 
			
		||||
      write_pdf false (Cpdf.set_page_mode (get_single_pdf args.op false) s)
 | 
			
		||||
      write_pdf false (Cpdfmetadata.set_page_mode (get_single_pdf args.op false) s)
 | 
			
		||||
  | Some Split ->
 | 
			
		||||
      begin match args.inputs, args.out with
 | 
			
		||||
        | [(f, ranges, _, _, _, _)], File output_spec ->
 | 
			
		||||
@@ -3514,7 +3514,7 @@ let go () =
 | 
			
		||||
      begin match args.inputs with
 | 
			
		||||
      | [(k, _, u, o, _, _) as input] ->
 | 
			
		||||
          let pdf =
 | 
			
		||||
            Cpdf.copy_id
 | 
			
		||||
            Cpdfmetadata.copy_id
 | 
			
		||||
              args.keepversion
 | 
			
		||||
              (pdfread_pdf_of_file (optstring u) (optstring o) getfrom)
 | 
			
		||||
              (get_pdf_from_input_kind input args.op k)
 | 
			
		||||
@@ -3765,7 +3765,7 @@ let go () =
 | 
			
		||||
      args.recrypt <- false;
 | 
			
		||||
      write_pdf false (get_single_pdf args.op false)
 | 
			
		||||
  | Some RemoveMetadata ->
 | 
			
		||||
      write_pdf false (Cpdf.remove_metadata (get_single_pdf args.op false))
 | 
			
		||||
      write_pdf false (Cpdfmetadata.remove_metadata (get_single_pdf args.op false))
 | 
			
		||||
  | Some ExtractImages ->
 | 
			
		||||
      let output_spec =
 | 
			
		||||
        begin match args.out with
 | 
			
		||||
@@ -3838,7 +3838,7 @@ let go () =
 | 
			
		||||
          write_pdf false (Cpdf.remove_clipping pdf range)
 | 
			
		||||
  | Some CreateMetadata ->
 | 
			
		||||
      let pdf = get_single_pdf args.op false in
 | 
			
		||||
        write_pdf false (Cpdf.create_metadata pdf)
 | 
			
		||||
        write_pdf false (Cpdfmetadata.create_metadata pdf)
 | 
			
		||||
  | Some EmbedMissingFonts ->
 | 
			
		||||
      let fi =
 | 
			
		||||
        match args.inputs with
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										661
									
								
								cpdfmetadata.ml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										661
									
								
								cpdfmetadata.ml
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,661 @@
 | 
			
		||||
open Pdfutil
 | 
			
		||||
open Pdfio
 | 
			
		||||
open Cpdferror
 | 
			
		||||
 | 
			
		||||
type encoding =
 | 
			
		||||
  | Raw
 | 
			
		||||
  | UTF8
 | 
			
		||||
  | Stripped
 | 
			
		||||
 | 
			
		||||
(* Just strip everything which isn't 7 bit ASCII *)
 | 
			
		||||
let crude_de_unicode s =
 | 
			
		||||
  implode (map char_of_int (lose (fun x -> x > 127) (Pdftext.codepoints_of_pdfdocstring s)))
 | 
			
		||||
 | 
			
		||||
let encode_output enc s =
 | 
			
		||||
  match enc with
 | 
			
		||||
  | Raw -> s
 | 
			
		||||
  | UTF8 -> Pdftext.utf8_of_pdfdocstring s
 | 
			
		||||
  | Stripped -> crude_de_unicode s
 | 
			
		||||
 | 
			
		||||
let xmp_template =
 | 
			
		||||
{|<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?>
 | 
			
		||||
 | 
			
		||||
<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'
 | 
			
		||||
 xmlns:iX='http://ns.adobe.com/iX/1.0/'>
 | 
			
		||||
 | 
			
		||||
 <rdf:Description about=''
 | 
			
		||||
  xmlns='http://ns.adobe.com/pdf/1.3/'
 | 
			
		||||
  xmlns:pdf='http://ns.adobe.com/pdf/1.3/'>
 | 
			
		||||
  <pdf:CreationDate>CREATEDATE</pdf:CreationDate>
 | 
			
		||||
  <pdf:ModDate>MODDATE</pdf:ModDate>
 | 
			
		||||
  <pdf:Producer>PRODUCER</pdf:Producer>
 | 
			
		||||
  <pdf:Creator>CREATOR</pdf:Creator>
 | 
			
		||||
  <pdf:Title>TITLE</pdf:Title>
 | 
			
		||||
  <pdf:Subject>SUBJECT</pdf:Subject>
 | 
			
		||||
  <pdf:Author>AUTHOR</pdf:Author>
 | 
			
		||||
  <pdf:Keywords>KEYWORDS</pdf:Keywords>
 | 
			
		||||
  <pdf:Trapped>TRAPPED</pdf:Trapped>
 | 
			
		||||
 </rdf:Description>
 | 
			
		||||
 | 
			
		||||
 <rdf:Description about=''
 | 
			
		||||
  xmlns='http://ns.adobe.com/xap/1.0/'
 | 
			
		||||
  xmlns:xap='http://ns.adobe.com/xap/1.0/'>
 | 
			
		||||
   <xap:CreateDate>CREATEDATE</xap:CreateDate>
 | 
			
		||||
   <xap:CreatorTool>CREATOR</xap:CreatorTool>
 | 
			
		||||
   <xap:ModifyDate>MODDATE</xap:ModifyDate>
 | 
			
		||||
   <xap:MetadataDate>METADATADATE</xap:MetadataDate>
 | 
			
		||||
 </rdf:Description>
 | 
			
		||||
 | 
			
		||||
 <rdf:Description about=''
 | 
			
		||||
  xmlns='http://purl.org/dc/elements/1.1/'
 | 
			
		||||
  xmlns:dc='http://purl.org/dc/elements/1.1/'>
 | 
			
		||||
   <dc:title>TITLE</dc:title>
 | 
			
		||||
 </rdf:Description>
 | 
			
		||||
 | 
			
		||||
</rdf:RDF>
 | 
			
		||||
 | 
			
		||||
<?xpacket end='r'?>|}
 | 
			
		||||
 | 
			
		||||
(* \section{Set or replace metadata} *)
 | 
			
		||||
let set_metadata_from_bytes keepversion data pdf =
 | 
			
		||||
  let metadata_stream =
 | 
			
		||||
    Pdf.Stream
 | 
			
		||||
      {contents =
 | 
			
		||||
        (Pdf.Dictionary
 | 
			
		||||
         ["/Length", Pdf.Integer (bytes_size data);
 | 
			
		||||
          "/Type", Pdf.Name "/Metadata";
 | 
			
		||||
          "/Subtype", Pdf.Name "/XML"],
 | 
			
		||||
         Pdf.Got data)}
 | 
			
		||||
  in
 | 
			
		||||
    let objnum = Pdf.addobj pdf metadata_stream in 
 | 
			
		||||
      let document_catalog =
 | 
			
		||||
        match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
 | 
			
		||||
        | Some s -> s
 | 
			
		||||
        | None -> error "Malformed PDF: No root."
 | 
			
		||||
      in
 | 
			
		||||
        let document_catalog' =
 | 
			
		||||
          Pdf.add_dict_entry document_catalog "/Metadata" (Pdf.Indirect objnum)
 | 
			
		||||
        in
 | 
			
		||||
          let rootnum = Pdf.addobj pdf document_catalog' in
 | 
			
		||||
            let trailerdict =
 | 
			
		||||
              Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootnum)
 | 
			
		||||
            in
 | 
			
		||||
            {pdf with
 | 
			
		||||
               Pdf.trailerdict = trailerdict;
 | 
			
		||||
               Pdf.root = rootnum;
 | 
			
		||||
               Pdf.minor =
 | 
			
		||||
                 if keepversion then pdf.Pdf.minor else max 4 pdf.Pdf.minor}
 | 
			
		||||
 | 
			
		||||
let set_metadata keepversion filename pdf =
 | 
			
		||||
  let ch = open_in_bin filename in
 | 
			
		||||
    let data = mkbytes (in_channel_length ch) in
 | 
			
		||||
      for x = 0 to bytes_size data - 1 do
 | 
			
		||||
        bset data x (input_byte ch)
 | 
			
		||||
      done;
 | 
			
		||||
      set_metadata_from_bytes keepversion data pdf
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
(* \section{Remove metadata} *)
 | 
			
		||||
let remove_metadata pdf =
 | 
			
		||||
  match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
 | 
			
		||||
  | None -> error "malformed file" 
 | 
			
		||||
  | Some root ->
 | 
			
		||||
      let root' = Pdf.remove_dict_entry root "/Metadata" in
 | 
			
		||||
        let rootnum = Pdf.addobj pdf root' in
 | 
			
		||||
          {pdf with
 | 
			
		||||
             Pdf.trailerdict =
 | 
			
		||||
               Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootnum);
 | 
			
		||||
             Pdf.root =
 | 
			
		||||
               rootnum}
 | 
			
		||||
(* Print metadata *)
 | 
			
		||||
let get_metadata pdf =
 | 
			
		||||
  match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
 | 
			
		||||
  | None -> error "malformed file"
 | 
			
		||||
  | Some root ->
 | 
			
		||||
      match Pdf.lookup_direct pdf "/Metadata" root with
 | 
			
		||||
      | Some ((Pdf.Stream _) as s) ->
 | 
			
		||||
          Pdfcodec.decode_pdfstream pdf s;
 | 
			
		||||
          begin match s with
 | 
			
		||||
          | Pdf.Stream {contents = (_, Pdf.Got data)} -> Some data 
 | 
			
		||||
          | _ -> assert false
 | 
			
		||||
          end
 | 
			
		||||
      | _ -> None
 | 
			
		||||
 | 
			
		||||
let print_metadata pdf =
 | 
			
		||||
  match get_metadata pdf with
 | 
			
		||||
    None -> ()
 | 
			
		||||
  | Some data ->
 | 
			
		||||
      for x = 0 to bytes_size data - 1 do
 | 
			
		||||
        Printf.printf "%c" (char_of_int (bget data x))
 | 
			
		||||
      done
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
let get_info raw pdf =
 | 
			
		||||
  let infodict =
 | 
			
		||||
    match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with
 | 
			
		||||
    | Some infodict -> infodict
 | 
			
		||||
    | _ -> Pdf.Dictionary []
 | 
			
		||||
  in
 | 
			
		||||
    let getstring name =
 | 
			
		||||
      match Pdf.lookup_direct pdf name infodict with
 | 
			
		||||
      | Some (Pdf.String s) ->
 | 
			
		||||
          if raw then s else crude_de_unicode s
 | 
			
		||||
      | Some (Pdf.Boolean false) -> "False"
 | 
			
		||||
      | Some (Pdf.Boolean true) -> "True"
 | 
			
		||||
      | _ -> if name = "/Trapped" then "False" else ""
 | 
			
		||||
    in
 | 
			
		||||
      getstring
 | 
			
		||||
       
 | 
			
		||||
let get_info_utf8 pdf =
 | 
			
		||||
  let infodict =
 | 
			
		||||
    match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with
 | 
			
		||||
    | Some infodict -> infodict
 | 
			
		||||
    | _ -> Pdf.Dictionary []
 | 
			
		||||
  in
 | 
			
		||||
    (function name ->
 | 
			
		||||
      match Pdf.lookup_direct pdf name infodict with
 | 
			
		||||
      | Some (Pdf.String s) -> Pdftext.utf8_of_pdfdocstring s
 | 
			
		||||
      | Some (Pdf.Boolean false) -> "False"
 | 
			
		||||
      | Some (Pdf.Boolean true) -> "True"
 | 
			
		||||
      | _ -> if name = "/Trapped" then "False" else "")
 | 
			
		||||
 | 
			
		||||
let getstring encoding pdf =
 | 
			
		||||
  match encoding with
 | 
			
		||||
  | Raw -> get_info true pdf
 | 
			
		||||
  | Stripped -> get_info false pdf
 | 
			
		||||
  | UTF8 -> get_info_utf8 pdf
 | 
			
		||||
 | 
			
		||||
let output_info encoding pdf =
 | 
			
		||||
  let getstring = getstring encoding pdf in
 | 
			
		||||
    Printf.printf "Version: %i.%i\n" pdf.Pdf.major pdf.Pdf.minor;
 | 
			
		||||
    Printf.printf "Pages: %i\n" (Pdfpage.endpage pdf);
 | 
			
		||||
    Printf.printf "Title: %s\n" (getstring "/Title");
 | 
			
		||||
    Printf.printf "Author: %s\n" (getstring "/Author");
 | 
			
		||||
    Printf.printf "Subject: %s\n" (getstring "/Subject");
 | 
			
		||||
    Printf.printf "Keywords: %s\n" (getstring "/Keywords");
 | 
			
		||||
    Printf.printf "Creator: %s\n" (getstring "/Creator");
 | 
			
		||||
    Printf.printf "Producer: %s\n" (getstring "/Producer");
 | 
			
		||||
    Printf.printf "Created: %s\n" (getstring "/CreationDate");
 | 
			
		||||
    Printf.printf "Modified: %s\n" (getstring "/ModDate");
 | 
			
		||||
    Printf.printf "Trapped: %s\n" (getstring "/Trapped")
 | 
			
		||||
 | 
			
		||||
type xmltree =
 | 
			
		||||
    E of Cpdfxmlm.tag * xmltree list
 | 
			
		||||
  | D of string
 | 
			
		||||
 | 
			
		||||
let xmltree_of_bytes b =
 | 
			
		||||
  let i = Cpdfxmlm.make_input (`String (0, string_of_bytes b)) in
 | 
			
		||||
    let el tag childs = E (tag, childs)
 | 
			
		||||
    and data d = D d in
 | 
			
		||||
      Cpdfxmlm.input_doc_tree ~el ~data i
 | 
			
		||||
 | 
			
		||||
let bytes_of_xmltree t =
 | 
			
		||||
  let buf = Buffer.create 1024 in
 | 
			
		||||
  let o = Cpdfxmlm.make_output (`Buffer buf) in
 | 
			
		||||
  let frag = function
 | 
			
		||||
      E (tag, childs) -> `El (tag, childs)
 | 
			
		||||
    | D d -> `Data d
 | 
			
		||||
  in
 | 
			
		||||
    Cpdfxmlm.output_doc_tree frag o t;
 | 
			
		||||
    bytes_of_string (Buffer.contents buf)
 | 
			
		||||
 | 
			
		||||
let rec string_of_xmltree = function
 | 
			
		||||
   D d ->
 | 
			
		||||
     Printf.sprintf "DATA {%s}" d
 | 
			
		||||
 | E (tag, trees) ->
 | 
			
		||||
     Printf.sprintf "ELT (%s, %s)"
 | 
			
		||||
       (string_of_tag tag)
 | 
			
		||||
       (string_of_xmltrees trees)
 | 
			
		||||
 | 
			
		||||
and string_of_tag ((n, n'), attributes) =
 | 
			
		||||
  Printf.sprintf
 | 
			
		||||
    "NAME |%s| |%s|, ATTRIBUTES {%s}" n n'
 | 
			
		||||
    (string_of_attributes attributes)
 | 
			
		||||
 | 
			
		||||
and string_of_attribute ((n, n'), str) =
 | 
			
		||||
  Printf.sprintf "ATTRNAME |%s| |%s|, STR {%s}" n n' str
 | 
			
		||||
 | 
			
		||||
and string_of_attributes attrs =
 | 
			
		||||
  fold_left
 | 
			
		||||
    (fun a b -> a ^ " " ^ b) "" (map string_of_attribute attrs)
 | 
			
		||||
 | 
			
		||||
and string_of_xmltrees trees =
 | 
			
		||||
  fold_left
 | 
			
		||||
    (fun a b -> a ^ " " ^ b) "" (map string_of_xmltree trees)
 | 
			
		||||
 | 
			
		||||
let adobe = "http://ns.adobe.com/pdf/1.3/"
 | 
			
		||||
 | 
			
		||||
let xmp = "http://ns.adobe.com/xap/1.0/"
 | 
			
		||||
 | 
			
		||||
let dc = "http://purl.org/dc/elements/1.1/"
 | 
			
		||||
 | 
			
		||||
let rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
 | 
			
		||||
 | 
			
		||||
let combine_with_spaces strs =
 | 
			
		||||
  String.trim
 | 
			
		||||
    (fold_left (fun x y -> x ^ (if x <> "" then ", " else "") ^ y) "" strs)
 | 
			
		||||
 | 
			
		||||
(* Collect all <li> elements inside a seq, bag, or alt. Combine with commas. If
 | 
			
		||||
none found, return empty string instead. *)
 | 
			
		||||
let collect_list_items = function
 | 
			
		||||
   E (((n, n'), _), elts) when
 | 
			
		||||
     n = rdf && (n' = "Alt" || n' = "Seq" || n' = "Bag")
 | 
			
		||||
   ->
 | 
			
		||||
     combine_with_spaces
 | 
			
		||||
       (option_map
 | 
			
		||||
         (function
 | 
			
		||||
             E (((n, n'), _), [D d]) when n = rdf && n' = "li" ->
 | 
			
		||||
               Some d
 | 
			
		||||
           | _ -> None)
 | 
			
		||||
         elts)
 | 
			
		||||
 | _ -> ""
 | 
			
		||||
 | 
			
		||||
let collect_list_items_all all =
 | 
			
		||||
  match keep (function E _ -> true | _ -> false) all with
 | 
			
		||||
    h::_ -> Some (collect_list_items h)
 | 
			
		||||
  | [] -> None
 | 
			
		||||
 | 
			
		||||
let rec get_data_for namespace name = function
 | 
			
		||||
   D _ -> None
 | 
			
		||||
 | E (((n, n'), _), [D d]) when n = namespace && n' = name ->
 | 
			
		||||
     Some d
 | 
			
		||||
 | E (((n, n'), _), e) when n = namespace && n' = name ->
 | 
			
		||||
     collect_list_items_all e
 | 
			
		||||
 | E (_, l) ->
 | 
			
		||||
     match option_map (get_data_for namespace name) l with
 | 
			
		||||
       x :: _ -> Some x
 | 
			
		||||
     | _ -> None
 | 
			
		||||
 | 
			
		||||
let output_xmp_info encoding pdf =
 | 
			
		||||
  let print_out tree title namespace name =
 | 
			
		||||
    match get_data_for namespace name tree with
 | 
			
		||||
      None -> ()
 | 
			
		||||
    | Some data ->
 | 
			
		||||
        Printf.printf "%s: " title;
 | 
			
		||||
        print_endline data
 | 
			
		||||
  in
 | 
			
		||||
    match get_metadata pdf with
 | 
			
		||||
      None -> ()
 | 
			
		||||
    | Some metadata ->
 | 
			
		||||
        try
 | 
			
		||||
          let dtd, tree = xmltree_of_bytes metadata in
 | 
			
		||||
            print_out tree "XMP pdf:Keywords" adobe "Keywords";
 | 
			
		||||
            print_out tree "XMP pdf:Producer" adobe "Producer";
 | 
			
		||||
            print_out tree "XMP pdf:Trapped" adobe "Trapped";
 | 
			
		||||
            print_out tree "XMP pdf:Title" adobe "Title";
 | 
			
		||||
            print_out tree "XMP pdf:Creator" adobe "Creator";
 | 
			
		||||
            print_out tree "XMP pdf:Subject" adobe "Subject";
 | 
			
		||||
            print_out tree "XMP pdf:Author" adobe "Author";
 | 
			
		||||
            print_out tree "XMP pdf:CreationDate" adobe "CreationDate";
 | 
			
		||||
            print_out tree "XMP pdf:ModDate" adobe "ModDate";
 | 
			
		||||
            print_out tree "XMP xmp:CreateDate" xmp "CreateDate";
 | 
			
		||||
            print_out tree "XMP xmp:CreatorTool" xmp "CreatorTool";
 | 
			
		||||
            print_out tree "XMP xmp:MetadataDate" xmp "MetadataDate";
 | 
			
		||||
            print_out tree "XMP xmp:ModifyDate" xmp "ModifyDate";
 | 
			
		||||
            print_out tree "XMP dc:title" dc "title";
 | 
			
		||||
            print_out tree "XMP dc:creator" dc "creator";
 | 
			
		||||
            print_out tree "XMP dc:subject" dc "subject";
 | 
			
		||||
            print_out tree "XMP dc:description" dc "description"
 | 
			
		||||
        with
 | 
			
		||||
          _ -> ()
 | 
			
		||||
 | 
			
		||||
(* Get XMP info equivalent of an old metadata field *)
 | 
			
		||||
let check = function
 | 
			
		||||
  "/Title" -> [(adobe, "Title"); (dc, "title")]
 | 
			
		||||
| "/Author" -> [(adobe, "Author"); (dc, "creator")]
 | 
			
		||||
| "/Subject" -> [(adobe, "Subject"); (dc, "subject")]
 | 
			
		||||
| "/Keywords" -> [(adobe, "Keywords")]
 | 
			
		||||
| "/Creator" -> [(adobe, "Creator"); (xmp, "CreatorTool")]
 | 
			
		||||
| "/Producer" -> [(adobe, "Producer")]
 | 
			
		||||
| "/CreationDate" -> [(adobe, "CreationDate"); (xmp, "CreateDate")]
 | 
			
		||||
| "/ModDate" -> [(adobe, "ModificationDate"); (xmp, "ModifyDate")]
 | 
			
		||||
| _ -> failwith "Cpdf.check_name not known"
 | 
			
		||||
 | 
			
		||||
let get_xmp_info pdf name =
 | 
			
		||||
  let tocheck = check name in
 | 
			
		||||
  match get_metadata pdf with
 | 
			
		||||
    None -> ""
 | 
			
		||||
  | Some metadata ->
 | 
			
		||||
      try
 | 
			
		||||
        let _, tree = xmltree_of_bytes metadata in
 | 
			
		||||
          let results = map (fun (kind, key) -> match get_data_for kind key tree with Some x -> x | None -> "") tocheck in
 | 
			
		||||
            match lose (eq "") results with
 | 
			
		||||
             x::_ -> x
 | 
			
		||||
           | [] -> ""
 | 
			
		||||
      with
 | 
			
		||||
        _ -> ""
 | 
			
		||||
 | 
			
		||||
(* Set XMP info *)
 | 
			
		||||
let rec set_xml_field kind fieldname value = function
 | 
			
		||||
  D data -> D data
 | 
			
		||||
| E (((n, n'), m), _ (*[D _]*)) when n = kind && n' = fieldname -> (* Replace anything inside, including nothing i.e <tag/> *)
 | 
			
		||||
    E (((n, n'), m), [D value])
 | 
			
		||||
| E (x, ts) -> E (x, map (set_xml_field kind fieldname value) ts)
 | 
			
		||||
 | 
			
		||||
let set_pdf_info_xml kind fieldname value xmldata pdf =
 | 
			
		||||
  let dtd, tree = xmltree_of_bytes xmldata in
 | 
			
		||||
  let str =
 | 
			
		||||
    match value with
 | 
			
		||||
      Pdf.String s -> s
 | 
			
		||||
    | Pdf.Boolean true -> "True"
 | 
			
		||||
    | Pdf.Boolean false -> "False"
 | 
			
		||||
    | _ -> failwith "set_pdf_info_xml: not a string"
 | 
			
		||||
  in
 | 
			
		||||
  let newtree = set_xml_field kind fieldname str tree in
 | 
			
		||||
    bytes_of_xmltree (dtd, newtree)
 | 
			
		||||
 | 
			
		||||
let set_pdf_info_xml_many changes value xmldata pdf =
 | 
			
		||||
  let xmldata = ref xmldata in
 | 
			
		||||
    iter
 | 
			
		||||
      (fun (kind, fieldname) ->
 | 
			
		||||
         xmldata := set_pdf_info_xml kind fieldname value !xmldata pdf)
 | 
			
		||||
      changes;
 | 
			
		||||
    !xmldata
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
(* \section{Set an entry in the /Info dictionary} *)
 | 
			
		||||
 | 
			
		||||
(* We must parse the date to get its components, then use strftime to build the
 | 
			
		||||
 * new string in XMP format *)
 | 
			
		||||
 | 
			
		||||
type date =
 | 
			
		||||
  {mutable year : int;
 | 
			
		||||
   mutable month : int; (* 1 - 12 *)
 | 
			
		||||
   mutable day : int; (* 1 - 31 *)
 | 
			
		||||
   mutable hour : int; (* 0 - 23 *)
 | 
			
		||||
   mutable minute : int; (* 0 - 59 *)
 | 
			
		||||
   mutable second : int; (* 0 - 59 *)
 | 
			
		||||
   mutable ut_relationship : int; (* -1, 0, +1 *)
 | 
			
		||||
   mutable offset_hours : int; (* 0 - 59 *)
 | 
			
		||||
   mutable offset_minutes : int (* 0 - 59 *)}
 | 
			
		||||
 | 
			
		||||
let default_date () =
 | 
			
		||||
  {year = 0;
 | 
			
		||||
   month = 1;
 | 
			
		||||
   day = 1;
 | 
			
		||||
   hour = 0;
 | 
			
		||||
   minute = 0;
 | 
			
		||||
   second = 0;
 | 
			
		||||
   ut_relationship = 0;
 | 
			
		||||
   offset_hours = 0;
 | 
			
		||||
   offset_minutes = 0}
 | 
			
		||||
 | 
			
		||||
(* XMP date format is YYYY-MM-DDThh:mm:ssTZD *)
 | 
			
		||||
let make_xmp_date_from_components d =
 | 
			
		||||
  let tzd =
 | 
			
		||||
    if d.ut_relationship = 0 && d.offset_hours = 0 && d.offset_minutes = 0 then "Z" else
 | 
			
		||||
    (if d.ut_relationship >=0 then "+" else "-") ^
 | 
			
		||||
    Printf.sprintf "%02i" d.offset_hours ^
 | 
			
		||||
    ":" ^
 | 
			
		||||
    Printf.sprintf "%02i" d.offset_minutes
 | 
			
		||||
  in 
 | 
			
		||||
    Cpdfstrftime.strftime
 | 
			
		||||
      ~time:{Cpdfstrftime._tm_sec = d.second;
 | 
			
		||||
             Cpdfstrftime._tm_min = d.minute;
 | 
			
		||||
             Cpdfstrftime._tm_hour = d.hour;
 | 
			
		||||
             Cpdfstrftime._tm_mday = d.day;
 | 
			
		||||
             Cpdfstrftime._tm_mon = d.month - 1;
 | 
			
		||||
             Cpdfstrftime._tm_year = d.year - 1900;
 | 
			
		||||
             Cpdfstrftime._tm_wday = 0;
 | 
			
		||||
             Cpdfstrftime._tm_yday = 0;
 | 
			
		||||
             Cpdfstrftime._tm_isdst = false}
 | 
			
		||||
      "%Y-%m-%dT%H:%M:%S"
 | 
			
		||||
  ^
 | 
			
		||||
    tzd
 | 
			
		||||
 | 
			
		||||
let xmp_date date =
 | 
			
		||||
  let d = default_date () in
 | 
			
		||||
  try
 | 
			
		||||
    match explode date with
 | 
			
		||||
      'D'::':'::r ->
 | 
			
		||||
        begin match r with
 | 
			
		||||
          y1::y2::y3::y4::r ->
 | 
			
		||||
            d.year <- int_of_string (implode [y1; y2; y3; y4]);
 | 
			
		||||
            begin match r with
 | 
			
		||||
              m1::m2::r ->
 | 
			
		||||
                d.month <- int_of_string (implode [m1; m2]);
 | 
			
		||||
                begin match r with
 | 
			
		||||
                  d1::d2::r ->
 | 
			
		||||
                  d.day <- int_of_string (implode [d1; d2]);
 | 
			
		||||
                  begin match r with
 | 
			
		||||
                    h1::h2::r ->
 | 
			
		||||
                    d.hour <- int_of_string (implode [h1; h2]);
 | 
			
		||||
                    begin match r with
 | 
			
		||||
                      m1::m2::r ->
 | 
			
		||||
                      d.minute <- int_of_string (implode [m1; m2]);
 | 
			
		||||
                      begin match r with
 | 
			
		||||
                       s1::s2::r ->
 | 
			
		||||
                       d.second <- int_of_string (implode [s1; s2]);
 | 
			
		||||
                         begin match r with
 | 
			
		||||
                           o::r ->
 | 
			
		||||
                           d.ut_relationship <-
 | 
			
		||||
                             if o = '+' then 1 else
 | 
			
		||||
                             if o = '-' then -1 else
 | 
			
		||||
                             0;
 | 
			
		||||
                           begin match r with
 | 
			
		||||
                             h1::h2::'\''::r ->
 | 
			
		||||
                             d.offset_hours <- int_of_string (implode [h1; h2]);
 | 
			
		||||
                             begin match r with
 | 
			
		||||
                               m1::m2::_ ->
 | 
			
		||||
                               d.offset_minutes <- int_of_string (implode [m1; m2]);
 | 
			
		||||
                               raise Exit
 | 
			
		||||
                             | _ -> raise Exit
 | 
			
		||||
                             end
 | 
			
		||||
                           | _ -> raise Exit
 | 
			
		||||
                           end
 | 
			
		||||
                         | _ -> raise Exit
 | 
			
		||||
                         end
 | 
			
		||||
                      | _ -> raise Exit
 | 
			
		||||
                      end
 | 
			
		||||
                    | _ -> raise Exit
 | 
			
		||||
                    end
 | 
			
		||||
                  | _ -> raise Exit
 | 
			
		||||
                  end
 | 
			
		||||
                | _ -> raise Exit
 | 
			
		||||
                end
 | 
			
		||||
            | _ -> raise Exit  
 | 
			
		||||
            end
 | 
			
		||||
        | _ ->
 | 
			
		||||
          Printf.eprintf "xmp_date: Malformed date string (no year): %s\n%!" date;
 | 
			
		||||
          make_xmp_date_from_components d
 | 
			
		||||
        end
 | 
			
		||||
    | _ ->
 | 
			
		||||
        Printf.eprintf "xmp_date: Malformed date string (no prefix): %s\n%!" date;
 | 
			
		||||
        make_xmp_date_from_components d
 | 
			
		||||
  with
 | 
			
		||||
    Exit -> make_xmp_date_from_components d
 | 
			
		||||
 | 
			
		||||
let set_pdf_info ?(xmp_also=false) ?(xmp_just_set=false) (key, value, version) pdf =
 | 
			
		||||
  let infodict =
 | 
			
		||||
    match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with
 | 
			
		||||
    | Some d -> d
 | 
			
		||||
    | None -> Pdf.Dictionary []
 | 
			
		||||
  in
 | 
			
		||||
    let infodict' = Pdf.add_dict_entry infodict key value in
 | 
			
		||||
      let objnum = Pdf.addobj pdf infodict' in
 | 
			
		||||
        if not xmp_just_set then
 | 
			
		||||
          begin
 | 
			
		||||
            pdf.Pdf.trailerdict <-
 | 
			
		||||
              Pdf.add_dict_entry pdf.Pdf.trailerdict "/Info" (Pdf.Indirect objnum);
 | 
			
		||||
            pdf.Pdf.minor <-
 | 
			
		||||
              max pdf.Pdf.minor version
 | 
			
		||||
          end;
 | 
			
		||||
        if xmp_also then
 | 
			
		||||
          begin match get_metadata pdf with
 | 
			
		||||
            None -> pdf
 | 
			
		||||
          | Some xmldata ->
 | 
			
		||||
              let xmp_date = function Pdf.String s -> Pdf.String (xmp_date s) | _ -> failwith "xmp_date not a string" in
 | 
			
		||||
              let changes, value =
 | 
			
		||||
                match key with
 | 
			
		||||
                | "/Producer" -> [(adobe, "Producer")], value
 | 
			
		||||
                | "/Creator" -> [(adobe, "Creator"); (xmp, "CreatorTool"); (dc, "creator")], value
 | 
			
		||||
                | "/Author" -> [(adobe, "Author")], value
 | 
			
		||||
                | "/Title" -> [(adobe, "Title"); (dc, "title")], value
 | 
			
		||||
                | "/Subject" -> [(adobe, "Subject"); (dc, "subject")], value
 | 
			
		||||
                | "/Keywords" -> [(adobe, "Keywords")], value
 | 
			
		||||
                | "/CreationDate" -> [(adobe, "CreationDate"); (xmp, "CreateDate")], xmp_date value
 | 
			
		||||
                | "/ModDate" -> [(adobe, "ModDate"); (xmp, "ModifyDate")], xmp_date value
 | 
			
		||||
                | "/Trapped" -> [(adobe, "Trapped")], value
 | 
			
		||||
                | _ -> failwith "Unknown call to set_pdf_info"
 | 
			
		||||
              in
 | 
			
		||||
                set_metadata_from_bytes
 | 
			
		||||
                  true
 | 
			
		||||
                  (set_pdf_info_xml_many changes value xmldata pdf)
 | 
			
		||||
                  pdf
 | 
			
		||||
          end
 | 
			
		||||
       else
 | 
			
		||||
         pdf
 | 
			
		||||
 | 
			
		||||
let expand_date = function
 | 
			
		||||
  | "now" ->
 | 
			
		||||
      begin match Sys.getenv_opt "CPDF_REPRODUCIBLE_DATES" with
 | 
			
		||||
      | Some "true" -> Cpdfstrftime.strftime ~time:Cpdfstrftime.dummy "D:%Y%m%d%H%M%S" 
 | 
			
		||||
      | _ -> Cpdfstrftime.strftime "D:%Y%m%d%H%M%S"
 | 
			
		||||
      end
 | 
			
		||||
  | x -> x
 | 
			
		||||
 | 
			
		||||
(* Set metadata date *)
 | 
			
		||||
let set_metadata_date pdf date =
 | 
			
		||||
  match get_metadata pdf with
 | 
			
		||||
    None -> pdf
 | 
			
		||||
  | Some xmldata ->
 | 
			
		||||
      let changes= [(xmp, "MetadataDate")] in
 | 
			
		||||
      let value = match date with "now" -> xmp_date (expand_date "now") | x -> x in
 | 
			
		||||
        set_metadata_from_bytes
 | 
			
		||||
          true
 | 
			
		||||
          (set_pdf_info_xml_many changes (Pdf.String value) xmldata pdf)
 | 
			
		||||
          pdf
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
(* \section{Copy an /ID from one file to another} *)
 | 
			
		||||
let copy_id keepversion copyfrom copyto =
 | 
			
		||||
  match Pdf.lookup_direct copyfrom "/ID" copyfrom.Pdf.trailerdict with
 | 
			
		||||
  | None -> copyto (* error "Source PDF file has no /ID entry to copy from" *)
 | 
			
		||||
  | Some id ->
 | 
			
		||||
      copyto.Pdf.trailerdict <-
 | 
			
		||||
        Pdf.add_dict_entry copyto.Pdf.trailerdict "/ID" id;
 | 
			
		||||
      copyto.Pdf.minor <-
 | 
			
		||||
        if keepversion then copyto.Pdf.minor else max copyto.Pdf.minor 1;
 | 
			
		||||
      copyto
 | 
			
		||||
 | 
			
		||||
let replacements pdf =
 | 
			
		||||
  let info = get_info_utf8 pdf in
 | 
			
		||||
    [("CREATEDATE", xmp_date (let i = info "/CreationDate" in if i = "" then expand_date "now" else i));
 | 
			
		||||
     ("MODDATE", xmp_date (let i = info "/ModDate" in if i = "" then expand_date "now" else i));
 | 
			
		||||
     ("PRODUCER", info "/Producer");
 | 
			
		||||
     ("CREATOR", info "/Creator");
 | 
			
		||||
     ("TITLE", info "/Title");
 | 
			
		||||
     ("SUBJECT", info "/Subject");
 | 
			
		||||
     ("AUTHOR", info "/Author");
 | 
			
		||||
     ("KEYWORDS", info "/Keywords");
 | 
			
		||||
     ("TRAPPED", info "/Trapped");
 | 
			
		||||
     ("METADATADATE", xmp_date (expand_date "now"))]
 | 
			
		||||
 | 
			
		||||
let create_metadata pdf =
 | 
			
		||||
  let xmp = ref xmp_template in
 | 
			
		||||
  iter
 | 
			
		||||
    (fun (s, r) -> xmp := string_replace_all s r !xmp)
 | 
			
		||||
    (replacements pdf);
 | 
			
		||||
  set_metadata_from_bytes false (bytes_of_string !xmp) pdf
 | 
			
		||||
 | 
			
		||||
(* \section{Set viewer preferences} *)
 | 
			
		||||
let set_viewer_preference (key, value, version) pdf =
 | 
			
		||||
  match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
 | 
			
		||||
  | Some catalog ->
 | 
			
		||||
      let viewer_preferences =
 | 
			
		||||
        match Pdf.lookup_direct pdf "/ViewerPreferences" catalog with
 | 
			
		||||
        | Some d -> d
 | 
			
		||||
        | None -> Pdf.Dictionary []
 | 
			
		||||
      in
 | 
			
		||||
        let viewer_preferences' =
 | 
			
		||||
          Pdf.add_dict_entry viewer_preferences key value
 | 
			
		||||
        in
 | 
			
		||||
          let catalog' =
 | 
			
		||||
            Pdf.add_dict_entry catalog "/ViewerPreferences" viewer_preferences'
 | 
			
		||||
          in
 | 
			
		||||
            let catalognum = Pdf.addobj pdf catalog' in
 | 
			
		||||
              let trailerdict' =
 | 
			
		||||
                Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum)
 | 
			
		||||
              in
 | 
			
		||||
                {pdf with
 | 
			
		||||
                  Pdf.minor = max pdf.Pdf.minor version;
 | 
			
		||||
                  Pdf.root = catalognum;
 | 
			
		||||
                  Pdf.trailerdict = trailerdict'}
 | 
			
		||||
  | None -> error "bad root"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
(* \section{Set page layout} *)
 | 
			
		||||
let set_page_layout pdf s =
 | 
			
		||||
  match s with
 | 
			
		||||
  | "SinglePage" | "OneColumn" | "TwoColumnLeft"
 | 
			
		||||
  | "TwoColumnRight" | "TwoPageLeft" | "TwoPageRight" ->
 | 
			
		||||
      begin match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
 | 
			
		||||
      | Some catalog ->
 | 
			
		||||
          let catalog' =
 | 
			
		||||
            Pdf.add_dict_entry catalog "/PageLayout" (Pdf.Name ("/" ^ s))
 | 
			
		||||
          in
 | 
			
		||||
            let catalognum = Pdf.addobj pdf catalog' in
 | 
			
		||||
              let trailerdict' =
 | 
			
		||||
                Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum)
 | 
			
		||||
              in
 | 
			
		||||
                {pdf with
 | 
			
		||||
                  Pdf.root = catalognum;
 | 
			
		||||
                  Pdf.trailerdict = trailerdict'}
 | 
			
		||||
      | None -> error "bad root"
 | 
			
		||||
      end
 | 
			
		||||
  | _ -> error "Unknown page layout"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
(* \section{Set page mode} *)
 | 
			
		||||
let set_page_mode pdf s =
 | 
			
		||||
  match s with
 | 
			
		||||
  | "UseNone" | "UseOutlines" | "UseThumbs"
 | 
			
		||||
  | "FullScreen" | "UseOC" | "UseAttachments" ->
 | 
			
		||||
      begin match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
 | 
			
		||||
      | Some catalog ->
 | 
			
		||||
          let catalog' =
 | 
			
		||||
            Pdf.add_dict_entry catalog "/PageMode" (Pdf.Name ("/" ^ s))
 | 
			
		||||
          in
 | 
			
		||||
            let catalognum = Pdf.addobj pdf catalog' in
 | 
			
		||||
              let trailerdict' =
 | 
			
		||||
                Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum)
 | 
			
		||||
              in
 | 
			
		||||
                {pdf with
 | 
			
		||||
                  Pdf.root = catalognum;
 | 
			
		||||
                  Pdf.trailerdict = trailerdict'}
 | 
			
		||||
      | None -> error "bad root"
 | 
			
		||||
      end
 | 
			
		||||
  | _ -> error "Unknown page mode"
 | 
			
		||||
 | 
			
		||||
(* Set open action *)
 | 
			
		||||
let set_open_action pdf fit pagenumber =
 | 
			
		||||
  if pagenumber > Pdfpage.endpage pdf || pagenumber < 0 then
 | 
			
		||||
    raise (error "set_open_action: invalid page number")
 | 
			
		||||
  else
 | 
			
		||||
    let pageobjectnumber = select pagenumber (Pdf.page_reference_numbers pdf) in
 | 
			
		||||
      let destination =
 | 
			
		||||
        if fit then
 | 
			
		||||
          Pdf.Array [Pdf.Indirect pageobjectnumber; Pdf.Name "/Fit"]
 | 
			
		||||
        else
 | 
			
		||||
          Pdf.Array [Pdf.Indirect pageobjectnumber; Pdf.Name "/XYZ"; Pdf.Null; Pdf.Null; Pdf.Null]
 | 
			
		||||
      in
 | 
			
		||||
        let open_action =
 | 
			
		||||
          Pdf.Dictionary [("/D", destination); ("/S", Pdf.Name "/GoTo")]
 | 
			
		||||
        in
 | 
			
		||||
          match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
 | 
			
		||||
          | Some catalog ->
 | 
			
		||||
              let catalog' =
 | 
			
		||||
                Pdf.add_dict_entry catalog "/OpenAction" open_action
 | 
			
		||||
              in
 | 
			
		||||
                let catalognum = Pdf.addobj pdf catalog' in
 | 
			
		||||
                  let trailerdict' =
 | 
			
		||||
                    Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum)
 | 
			
		||||
                  in
 | 
			
		||||
                    {pdf with Pdf.root = catalognum; Pdf.trailerdict = trailerdict'}
 | 
			
		||||
          | None -> error "bad root"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
let set_version v pdf =
 | 
			
		||||
  pdf.Pdf.minor <- v
 | 
			
		||||
							
								
								
									
										74
									
								
								cpdfmetadata.mli
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										74
									
								
								cpdfmetadata.mli
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,74 @@
 | 
			
		||||
 | 
			
		||||
(** {2 Types and Exceptions} *)
 | 
			
		||||
 | 
			
		||||
(** Possible output encodings for some function. [Raw] does no processing at
 | 
			
		||||
all - the PDF string is output as-is. [UTF8] converts loslessly to UTF8.
 | 
			
		||||
[Stripped] extracts the unicode codepoints and returns only those which
 | 
			
		||||
correspond to 7 bit ASCII. *)
 | 
			
		||||
type encoding = Raw | UTF8 | Stripped
 | 
			
		||||
 | 
			
		||||
val encode_output : encoding -> string -> string
 | 
			
		||||
 | 
			
		||||
(** {2 Metadata and settings} *)
 | 
			
		||||
 | 
			
		||||
(** [copy_id keepversion copyfrom copyto] copies the ID, if any, from
 | 
			
		||||
[copyfrom] to [copyto]. If [keepversion] is true, the PDF version of [copyto]
 | 
			
		||||
won't be affected. *)
 | 
			
		||||
val copy_id : bool -> Pdf.t -> Pdf.t -> Pdf.t
 | 
			
		||||
 | 
			
		||||
(** [set_pdf_info (key, value, version)] sets the entry [key] in the /Info directory, updating
 | 
			
		||||
the PDF minor version to [version].*)
 | 
			
		||||
val set_pdf_info : ?xmp_also:bool -> ?xmp_just_set:bool -> (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t
 | 
			
		||||
 | 
			
		||||
val get_xmp_info : Pdf.t -> string -> string
 | 
			
		||||
 | 
			
		||||
(** [set_pdf_info (key, value, version)] sets the entry [key] in the
 | 
			
		||||
/ViewerPreferences directory, updating the PDF minor version to [version].*)
 | 
			
		||||
val set_viewer_preference : (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t
 | 
			
		||||
 | 
			
		||||
(** Set the page layout to the given name (sans slash) e.g SinglePage *)
 | 
			
		||||
val set_page_layout : Pdf.t -> string -> Pdf.t
 | 
			
		||||
 | 
			
		||||
(** Set the page layout to the given name (sans slash) e.g SinglePage *)
 | 
			
		||||
val set_page_mode : Pdf.t -> string -> Pdf.t
 | 
			
		||||
 | 
			
		||||
(** Set the open action. If the boolean is true, /Fit will be used, otherwise /XYZ *)
 | 
			
		||||
val set_open_action : Pdf.t -> bool -> int -> Pdf.t
 | 
			
		||||
 | 
			
		||||
(** Set the PDF version number *)
 | 
			
		||||
val set_version : int -> Pdf.t -> unit
 | 
			
		||||
 | 
			
		||||
(** Given a PDF, returns a function which can lookup a given dictionary entry
 | 
			
		||||
from the /Info dictionary, returning it as a UTF8 string *)
 | 
			
		||||
val get_info_utf8 : Pdf.t -> string -> string
 | 
			
		||||
 | 
			
		||||
(** Output to standard output general information about a PDF. *)
 | 
			
		||||
val output_info : encoding -> Pdf.t -> unit
 | 
			
		||||
 | 
			
		||||
(** Output to standard output information from any XMP metadata stream in a PDF. *)
 | 
			
		||||
val output_xmp_info : encoding -> Pdf.t -> unit
 | 
			
		||||
 | 
			
		||||
(** Create XMP metadata from scratch *)
 | 
			
		||||
val create_metadata : Pdf.t -> Pdf.t
 | 
			
		||||
 | 
			
		||||
(** {2 XML Metadata} *)
 | 
			
		||||
 | 
			
		||||
(** [set_metadata keepversion filename pdf] sets the XML metadata of a PDF to the contents of [filename]. If [keepversion] is true, the PDF version will not be altered. *) 
 | 
			
		||||
val set_metadata : bool -> string -> Pdf.t -> Pdf.t
 | 
			
		||||
 | 
			
		||||
(** The same, but the content comes from [bytes]. *)
 | 
			
		||||
val set_metadata_from_bytes : bool -> Pdfio.bytes -> Pdf.t -> Pdf.t
 | 
			
		||||
 | 
			
		||||
(** Remove the metadata from a file *)
 | 
			
		||||
val remove_metadata : Pdf.t -> Pdf.t
 | 
			
		||||
 | 
			
		||||
(** Extract metadata to a [Pdfio.bytes] *)
 | 
			
		||||
val get_metadata : Pdf.t -> Pdfio.bytes option
 | 
			
		||||
 | 
			
		||||
(** Print metadate to stdout *)
 | 
			
		||||
val print_metadata : Pdf.t -> unit
 | 
			
		||||
 | 
			
		||||
(** Set the metadata date *)
 | 
			
		||||
val set_metadata_date : Pdf.t -> string -> Pdf.t
 | 
			
		||||
 | 
			
		||||
val expand_date : string -> string
 | 
			
		||||
		Reference in New Issue
	
	Block a user