mirror of
				https://github.com/johnwhitington/cpdf-source.git
				synced 2025-06-05 22:09:39 +02:00 
			
		
		
		
	more
This commit is contained in:
		
							
								
								
									
										2
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								Makefile
									
									
									
									
									
								
							| @@ -1,7 +1,7 @@ | |||||||
| # Build the cpdf command line tools and top level | # Build the cpdf command line tools and top level | ||||||
| MODS = cpdfyojson cpdfxmlm \ | MODS = cpdfyojson cpdfxmlm \ | ||||||
|        cpdfunicodedata cpdferror cpdfjson cpdfstrftime cpdfcoord cpdfattach \ |        cpdfunicodedata cpdferror cpdfjson cpdfstrftime cpdfcoord cpdfattach \ | ||||||
|        cpdfpagespec cpdfposition cpdf cpdfpresent cpdffont cpdftype \ |        cpdfpagespec cpdfposition cpdfpresent cpdfmetadata cpdf cpdffont cpdftype \ | ||||||
|        cpdftexttopdf cpdftoc cpdfpad cpdfocg cpdfsqueeze cpdfcommand |        cpdftexttopdf cpdftoc cpdfpad cpdfocg cpdfsqueeze cpdfcommand | ||||||
|  |  | ||||||
| SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml | SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml | ||||||
|   | |||||||
							
								
								
									
										675
									
								
								cpdf.ml
									
									
									
									
									
								
							
							
						
						
									
										675
									
								
								cpdf.ml
									
									
									
									
									
								
							| @@ -10,67 +10,11 @@ type color = | |||||||
|  |  | ||||||
| let debug = ref false | let debug = ref false | ||||||
|  |  | ||||||
| let xmp_template = |  | ||||||
| {|<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?> |  | ||||||
|  |  | ||||||
| <rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' |  | ||||||
|  xmlns:iX='http://ns.adobe.com/iX/1.0/'> |  | ||||||
|  |  | ||||||
|  <rdf:Description about='' |  | ||||||
|   xmlns='http://ns.adobe.com/pdf/1.3/' |  | ||||||
|   xmlns:pdf='http://ns.adobe.com/pdf/1.3/'> |  | ||||||
|   <pdf:CreationDate>CREATEDATE</pdf:CreationDate> |  | ||||||
|   <pdf:ModDate>MODDATE</pdf:ModDate> |  | ||||||
|   <pdf:Producer>PRODUCER</pdf:Producer> |  | ||||||
|   <pdf:Creator>CREATOR</pdf:Creator> |  | ||||||
|   <pdf:Title>TITLE</pdf:Title> |  | ||||||
|   <pdf:Subject>SUBJECT</pdf:Subject> |  | ||||||
|   <pdf:Author>AUTHOR</pdf:Author> |  | ||||||
|   <pdf:Keywords>KEYWORDS</pdf:Keywords> |  | ||||||
|   <pdf:Trapped>TRAPPED</pdf:Trapped> |  | ||||||
|  </rdf:Description> |  | ||||||
|  |  | ||||||
|  <rdf:Description about='' |  | ||||||
|   xmlns='http://ns.adobe.com/xap/1.0/' |  | ||||||
|   xmlns:xap='http://ns.adobe.com/xap/1.0/'> |  | ||||||
|    <xap:CreateDate>CREATEDATE</xap:CreateDate> |  | ||||||
|    <xap:CreatorTool>CREATOR</xap:CreatorTool> |  | ||||||
|    <xap:ModifyDate>MODDATE</xap:ModifyDate> |  | ||||||
|    <xap:MetadataDate>METADATADATE</xap:MetadataDate> |  | ||||||
|  </rdf:Description> |  | ||||||
|  |  | ||||||
|  <rdf:Description about='' |  | ||||||
|   xmlns='http://purl.org/dc/elements/1.1/' |  | ||||||
|   xmlns:dc='http://purl.org/dc/elements/1.1/'> |  | ||||||
|    <dc:title>TITLE</dc:title> |  | ||||||
|  </rdf:Description> |  | ||||||
|  |  | ||||||
| </rdf:RDF> |  | ||||||
|  |  | ||||||
| <?xpacket end='r'?>|} |  | ||||||
|  |  | ||||||
| type encoding = |  | ||||||
|   | Raw |  | ||||||
|   | UTF8 |  | ||||||
|   | Stripped |  | ||||||
|  |  | ||||||
| (* Just strip everything which isn't 7 bit ASCII *) |  | ||||||
| let crude_de_unicode s = |  | ||||||
|   implode (map char_of_int (lose (fun x -> x > 127) (Pdftext.codepoints_of_pdfdocstring s))) |  | ||||||
|  |  | ||||||
| let encode_output enc s = |  | ||||||
|   match enc with |  | ||||||
|   | Raw -> s |  | ||||||
|   | UTF8 -> Pdftext.utf8_of_pdfdocstring s |  | ||||||
|   | Stripped -> crude_de_unicode s |  | ||||||
|  |  | ||||||
| (* Get the number of pages in file. Doesn't need decryption. *) | (* Get the number of pages in file. Doesn't need decryption. *) | ||||||
| let endpage_io ?revision i user_pw owner_pw = | let endpage_io ?revision i user_pw owner_pw = | ||||||
|   let pdf = Pdfread.pdf_of_input_lazy ?revision user_pw owner_pw i in |   let pdf = Pdfread.pdf_of_input_lazy ?revision user_pw owner_pw i in | ||||||
|     Pdfpage.endpage pdf |     Pdfpage.endpage pdf | ||||||
|  |  | ||||||
|      |  | ||||||
|  |  | ||||||
| let print_pdf_objs pdf = | let print_pdf_objs pdf = | ||||||
|   Printf.printf "Trailerdict: %s\n" (Pdfwrite.string_of_pdf pdf.Pdf.trailerdict); |   Printf.printf "Trailerdict: %s\n" (Pdfwrite.string_of_pdf pdf.Pdf.trailerdict); | ||||||
|   Printf.printf "Root: %i\n" pdf.Pdf.root; |   Printf.printf "Root: %i\n" pdf.Pdf.root; | ||||||
| @@ -103,14 +47,6 @@ let rec process_text time text m = | |||||||
|   | [] -> Cpdfstrftime.strftime ~time text |   | [] -> Cpdfstrftime.strftime ~time text | ||||||
|   | (s, r)::t -> process_text time (string_replace_all_lazy s r text) t |   | (s, r)::t -> process_text time (string_replace_all_lazy s r text) t | ||||||
|  |  | ||||||
| let expand_date = function |  | ||||||
|   | "now" -> |  | ||||||
|       begin match Sys.getenv_opt "CPDF_REPRODUCIBLE_DATES" with |  | ||||||
|       | Some "true" -> Cpdfstrftime.strftime ~time:Cpdfstrftime.dummy "D:%Y%m%d%H%M%S"  |  | ||||||
|       | _ -> Cpdfstrftime.strftime "D:%Y%m%d%H%M%S" |  | ||||||
|       end |  | ||||||
|   | x -> x |  | ||||||
|  |  | ||||||
| (* For uses of process_pages which don't need to deal with matrices, this | (* For uses of process_pages which don't need to deal with matrices, this | ||||||
|    function transforms into one which returns the identity matrix *) |    function transforms into one which returns the identity matrix *) | ||||||
| let ppstub f n p = (f n p, n, Pdftransform.i_matrix) | let ppstub f n p = (f n p, n, Pdftransform.i_matrix) | ||||||
| @@ -205,17 +141,6 @@ let combine_pdf_resources pdf a b = | |||||||
|             (Pdf.Dictionary []) |             (Pdf.Dictionary []) | ||||||
|             (unknown_keys_a @ unknown_keys_b @ combined_known_entries) |             (unknown_keys_a @ unknown_keys_b @ combined_known_entries) | ||||||
|  |  | ||||||
| (* \section{Copy an /ID from one file to another} *) |  | ||||||
| let copy_id keepversion copyfrom copyto = |  | ||||||
|   match Pdf.lookup_direct copyfrom "/ID" copyfrom.Pdf.trailerdict with |  | ||||||
|   | None -> copyto (* error "Source PDF file has no /ID entry to copy from" *) |  | ||||||
|   | Some id -> |  | ||||||
|       copyto.Pdf.trailerdict <- |  | ||||||
|         Pdf.add_dict_entry copyto.Pdf.trailerdict "/ID" id; |  | ||||||
|       copyto.Pdf.minor <- |  | ||||||
|         if keepversion then copyto.Pdf.minor else max copyto.Pdf.minor 1; |  | ||||||
|       copyto |  | ||||||
|  |  | ||||||
| (* \section{Remove bookmarks} *) | (* \section{Remove bookmarks} *) | ||||||
|  |  | ||||||
| (* \section{Add bookmarks} *) | (* \section{Add bookmarks} *) | ||||||
| @@ -377,155 +302,6 @@ let add_bookmarks ~json verify input pdf = | |||||||
|     (*iter (fun b -> flprint (Pdfmarks.string_of_bookmark b); flprint "\n") parsed;*) |     (*iter (fun b -> flprint (Pdfmarks.string_of_bookmark b); flprint "\n") parsed;*) | ||||||
|     Pdfmarks.add_bookmarks parsed pdf  |     Pdfmarks.add_bookmarks parsed pdf  | ||||||
|  |  | ||||||
| (* \section{Set page mode} *) |  | ||||||
| let set_page_mode pdf s = |  | ||||||
|   match s with |  | ||||||
|   | "UseNone" | "UseOutlines" | "UseThumbs" |  | ||||||
|   | "FullScreen" | "UseOC" | "UseAttachments" -> |  | ||||||
|       begin match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with |  | ||||||
|       | Some catalog -> |  | ||||||
|           let catalog' = |  | ||||||
|             Pdf.add_dict_entry catalog "/PageMode" (Pdf.Name ("/" ^ s)) |  | ||||||
|           in |  | ||||||
|             let catalognum = Pdf.addobj pdf catalog' in |  | ||||||
|               let trailerdict' = |  | ||||||
|                 Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum) |  | ||||||
|               in |  | ||||||
|                 {pdf with |  | ||||||
|                   Pdf.root = catalognum; |  | ||||||
|                   Pdf.trailerdict = trailerdict'} |  | ||||||
|       | None -> error "bad root" |  | ||||||
|       end |  | ||||||
|   | _ -> error "Unknown page mode" |  | ||||||
|  |  | ||||||
| (* Set open action *) |  | ||||||
| let set_open_action pdf fit pagenumber = |  | ||||||
|   if pagenumber > Pdfpage.endpage pdf || pagenumber < 0 then |  | ||||||
|     raise (error "set_open_action: invalid page number") |  | ||||||
|   else |  | ||||||
|     let pageobjectnumber = select pagenumber (Pdf.page_reference_numbers pdf) in |  | ||||||
|       let destination = |  | ||||||
|         if fit then |  | ||||||
|           Pdf.Array [Pdf.Indirect pageobjectnumber; Pdf.Name "/Fit"] |  | ||||||
|         else |  | ||||||
|           Pdf.Array [Pdf.Indirect pageobjectnumber; Pdf.Name "/XYZ"; Pdf.Null; Pdf.Null; Pdf.Null] |  | ||||||
|       in |  | ||||||
|         let open_action = |  | ||||||
|           Pdf.Dictionary [("/D", destination); ("/S", Pdf.Name "/GoTo")] |  | ||||||
|         in |  | ||||||
|           match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with |  | ||||||
|           | Some catalog -> |  | ||||||
|               let catalog' = |  | ||||||
|                 Pdf.add_dict_entry catalog "/OpenAction" open_action |  | ||||||
|               in |  | ||||||
|                 let catalognum = Pdf.addobj pdf catalog' in |  | ||||||
|                   let trailerdict' = |  | ||||||
|                     Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum) |  | ||||||
|                   in |  | ||||||
|                     {pdf with Pdf.root = catalognum; Pdf.trailerdict = trailerdict'} |  | ||||||
|           | None -> error "bad root" |  | ||||||
|  |  | ||||||
| (* \section{Set viewer preferences} *) |  | ||||||
| let set_viewer_preference (key, value, version) pdf = |  | ||||||
|   match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with |  | ||||||
|   | Some catalog -> |  | ||||||
|       let viewer_preferences = |  | ||||||
|         match Pdf.lookup_direct pdf "/ViewerPreferences" catalog with |  | ||||||
|         | Some d -> d |  | ||||||
|         | None -> Pdf.Dictionary [] |  | ||||||
|       in |  | ||||||
|         let viewer_preferences' = |  | ||||||
|           Pdf.add_dict_entry viewer_preferences key value |  | ||||||
|         in |  | ||||||
|           let catalog' = |  | ||||||
|             Pdf.add_dict_entry catalog "/ViewerPreferences" viewer_preferences' |  | ||||||
|           in |  | ||||||
|             let catalognum = Pdf.addobj pdf catalog' in |  | ||||||
|               let trailerdict' = |  | ||||||
|                 Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum) |  | ||||||
|               in |  | ||||||
|                 {pdf with |  | ||||||
|                   Pdf.minor = max pdf.Pdf.minor version; |  | ||||||
|                   Pdf.root = catalognum; |  | ||||||
|                   Pdf.trailerdict = trailerdict'} |  | ||||||
|   | None -> error "bad root" |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| (* \section{Set page layout} *) |  | ||||||
| let set_page_layout pdf s = |  | ||||||
|   match s with |  | ||||||
|   | "SinglePage" | "OneColumn" | "TwoColumnLeft" |  | ||||||
|   | "TwoColumnRight" | "TwoPageLeft" | "TwoPageRight" -> |  | ||||||
|       begin match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with |  | ||||||
|       | Some catalog -> |  | ||||||
|           let catalog' = |  | ||||||
|             Pdf.add_dict_entry catalog "/PageLayout" (Pdf.Name ("/" ^ s)) |  | ||||||
|           in |  | ||||||
|             let catalognum = Pdf.addobj pdf catalog' in |  | ||||||
|               let trailerdict' = |  | ||||||
|                 Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum) |  | ||||||
|               in |  | ||||||
|                 {pdf with |  | ||||||
|                   Pdf.root = catalognum; |  | ||||||
|                   Pdf.trailerdict = trailerdict'} |  | ||||||
|       | None -> error "bad root" |  | ||||||
|       end |  | ||||||
|   | _ -> error "Unknown page layout" |  | ||||||
|  |  | ||||||
| (* \section{Set or replace metadata} *) |  | ||||||
| let set_metadata_from_bytes keepversion data pdf = |  | ||||||
|   let metadata_stream = |  | ||||||
|     Pdf.Stream |  | ||||||
|       {contents = |  | ||||||
|         (Pdf.Dictionary |  | ||||||
|          ["/Length", Pdf.Integer (bytes_size data); |  | ||||||
|           "/Type", Pdf.Name "/Metadata"; |  | ||||||
|           "/Subtype", Pdf.Name "/XML"], |  | ||||||
|          Pdf.Got data)} |  | ||||||
|   in |  | ||||||
|     let objnum = Pdf.addobj pdf metadata_stream in  |  | ||||||
|       let document_catalog = |  | ||||||
|         match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with |  | ||||||
|         | Some s -> s |  | ||||||
|         | None -> error "Malformed PDF: No root." |  | ||||||
|       in |  | ||||||
|         let document_catalog' = |  | ||||||
|           Pdf.add_dict_entry document_catalog "/Metadata" (Pdf.Indirect objnum) |  | ||||||
|         in |  | ||||||
|           let rootnum = Pdf.addobj pdf document_catalog' in |  | ||||||
|             let trailerdict = |  | ||||||
|               Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootnum) |  | ||||||
|             in |  | ||||||
|             {pdf with |  | ||||||
|                Pdf.trailerdict = trailerdict; |  | ||||||
|                Pdf.root = rootnum; |  | ||||||
|                Pdf.minor = |  | ||||||
|                  if keepversion then pdf.Pdf.minor else max 4 pdf.Pdf.minor} |  | ||||||
|  |  | ||||||
| let set_metadata keepversion filename pdf = |  | ||||||
|   let ch = open_in_bin filename in |  | ||||||
|     let data = mkbytes (in_channel_length ch) in |  | ||||||
|       for x = 0 to bytes_size data - 1 do |  | ||||||
|         bset data x (input_byte ch) |  | ||||||
|       done; |  | ||||||
|       set_metadata_from_bytes keepversion data pdf |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| (* \section{Remove metadata} *) |  | ||||||
| let remove_metadata pdf = |  | ||||||
|   match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with |  | ||||||
|   | None -> error "malformed file"  |  | ||||||
|   | Some root -> |  | ||||||
|       let root' = Pdf.remove_dict_entry root "/Metadata" in |  | ||||||
|         let rootnum = Pdf.addobj pdf root' in |  | ||||||
|           {pdf with |  | ||||||
|              Pdf.trailerdict = |  | ||||||
|                Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootnum); |  | ||||||
|              Pdf.root = |  | ||||||
|                rootnum} |  | ||||||
|  |  | ||||||
| (* List bookmarks *) | (* List bookmarks *) | ||||||
| let output_string_of_target pdf fastrefnums x = | let output_string_of_target pdf fastrefnums x = | ||||||
|   match Pdfdest.pdfobject_of_destination x with |   match Pdfdest.pdfobject_of_destination x with | ||||||
| @@ -584,9 +360,9 @@ let list_bookmarks ~json encoding range pdf output = | |||||||
|             replace q bs q (replace nl bs n (replace bs bs bs codepoints)) |             replace q bs q (replace nl bs n (replace bs bs bs codepoints)) | ||||||
|         in |         in | ||||||
|           match encoding with |           match encoding with | ||||||
|           | UTF8 -> Pdftext.utf8_of_codepoints escaped |           | Cpdfmetadata.UTF8 -> Pdftext.utf8_of_codepoints escaped | ||||||
|           | Stripped -> process_stripped escaped |           | Cpdfmetadata.Stripped -> process_stripped escaped | ||||||
|           | Raw -> s |           | Cpdfmetadata.Raw -> s | ||||||
|     in |     in | ||||||
|       let bookmarks = Pdfmarks.read_bookmarks pdf in |       let bookmarks = Pdfmarks.read_bookmarks pdf in | ||||||
|       let refnums = Pdf.page_reference_numbers pdf in |       let refnums = Pdf.page_reference_numbers pdf in | ||||||
| @@ -719,27 +495,6 @@ let hasbox pdf page boxname = | |||||||
|         | _ -> false |         | _ -> false | ||||||
|  |  | ||||||
|  |  | ||||||
| (* Print metadata *) |  | ||||||
| let get_metadata pdf = |  | ||||||
|   match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with |  | ||||||
|   | None -> error "malformed file" |  | ||||||
|   | Some root -> |  | ||||||
|       match Pdf.lookup_direct pdf "/Metadata" root with |  | ||||||
|       | Some ((Pdf.Stream _) as s) -> |  | ||||||
|           Pdfcodec.decode_pdfstream pdf s; |  | ||||||
|           begin match s with |  | ||||||
|           | Pdf.Stream {contents = (_, Pdf.Got data)} -> Some data  |  | ||||||
|           | _ -> assert false |  | ||||||
|           end |  | ||||||
|       | _ -> None |  | ||||||
|  |  | ||||||
| let print_metadata pdf = |  | ||||||
|   match get_metadata pdf with |  | ||||||
|     None -> () |  | ||||||
|   | Some data -> |  | ||||||
|       for x = 0 to bytes_size data - 1 do |  | ||||||
|         Printf.printf "%c" (char_of_int (bget data x)) |  | ||||||
|       done |  | ||||||
|  |  | ||||||
| (* List fonts *) | (* List fonts *) | ||||||
| let list_font pdf page (name, dict) = | let list_font pdf page (name, dict) = | ||||||
| @@ -1728,7 +1483,7 @@ let stamp relative_to_cropbox position topline midline fast scale_to_fit isover | |||||||
|         let merged = |         let merged = | ||||||
|           {merged with Pdf.saved_encryption = pdf.Pdf.saved_encryption} |           {merged with Pdf.saved_encryption = pdf.Pdf.saved_encryption} | ||||||
|         in |         in | ||||||
|           let merged = copy_id true pdf merged in |           let merged = Cpdfmetadata.copy_id true pdf merged in | ||||||
|             let merged_pages = Pdfpage.pages_of_pagetree merged in |             let merged_pages = Pdfpage.pages_of_pagetree merged in | ||||||
|               let under_pages, over_page = |               let under_pages, over_page = | ||||||
|                 all_but_last merged_pages, last merged_pages |                 all_but_last merged_pages, last merged_pages | ||||||
| @@ -1819,7 +1574,7 @@ let stamp_as_xobject pdf range over = | |||||||
|         let merged = |         let merged = | ||||||
|           {merged with Pdf.saved_encryption = pdf.Pdf.saved_encryption} |           {merged with Pdf.saved_encryption = pdf.Pdf.saved_encryption} | ||||||
|         in |         in | ||||||
|           let merged = copy_id true pdf merged in |           let merged = Cpdfmetadata.copy_id true pdf merged in | ||||||
|             let merged_pages = Pdfpage.pages_of_pagetree merged in |             let merged_pages = Pdfpage.pages_of_pagetree merged in | ||||||
|               let under_pages, over_page = |               let under_pages, over_page = | ||||||
|                 all_but_last merged_pages, last merged_pages |                 all_but_last merged_pages, last merged_pages | ||||||
| @@ -2185,7 +1940,7 @@ let scale_contents ?(fast=false) position scale pdf range = | |||||||
| (* \section{List annotations} *) | (* \section{List annotations} *) | ||||||
| let get_annotation_string encoding pdf annot = | let get_annotation_string encoding pdf annot = | ||||||
|   match Pdf.lookup_direct pdf "/Contents" annot with |   match Pdf.lookup_direct pdf "/Contents" annot with | ||||||
|   | Some (Pdf.String s) -> encode_output encoding s |   | Some (Pdf.String s) -> Cpdfmetadata.encode_output encoding s | ||||||
|   | _ -> "" |   | _ -> "" | ||||||
|  |  | ||||||
| let print_annotation encoding pdf num s = | let print_annotation encoding pdf num s = | ||||||
| @@ -2696,415 +2451,6 @@ let twoup fast pdf = | |||||||
|         let pdf = upright all (rotate_pdf ~-90 pdf all) in |         let pdf = upright all (rotate_pdf ~-90 pdf all) in | ||||||
|           scale_to_fit_pdf ~fast Cpdfposition.Diagonal 1. (many (width, height) endpage) () pdf all |           scale_to_fit_pdf ~fast Cpdfposition.Diagonal 1. (many (width, height) endpage) () pdf all | ||||||
|  |  | ||||||
| (* \section{Output info} *) |  | ||||||
| let get_info raw pdf = |  | ||||||
|   let infodict = |  | ||||||
|     match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with |  | ||||||
|     | Some infodict -> infodict |  | ||||||
|     | _ -> Pdf.Dictionary [] |  | ||||||
|   in |  | ||||||
|     let getstring name = |  | ||||||
|       match Pdf.lookup_direct pdf name infodict with |  | ||||||
|       | Some (Pdf.String s) -> |  | ||||||
|           if raw then s else crude_de_unicode s |  | ||||||
|       | Some (Pdf.Boolean false) -> "False" |  | ||||||
|       | Some (Pdf.Boolean true) -> "True" |  | ||||||
|       | _ -> if name = "/Trapped" then "False" else "" |  | ||||||
|     in |  | ||||||
|       getstring |  | ||||||
|         |  | ||||||
| let get_info_utf8 pdf = |  | ||||||
|   let infodict = |  | ||||||
|     match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with |  | ||||||
|     | Some infodict -> infodict |  | ||||||
|     | _ -> Pdf.Dictionary [] |  | ||||||
|   in |  | ||||||
|     (function name -> |  | ||||||
|       match Pdf.lookup_direct pdf name infodict with |  | ||||||
|       | Some (Pdf.String s) -> Pdftext.utf8_of_pdfdocstring s |  | ||||||
|       | Some (Pdf.Boolean false) -> "False" |  | ||||||
|       | Some (Pdf.Boolean true) -> "True" |  | ||||||
|       | _ -> if name = "/Trapped" then "False" else "") |  | ||||||
|  |  | ||||||
| let getstring encoding pdf = |  | ||||||
|   match encoding with |  | ||||||
|   | Raw -> get_info true pdf |  | ||||||
|   | Stripped -> get_info false pdf |  | ||||||
|   | UTF8 -> get_info_utf8 pdf |  | ||||||
|  |  | ||||||
| let output_info encoding pdf = |  | ||||||
|   let getstring = getstring encoding pdf in |  | ||||||
|     Printf.printf "Version: %i.%i\n" pdf.Pdf.major pdf.Pdf.minor; |  | ||||||
|     Printf.printf "Pages: %i\n" (Pdfpage.endpage pdf); |  | ||||||
|     Printf.printf "Title: %s\n" (getstring "/Title"); |  | ||||||
|     Printf.printf "Author: %s\n" (getstring "/Author"); |  | ||||||
|     Printf.printf "Subject: %s\n" (getstring "/Subject"); |  | ||||||
|     Printf.printf "Keywords: %s\n" (getstring "/Keywords"); |  | ||||||
|     Printf.printf "Creator: %s\n" (getstring "/Creator"); |  | ||||||
|     Printf.printf "Producer: %s\n" (getstring "/Producer"); |  | ||||||
|     Printf.printf "Created: %s\n" (getstring "/CreationDate"); |  | ||||||
|     Printf.printf "Modified: %s\n" (getstring "/ModDate"); |  | ||||||
|     Printf.printf "Trapped: %s\n" (getstring "/Trapped") |  | ||||||
|  |  | ||||||
| type xmltree = |  | ||||||
|     E of Cpdfxmlm.tag * xmltree list |  | ||||||
|   | D of string |  | ||||||
|  |  | ||||||
| let xmltree_of_bytes b = |  | ||||||
|   let i = Cpdfxmlm.make_input (`String (0, string_of_bytes b)) in |  | ||||||
|     let el tag childs = E (tag, childs) |  | ||||||
|     and data d = D d in |  | ||||||
|       Cpdfxmlm.input_doc_tree ~el ~data i |  | ||||||
|  |  | ||||||
| let bytes_of_xmltree t = |  | ||||||
|   let buf = Buffer.create 1024 in |  | ||||||
|   let o = Cpdfxmlm.make_output (`Buffer buf) in |  | ||||||
|   let frag = function |  | ||||||
|       E (tag, childs) -> `El (tag, childs) |  | ||||||
|     | D d -> `Data d |  | ||||||
|   in |  | ||||||
|     Cpdfxmlm.output_doc_tree frag o t; |  | ||||||
|     bytes_of_string (Buffer.contents buf) |  | ||||||
|  |  | ||||||
| let rec string_of_xmltree = function |  | ||||||
|    D d -> |  | ||||||
|      Printf.sprintf "DATA {%s}" d |  | ||||||
|  | E (tag, trees) -> |  | ||||||
|      Printf.sprintf "ELT (%s, %s)" |  | ||||||
|        (string_of_tag tag) |  | ||||||
|        (string_of_xmltrees trees) |  | ||||||
|  |  | ||||||
| and string_of_tag ((n, n'), attributes) = |  | ||||||
|   Printf.sprintf |  | ||||||
|     "NAME |%s| |%s|, ATTRIBUTES {%s}" n n' |  | ||||||
|     (string_of_attributes attributes) |  | ||||||
|  |  | ||||||
| and string_of_attribute ((n, n'), str) = |  | ||||||
|   Printf.sprintf "ATTRNAME |%s| |%s|, STR {%s}" n n' str |  | ||||||
|  |  | ||||||
| and string_of_attributes attrs = |  | ||||||
|   fold_left |  | ||||||
|     (fun a b -> a ^ " " ^ b) "" (map string_of_attribute attrs) |  | ||||||
|  |  | ||||||
| and string_of_xmltrees trees = |  | ||||||
|   fold_left |  | ||||||
|     (fun a b -> a ^ " " ^ b) "" (map string_of_xmltree trees) |  | ||||||
|  |  | ||||||
| let adobe = "http://ns.adobe.com/pdf/1.3/" |  | ||||||
|  |  | ||||||
| let xmp = "http://ns.adobe.com/xap/1.0/" |  | ||||||
|  |  | ||||||
| let dc = "http://purl.org/dc/elements/1.1/" |  | ||||||
|  |  | ||||||
| let rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" |  | ||||||
|  |  | ||||||
| let combine_with_spaces strs = |  | ||||||
|   String.trim |  | ||||||
|     (fold_left (fun x y -> x ^ (if x <> "" then ", " else "") ^ y) "" strs) |  | ||||||
|  |  | ||||||
| (* Collect all <li> elements inside a seq, bag, or alt. Combine with commas. If |  | ||||||
| none found, return empty string instead. *) |  | ||||||
| let collect_list_items = function |  | ||||||
|    E (((n, n'), _), elts) when |  | ||||||
|      n = rdf && (n' = "Alt" || n' = "Seq" || n' = "Bag") |  | ||||||
|    -> |  | ||||||
|      combine_with_spaces |  | ||||||
|        (option_map |  | ||||||
|          (function |  | ||||||
|              E (((n, n'), _), [D d]) when n = rdf && n' = "li" -> |  | ||||||
|                Some d |  | ||||||
|            | _ -> None) |  | ||||||
|          elts) |  | ||||||
|  | _ -> "" |  | ||||||
|  |  | ||||||
| let collect_list_items_all all = |  | ||||||
|   match keep (function E _ -> true | _ -> false) all with |  | ||||||
|     h::_ -> Some (collect_list_items h) |  | ||||||
|   | [] -> None |  | ||||||
|  |  | ||||||
| let rec get_data_for namespace name = function |  | ||||||
|    D _ -> None |  | ||||||
|  | E (((n, n'), _), [D d]) when n = namespace && n' = name -> |  | ||||||
|      Some d |  | ||||||
|  | E (((n, n'), _), e) when n = namespace && n' = name -> |  | ||||||
|      collect_list_items_all e |  | ||||||
|  | E (_, l) -> |  | ||||||
|      match option_map (get_data_for namespace name) l with |  | ||||||
|        x :: _ -> Some x |  | ||||||
|      | _ -> None |  | ||||||
|  |  | ||||||
| let output_xmp_info encoding pdf = |  | ||||||
|   let print_out tree title namespace name = |  | ||||||
|     match get_data_for namespace name tree with |  | ||||||
|       None -> () |  | ||||||
|     | Some data -> |  | ||||||
|         Printf.printf "%s: " title; |  | ||||||
|         print_endline data |  | ||||||
|   in |  | ||||||
|     match get_metadata pdf with |  | ||||||
|       None -> () |  | ||||||
|     | Some metadata -> |  | ||||||
|         try |  | ||||||
|           let dtd, tree = xmltree_of_bytes metadata in |  | ||||||
|             print_out tree "XMP pdf:Keywords" adobe "Keywords"; |  | ||||||
|             print_out tree "XMP pdf:Producer" adobe "Producer"; |  | ||||||
|             print_out tree "XMP pdf:Trapped" adobe "Trapped"; |  | ||||||
|             print_out tree "XMP pdf:Title" adobe "Title"; |  | ||||||
|             print_out tree "XMP pdf:Creator" adobe "Creator"; |  | ||||||
|             print_out tree "XMP pdf:Subject" adobe "Subject"; |  | ||||||
|             print_out tree "XMP pdf:Author" adobe "Author"; |  | ||||||
|             print_out tree "XMP pdf:CreationDate" adobe "CreationDate"; |  | ||||||
|             print_out tree "XMP pdf:ModDate" adobe "ModDate"; |  | ||||||
|             print_out tree "XMP xmp:CreateDate" xmp "CreateDate"; |  | ||||||
|             print_out tree "XMP xmp:CreatorTool" xmp "CreatorTool"; |  | ||||||
|             print_out tree "XMP xmp:MetadataDate" xmp "MetadataDate"; |  | ||||||
|             print_out tree "XMP xmp:ModifyDate" xmp "ModifyDate"; |  | ||||||
|             print_out tree "XMP dc:title" dc "title"; |  | ||||||
|             print_out tree "XMP dc:creator" dc "creator"; |  | ||||||
|             print_out tree "XMP dc:subject" dc "subject"; |  | ||||||
|             print_out tree "XMP dc:description" dc "description" |  | ||||||
|         with |  | ||||||
|           _ -> () |  | ||||||
|  |  | ||||||
| (* Get XMP info equivalent of an old metadata field *) |  | ||||||
| let check = function |  | ||||||
|   "/Title" -> [(adobe, "Title"); (dc, "title")] |  | ||||||
| | "/Author" -> [(adobe, "Author"); (dc, "creator")] |  | ||||||
| | "/Subject" -> [(adobe, "Subject"); (dc, "subject")] |  | ||||||
| | "/Keywords" -> [(adobe, "Keywords")] |  | ||||||
| | "/Creator" -> [(adobe, "Creator"); (xmp, "CreatorTool")] |  | ||||||
| | "/Producer" -> [(adobe, "Producer")] |  | ||||||
| | "/CreationDate" -> [(adobe, "CreationDate"); (xmp, "CreateDate")] |  | ||||||
| | "/ModDate" -> [(adobe, "ModificationDate"); (xmp, "ModifyDate")] |  | ||||||
| | _ -> failwith "Cpdf.check_name not known" |  | ||||||
|  |  | ||||||
| let get_xmp_info pdf name = |  | ||||||
|   let tocheck = check name in |  | ||||||
|   match get_metadata pdf with |  | ||||||
|     None -> "" |  | ||||||
|   | Some metadata -> |  | ||||||
|       try |  | ||||||
|         let _, tree = xmltree_of_bytes metadata in |  | ||||||
|           let results = map (fun (kind, key) -> match get_data_for kind key tree with Some x -> x | None -> "") tocheck in |  | ||||||
|             match lose (eq "") results with |  | ||||||
|              x::_ -> x |  | ||||||
|            | [] -> "" |  | ||||||
|       with |  | ||||||
|         _ -> "" |  | ||||||
|  |  | ||||||
| (* Set XMP info *) |  | ||||||
| let rec set_xml_field kind fieldname value = function |  | ||||||
|   D data -> D data |  | ||||||
| | E (((n, n'), m), _ (*[D _]*)) when n = kind && n' = fieldname -> (* Replace anything inside, including nothing i.e <tag/> *) |  | ||||||
|     E (((n, n'), m), [D value]) |  | ||||||
| | E (x, ts) -> E (x, map (set_xml_field kind fieldname value) ts) |  | ||||||
|  |  | ||||||
| let set_pdf_info_xml kind fieldname value xmldata pdf = |  | ||||||
|   let dtd, tree = xmltree_of_bytes xmldata in |  | ||||||
|   let str = |  | ||||||
|     match value with |  | ||||||
|       Pdf.String s -> s |  | ||||||
|     | Pdf.Boolean true -> "True" |  | ||||||
|     | Pdf.Boolean false -> "False" |  | ||||||
|     | _ -> failwith "set_pdf_info_xml: not a string" |  | ||||||
|   in |  | ||||||
|   let newtree = set_xml_field kind fieldname str tree in |  | ||||||
|     bytes_of_xmltree (dtd, newtree) |  | ||||||
|  |  | ||||||
| let set_pdf_info_xml_many changes value xmldata pdf = |  | ||||||
|   let xmldata = ref xmldata in |  | ||||||
|     iter |  | ||||||
|       (fun (kind, fieldname) -> |  | ||||||
|          xmldata := set_pdf_info_xml kind fieldname value !xmldata pdf) |  | ||||||
|       changes; |  | ||||||
|     !xmldata |  | ||||||
|  |  | ||||||
|  |  | ||||||
| (* \section{Set an entry in the /Info dictionary} *) |  | ||||||
|  |  | ||||||
| (* We must parse the date to get its components, then use strftime to build the |  | ||||||
|  * new string in XMP format *) |  | ||||||
|  |  | ||||||
| type date = |  | ||||||
|   {mutable year : int; |  | ||||||
|    mutable month : int; (* 1 - 12 *) |  | ||||||
|    mutable day : int; (* 1 - 31 *) |  | ||||||
|    mutable hour : int; (* 0 - 23 *) |  | ||||||
|    mutable minute : int; (* 0 - 59 *) |  | ||||||
|    mutable second : int; (* 0 - 59 *) |  | ||||||
|    mutable ut_relationship : int; (* -1, 0, +1 *) |  | ||||||
|    mutable offset_hours : int; (* 0 - 59 *) |  | ||||||
|    mutable offset_minutes : int (* 0 - 59 *)} |  | ||||||
|  |  | ||||||
| let default_date () = |  | ||||||
|   {year = 0; |  | ||||||
|    month = 1; |  | ||||||
|    day = 1; |  | ||||||
|    hour = 0; |  | ||||||
|    minute = 0; |  | ||||||
|    second = 0; |  | ||||||
|    ut_relationship = 0; |  | ||||||
|    offset_hours = 0; |  | ||||||
|    offset_minutes = 0} |  | ||||||
|  |  | ||||||
| (* XMP date format is YYYY-MM-DDThh:mm:ssTZD *) |  | ||||||
| let make_xmp_date_from_components d = |  | ||||||
|   let tzd = |  | ||||||
|     if d.ut_relationship = 0 && d.offset_hours = 0 && d.offset_minutes = 0 then "Z" else |  | ||||||
|     (if d.ut_relationship >=0 then "+" else "-") ^ |  | ||||||
|     Printf.sprintf "%02i" d.offset_hours ^ |  | ||||||
|     ":" ^ |  | ||||||
|     Printf.sprintf "%02i" d.offset_minutes |  | ||||||
|   in  |  | ||||||
|     Cpdfstrftime.strftime |  | ||||||
|       ~time:{Cpdfstrftime._tm_sec = d.second; |  | ||||||
|              Cpdfstrftime._tm_min = d.minute; |  | ||||||
|              Cpdfstrftime._tm_hour = d.hour; |  | ||||||
|              Cpdfstrftime._tm_mday = d.day; |  | ||||||
|              Cpdfstrftime._tm_mon = d.month - 1; |  | ||||||
|              Cpdfstrftime._tm_year = d.year - 1900; |  | ||||||
|              Cpdfstrftime._tm_wday = 0; |  | ||||||
|              Cpdfstrftime._tm_yday = 0; |  | ||||||
|              Cpdfstrftime._tm_isdst = false} |  | ||||||
|       "%Y-%m-%dT%H:%M:%S" |  | ||||||
|   ^ |  | ||||||
|     tzd |  | ||||||
|  |  | ||||||
| let xmp_date date = |  | ||||||
|   let d = default_date () in |  | ||||||
|   try |  | ||||||
|     match explode date with |  | ||||||
|       'D'::':'::r -> |  | ||||||
|         begin match r with |  | ||||||
|           y1::y2::y3::y4::r -> |  | ||||||
|             d.year <- int_of_string (implode [y1; y2; y3; y4]); |  | ||||||
|             begin match r with |  | ||||||
|               m1::m2::r -> |  | ||||||
|                 d.month <- int_of_string (implode [m1; m2]); |  | ||||||
|                 begin match r with |  | ||||||
|                   d1::d2::r -> |  | ||||||
|                   d.day <- int_of_string (implode [d1; d2]); |  | ||||||
|                   begin match r with |  | ||||||
|                     h1::h2::r -> |  | ||||||
|                     d.hour <- int_of_string (implode [h1; h2]); |  | ||||||
|                     begin match r with |  | ||||||
|                       m1::m2::r -> |  | ||||||
|                       d.minute <- int_of_string (implode [m1; m2]); |  | ||||||
|                       begin match r with |  | ||||||
|                        s1::s2::r -> |  | ||||||
|                        d.second <- int_of_string (implode [s1; s2]); |  | ||||||
|                          begin match r with |  | ||||||
|                            o::r -> |  | ||||||
|                            d.ut_relationship <- |  | ||||||
|                              if o = '+' then 1 else |  | ||||||
|                              if o = '-' then -1 else |  | ||||||
|                              0; |  | ||||||
|                            begin match r with |  | ||||||
|                              h1::h2::'\''::r -> |  | ||||||
|                              d.offset_hours <- int_of_string (implode [h1; h2]); |  | ||||||
|                              begin match r with |  | ||||||
|                                m1::m2::_ -> |  | ||||||
|                                d.offset_minutes <- int_of_string (implode [m1; m2]); |  | ||||||
|                                raise Exit |  | ||||||
|                              | _ -> raise Exit |  | ||||||
|                              end |  | ||||||
|                            | _ -> raise Exit |  | ||||||
|                            end |  | ||||||
|                          | _ -> raise Exit |  | ||||||
|                          end |  | ||||||
|                       | _ -> raise Exit |  | ||||||
|                       end |  | ||||||
|                     | _ -> raise Exit |  | ||||||
|                     end |  | ||||||
|                   | _ -> raise Exit |  | ||||||
|                   end |  | ||||||
|                 | _ -> raise Exit |  | ||||||
|                 end |  | ||||||
|             | _ -> raise Exit   |  | ||||||
|             end |  | ||||||
|         | _ -> |  | ||||||
|           Printf.eprintf "xmp_date: Malformed date string (no year): %s\n%!" date; |  | ||||||
|           make_xmp_date_from_components d |  | ||||||
|         end |  | ||||||
|     | _ -> |  | ||||||
|         Printf.eprintf "xmp_date: Malformed date string (no prefix): %s\n%!" date; |  | ||||||
|         make_xmp_date_from_components d |  | ||||||
|   with |  | ||||||
|     Exit -> make_xmp_date_from_components d |  | ||||||
|  |  | ||||||
| let set_pdf_info ?(xmp_also=false) ?(xmp_just_set=false) (key, value, version) pdf = |  | ||||||
|   let infodict = |  | ||||||
|     match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with |  | ||||||
|     | Some d -> d |  | ||||||
|     | None -> Pdf.Dictionary [] |  | ||||||
|   in |  | ||||||
|     let infodict' = Pdf.add_dict_entry infodict key value in |  | ||||||
|       let objnum = Pdf.addobj pdf infodict' in |  | ||||||
|         if not xmp_just_set then |  | ||||||
|           begin |  | ||||||
|             pdf.Pdf.trailerdict <- |  | ||||||
|               Pdf.add_dict_entry pdf.Pdf.trailerdict "/Info" (Pdf.Indirect objnum); |  | ||||||
|             pdf.Pdf.minor <- |  | ||||||
|               max pdf.Pdf.minor version |  | ||||||
|           end; |  | ||||||
|         if xmp_also then |  | ||||||
|           begin match get_metadata pdf with |  | ||||||
|             None -> pdf |  | ||||||
|           | Some xmldata -> |  | ||||||
|               let xmp_date = function Pdf.String s -> Pdf.String (xmp_date s) | _ -> failwith "xmp_date not a string" in |  | ||||||
|               let changes, value = |  | ||||||
|                 match key with |  | ||||||
|                 | "/Producer" -> [(adobe, "Producer")], value |  | ||||||
|                 | "/Creator" -> [(adobe, "Creator"); (xmp, "CreatorTool"); (dc, "creator")], value |  | ||||||
|                 | "/Author" -> [(adobe, "Author")], value |  | ||||||
|                 | "/Title" -> [(adobe, "Title"); (dc, "title")], value |  | ||||||
|                 | "/Subject" -> [(adobe, "Subject"); (dc, "subject")], value |  | ||||||
|                 | "/Keywords" -> [(adobe, "Keywords")], value |  | ||||||
|                 | "/CreationDate" -> [(adobe, "CreationDate"); (xmp, "CreateDate")], xmp_date value |  | ||||||
|                 | "/ModDate" -> [(adobe, "ModDate"); (xmp, "ModifyDate")], xmp_date value |  | ||||||
|                 | "/Trapped" -> [(adobe, "Trapped")], value |  | ||||||
|                 | _ -> failwith "Unknown call to set_pdf_info" |  | ||||||
|               in |  | ||||||
|                 set_metadata_from_bytes |  | ||||||
|                   true |  | ||||||
|                   (set_pdf_info_xml_many changes value xmldata pdf) |  | ||||||
|                   pdf |  | ||||||
|           end |  | ||||||
|        else |  | ||||||
|          pdf |  | ||||||
|  |  | ||||||
| (* Set metadata date *) |  | ||||||
| let set_metadata_date pdf date = |  | ||||||
|   match get_metadata pdf with |  | ||||||
|     None -> pdf |  | ||||||
|   | Some xmldata -> |  | ||||||
|       let changes= [(xmp, "MetadataDate")] in |  | ||||||
|       let value = match date with "now" -> xmp_date (expand_date "now") | x -> x in |  | ||||||
|         set_metadata_from_bytes |  | ||||||
|           true |  | ||||||
|           (set_pdf_info_xml_many changes (Pdf.String value) xmldata pdf) |  | ||||||
|           pdf |  | ||||||
|  |  | ||||||
| let replacements pdf = |  | ||||||
|   let info = get_info_utf8 pdf in |  | ||||||
|     [("CREATEDATE", xmp_date (let i = info "/CreationDate" in if i = "" then expand_date "now" else i)); |  | ||||||
|      ("MODDATE", xmp_date (let i = info "/ModDate" in if i = "" then expand_date "now" else i)); |  | ||||||
|      ("PRODUCER", info "/Producer"); |  | ||||||
|      ("CREATOR", info "/Creator"); |  | ||||||
|      ("TITLE", info "/Title"); |  | ||||||
|      ("SUBJECT", info "/Subject"); |  | ||||||
|      ("AUTHOR", info "/Author"); |  | ||||||
|      ("KEYWORDS", info "/Keywords"); |  | ||||||
|      ("TRAPPED", info "/Trapped"); |  | ||||||
|      ("METADATADATE", xmp_date (expand_date "now"))] |  | ||||||
|  |  | ||||||
| let create_metadata pdf = |  | ||||||
|   let xmp = ref xmp_template in |  | ||||||
|   iter |  | ||||||
|     (fun (s, r) -> xmp := string_replace_all s r !xmp) |  | ||||||
|     (replacements pdf); |  | ||||||
|   set_metadata_from_bytes false (bytes_of_string !xmp) pdf |  | ||||||
|  |  | ||||||
| (* \section{Blacken text} *) | (* \section{Blacken text} *) | ||||||
|  |  | ||||||
| (* | (* | ||||||
| @@ -3497,9 +2843,6 @@ let draft onlyremove boxes range pdf = | |||||||
|        pagenums; |        pagenums; | ||||||
|       Pdfpage.change_pages true !pdf (rev !pages') |       Pdfpage.change_pages true !pdf (rev !pages') | ||||||
|  |  | ||||||
| let set_version v pdf = |  | ||||||
|   pdf.Pdf.minor <- v |  | ||||||
|          |  | ||||||
| let blank_document width height pages = | let blank_document width height pages = | ||||||
|   let pdf_pages = |   let pdf_pages = | ||||||
|     map (fun () -> Pdfpage.blankpage (Pdfpaper.make Pdfunits.PdfPoint width height)) (many () pages) |     map (fun () -> Pdfpage.blankpage (Pdfpaper.make Pdfunits.PdfPoint width height)) (many () pages) | ||||||
| @@ -4022,7 +3365,7 @@ let list_spot_colours pdf = | |||||||
| let add_bookmark_title filename use_title pdf = | let add_bookmark_title filename use_title pdf = | ||||||
|   let title = |   let title = | ||||||
|     if use_title then |     if use_title then | ||||||
|       match get_info_utf8 pdf "/Title", get_xmp_info pdf "/Title" with |       match Cpdfmetadata.get_info_utf8 pdf "/Title", Cpdfmetadata.get_xmp_info pdf "/Title" with | ||||||
|         "", x | x, "" | _, x -> x |         "", x | x, "" | _, x -> x | ||||||
|     else |     else | ||||||
|       Filename.basename filename |       Filename.basename filename | ||||||
| @@ -4062,13 +3405,13 @@ let create_pdf pages pagesize = | |||||||
|  |  | ||||||
| (* Remove characters which might not make good filenames. *) | (* Remove characters which might not make good filenames. *) | ||||||
| let remove_unsafe_characters encoding s = | let remove_unsafe_characters encoding s = | ||||||
|   if encoding = Raw then s else |   if encoding = Cpdfmetadata.Raw then s else | ||||||
|     let chars = |     let chars = | ||||||
|       lose |       lose | ||||||
|         (function x -> |         (function x -> | ||||||
|            match x with |            match x with | ||||||
|            '/' | '?' | '<' | '>' | '\\' | ':' | '*' | '|' | '\"' | '^' | '+' | '=' -> true |            '/' | '?' | '<' | '>' | '\\' | ':' | '*' | '|' | '\"' | '^' | '+' | '=' -> true | ||||||
|            | x when int_of_char x < 32 || (int_of_char x > 126 && encoding <> Stripped) -> true |            | x when int_of_char x < 32 || (int_of_char x > 126 && encoding <> Cpdfmetadata.Stripped) -> true | ||||||
|            | _ -> false) |            | _ -> false) | ||||||
|         (explode s) |         (explode s) | ||||||
|     in |     in | ||||||
|   | |||||||
							
								
								
									
										80
									
								
								cpdf.mli
									
									
									
									
									
								
							
							
						
						
									
										80
									
								
								cpdf.mli
									
									
									
									
									
								
							| @@ -1,13 +1,6 @@ | |||||||
| (** Coherent PDF Tools Core Routines *) | (** Coherent PDF Tools Core Routines *) | ||||||
| open Pdfutil | open Pdfutil | ||||||
|  |  | ||||||
| (** {2 Types and Exceptions} *) |  | ||||||
|  |  | ||||||
| (** Possible output encodings for some function. [Raw] does no processing at |  | ||||||
| all - the PDF string is output as-is. [UTF8] converts loslessly to UTF8. |  | ||||||
| [Stripped] extracts the unicode codepoints and returns only those which |  | ||||||
| correspond to 7 bit ASCII. *) |  | ||||||
| type encoding = Raw | UTF8 | Stripped |  | ||||||
|  |  | ||||||
| type color = | type color = | ||||||
|   Grey of float |   Grey of float | ||||||
| @@ -38,45 +31,6 @@ val map_pages : (int -> Pdfpage.t -> 'a) -> Pdf.t -> int list -> 'a list | |||||||
|  |  | ||||||
| val copy_cropbox_to_mediabox : Pdf.t -> int list -> Pdf.t | val copy_cropbox_to_mediabox : Pdf.t -> int list -> Pdf.t | ||||||
|  |  | ||||||
| (** {2 Metadata and settings} *) |  | ||||||
|  |  | ||||||
| (** [copy_id keepversion copyfrom copyto] copies the ID, if any, from |  | ||||||
| [copyfrom] to [copyto]. If [keepversion] is true, the PDF version of [copyto] |  | ||||||
| won't be affected. *) |  | ||||||
| val copy_id : bool -> Pdf.t -> Pdf.t -> Pdf.t |  | ||||||
|  |  | ||||||
| (** [set_pdf_info (key, value, version)] sets the entry [key] in the /Info directory, updating |  | ||||||
| the PDF minor version to [version].*) |  | ||||||
| val set_pdf_info : ?xmp_also:bool -> ?xmp_just_set:bool -> (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t |  | ||||||
|  |  | ||||||
| val get_xmp_info : Pdf.t -> string -> string |  | ||||||
|  |  | ||||||
| (** [set_pdf_info (key, value, version)] sets the entry [key] in the |  | ||||||
| /ViewerPreferences directory, updating the PDF minor version to [version].*) |  | ||||||
| val set_viewer_preference : (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t |  | ||||||
|  |  | ||||||
| (** Set the page layout to the given name (sans slash) e.g SinglePage *) |  | ||||||
| val set_page_layout : Pdf.t -> string -> Pdf.t |  | ||||||
|  |  | ||||||
| (** Set the page layout to the given name (sans slash) e.g SinglePage *) |  | ||||||
| val set_page_mode : Pdf.t -> string -> Pdf.t |  | ||||||
|  |  | ||||||
| (** Set the open action. If the boolean is true, /Fit will be used, otherwise /XYZ *) |  | ||||||
| val set_open_action : Pdf.t -> bool -> int -> Pdf.t |  | ||||||
|  |  | ||||||
| (** Set the PDF version number *) |  | ||||||
| val set_version : int -> Pdf.t -> unit |  | ||||||
|  |  | ||||||
| (** Given a PDF, returns a function which can lookup a given dictionary entry |  | ||||||
| from the /Info dictionary, returning it as a UTF8 string *) |  | ||||||
| val get_info_utf8 : Pdf.t -> string -> string |  | ||||||
|  |  | ||||||
| (** Output to standard output general information about a PDF. *) |  | ||||||
| val output_info : encoding -> Pdf.t -> unit |  | ||||||
|  |  | ||||||
| (** Output to standard output information from any XMP metadata stream in a PDF. *) |  | ||||||
| val output_xmp_info : encoding -> Pdf.t -> unit |  | ||||||
|  |  | ||||||
| (** {2 Bookmarks} *) | (** {2 Bookmarks} *) | ||||||
|  |  | ||||||
| (** [parse_bookmark_file verify pdf input] parses the bookmark file in [input]. | (** [parse_bookmark_file verify pdf input] parses the bookmark file in [input]. | ||||||
| @@ -90,30 +44,9 @@ val add_bookmarks : json:bool -> bool -> Pdfio.input -> Pdf.t -> Pdf.t | |||||||
|  |  | ||||||
| (** [list_bookmarks encoding range pdf output] lists the bookmarks to the given | (** [list_bookmarks encoding range pdf output] lists the bookmarks to the given | ||||||
| output in the format specified in cpdfmanual.pdf *) | output in the format specified in cpdfmanual.pdf *) | ||||||
| val list_bookmarks : json:bool -> encoding -> int list -> Pdf.t -> Pdfio.output -> unit | val list_bookmarks : json:bool -> Cpdfmetadata.encoding -> int list -> Pdf.t -> Pdfio.output -> unit | ||||||
|  |  | ||||||
| (** {2 XML Metadata} *) |  | ||||||
|  |  | ||||||
| (** [set_metadata keepversion filename pdf] sets the XML metadata of a PDF to the contents of [filename]. If [keepversion] is true, the PDF version will not be altered. *)  |  | ||||||
| val set_metadata : bool -> string -> Pdf.t -> Pdf.t |  | ||||||
|  |  | ||||||
| (** The same, but the content comes from [bytes]. *) |  | ||||||
| val set_metadata_from_bytes : bool -> Pdfio.bytes -> Pdf.t -> Pdf.t |  | ||||||
|  |  | ||||||
| (** Remove the metadata from a file *) |  | ||||||
| val remove_metadata : Pdf.t -> Pdf.t |  | ||||||
|  |  | ||||||
| (** Extract metadata to a [Pdfio.bytes] *) |  | ||||||
| val get_metadata : Pdf.t -> Pdfio.bytes option |  | ||||||
|  |  | ||||||
| (** Print metadate to stdout *) |  | ||||||
| val print_metadata : Pdf.t -> unit |  | ||||||
|  |  | ||||||
| (** Set the metadata date *) |  | ||||||
| val set_metadata_date : Pdf.t -> string -> Pdf.t |  | ||||||
|  |  | ||||||
| (** Create XMP metadata from scratch *) |  | ||||||
| val create_metadata : Pdf.t -> Pdf.t |  | ||||||
|  |  | ||||||
| (** {2 Stamping} *) | (** {2 Stamping} *) | ||||||
|  |  | ||||||
| @@ -145,9 +78,6 @@ val list_fonts : Pdf.t -> int list -> (int * string * string * string * string) | |||||||
|  |  | ||||||
| (** {2 Adding text} *) | (** {2 Adding text} *) | ||||||
|  |  | ||||||
| (** Expand the string "now" to a PDF date string, ignoring any other string *) |  | ||||||
| val expand_date : string -> string |  | ||||||
|  |  | ||||||
| (** Justification of multiline text *) | (** Justification of multiline text *) | ||||||
| type justification = | type justification = | ||||||
|   | LeftJustify |   | LeftJustify | ||||||
| @@ -281,10 +211,10 @@ val show_boxes : ?fast:bool -> Pdf.t -> int list -> Pdf.t | |||||||
| (** {2 Annotations} *) | (** {2 Annotations} *) | ||||||
|  |  | ||||||
| (** List the annotations to standard output in a given encoding. See cpdfmanual.pdf for the format details. *) | (** List the annotations to standard output in a given encoding. See cpdfmanual.pdf for the format details. *) | ||||||
| val list_annotations : json:bool -> encoding -> Pdf.t -> unit | val list_annotations : json:bool -> Cpdfmetadata.encoding -> Pdf.t -> unit | ||||||
|  |  | ||||||
| (** Return the annotations as a (pagenumber, content) list *) | (** Return the annotations as a (pagenumber, content) list *) | ||||||
| val get_annotations : encoding -> Pdf.t -> (int * string) list | val get_annotations : Cpdfmetadata.encoding -> Pdf.t -> (int * string) list | ||||||
|  |  | ||||||
| (** Copy the annotations on a given set of pages from a to b. b is returned. *) | (** Copy the annotations on a given set of pages from a to b. b is returned. *) | ||||||
| val copy_annotations : int list -> Pdf.t -> Pdf.t -> Pdf.t | val copy_annotations : int list -> Pdf.t -> Pdf.t -> Pdf.t | ||||||
| @@ -375,12 +305,12 @@ val bookmarks_open_to_level : int -> Pdf.t -> Pdf.t | |||||||
|  |  | ||||||
| val create_pdf : int -> Pdfpaper.t -> Pdf.t | val create_pdf : int -> Pdfpaper.t -> Pdf.t | ||||||
|  |  | ||||||
| val name_of_spec : encoding -> | val name_of_spec : Cpdfmetadata.encoding -> | ||||||
|            Pdfmarks.t list -> |            Pdfmarks.t list -> | ||||||
|            Pdf.t -> int -> string -> int -> string -> int -> int -> string |            Pdf.t -> int -> string -> int -> string -> int -> int -> string | ||||||
|  |  | ||||||
| val extract_images : string -> | val extract_images : string -> | ||||||
|            string -> |            string -> | ||||||
|            encoding -> bool -> bool -> Pdf.t -> int list -> string -> unit |            Cpdfmetadata.encoding -> bool -> bool -> Pdf.t -> int list -> string -> unit | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -417,7 +417,7 @@ type args = | |||||||
|    mutable retain_numbering : bool; |    mutable retain_numbering : bool; | ||||||
|    mutable remove_duplicate_fonts : bool; |    mutable remove_duplicate_fonts : bool; | ||||||
|    mutable remove_duplicate_streams : bool; |    mutable remove_duplicate_streams : bool; | ||||||
|    mutable encoding : Cpdf.encoding; |    mutable encoding : Cpdfmetadata.encoding; | ||||||
|    mutable scale : float; |    mutable scale : float; | ||||||
|    mutable copyfontpage : int; |    mutable copyfontpage : int; | ||||||
|    mutable copyfontname : string option; |    mutable copyfontname : string option; | ||||||
| @@ -536,7 +536,7 @@ let args = | |||||||
|    retain_numbering = false; |    retain_numbering = false; | ||||||
|    remove_duplicate_fonts = false; |    remove_duplicate_fonts = false; | ||||||
|    remove_duplicate_streams = false; |    remove_duplicate_streams = false; | ||||||
|    encoding = Cpdf.Stripped; |    encoding = Cpdfmetadata.Stripped; | ||||||
|    scale = 1.; |    scale = 1.; | ||||||
|    copyfontpage = 1; |    copyfontpage = 1; | ||||||
|    copyfontname = None; |    copyfontname = None; | ||||||
| @@ -655,7 +655,7 @@ let reset_arguments () = | |||||||
|   args.retain_numbering <- false; |   args.retain_numbering <- false; | ||||||
|   args.remove_duplicate_fonts <- false; |   args.remove_duplicate_fonts <- false; | ||||||
|   args.remove_duplicate_streams <- false; |   args.remove_duplicate_streams <- false; | ||||||
|   args.encoding <- Cpdf.Stripped; |   args.encoding <- Cpdfmetadata.Stripped; | ||||||
|   args.scale <- 1.; |   args.scale <- 1.; | ||||||
|   args.copyfontpage <- 1; |   args.copyfontpage <- 1; | ||||||
|   args.copyfontname <- None; |   args.copyfontname <- None; | ||||||
| @@ -1779,13 +1779,13 @@ and specs = | |||||||
|        Arg.Unit setrecrypt, |        Arg.Unit setrecrypt, | ||||||
|        " Keep this file's encryption when writing"); |        " Keep this file's encryption when writing"); | ||||||
|    ("-raw", |    ("-raw", | ||||||
|       Arg.Unit (setencoding Cpdf.Raw), |       Arg.Unit (setencoding Cpdfmetadata.Raw), | ||||||
|       " Do not process text"); |       " Do not process text"); | ||||||
|    ("-stripped", |    ("-stripped", | ||||||
|       Arg.Unit (setencoding Cpdf.Stripped), |       Arg.Unit (setencoding Cpdfmetadata.Stripped), | ||||||
|       " Process text by simple stripping to ASCII"); |       " Process text by simple stripping to ASCII"); | ||||||
|    ("-utf8", |    ("-utf8", | ||||||
|       Arg.Unit (setencoding Cpdf.UTF8), |       Arg.Unit (setencoding Cpdfmetadata.UTF8), | ||||||
|       " Process text by conversion to UTF8 Unicode"); |       " Process text by conversion to UTF8 Unicode"); | ||||||
|    ("-fast", |    ("-fast", | ||||||
|       Arg.Unit setfast, |       Arg.Unit setfast, | ||||||
| @@ -2724,15 +2724,15 @@ let unescape_octals s = | |||||||
|   implode (unescape_octals [] (explode s)) |   implode (unescape_octals [] (explode s)) | ||||||
|  |  | ||||||
| let process s =  | let process s =  | ||||||
|   if args.encoding <> Cpdf.Raw |   if args.encoding <> Cpdfmetadata.Raw | ||||||
|     then Pdftext.pdfdocstring_of_utf8 s |     then Pdftext.pdfdocstring_of_utf8 s | ||||||
|     else unescape_octals s |     else unescape_octals s | ||||||
|  |  | ||||||
| let set_producer s pdf = | let set_producer s pdf = | ||||||
|   ignore (Cpdf.set_pdf_info ("/Producer", Pdf.String (process s), 0) pdf) |   ignore (Cpdfmetadata.set_pdf_info ("/Producer", Pdf.String (process s), 0) pdf) | ||||||
|  |  | ||||||
| let set_creator s pdf = | let set_creator s pdf = | ||||||
|   ignore (Cpdf.set_pdf_info ("/Creator", Pdf.String (process s), 0) pdf) |   ignore (Cpdfmetadata.set_pdf_info ("/Creator", Pdf.String (process s), 0) pdf) | ||||||
|  |  | ||||||
| let really_write_pdf ?(encryption = None) ?(is_decompress=false) mk_id pdf outname = | let really_write_pdf ?(encryption = None) ?(is_decompress=false) mk_id pdf outname = | ||||||
|   if args.producer <> None then set_producer (unopt args.producer) pdf; |   if args.producer <> None then set_producer (unopt args.producer) pdf; | ||||||
| @@ -3146,8 +3146,8 @@ let go () = | |||||||
|         if inname <> "" then |         if inname <> "" then | ||||||
|           Printf.printf "Linearized: %b\n" (Pdfread.is_linearized (Pdfio.input_of_channel (open_in_bin inname))); |           Printf.printf "Linearized: %b\n" (Pdfread.is_linearized (Pdfio.input_of_channel (open_in_bin inname))); | ||||||
|         let pdf = decrypt_if_necessary input (Some Info) pdf in |         let pdf = decrypt_if_necessary input (Some Info) pdf in | ||||||
|           Cpdf.output_info args.encoding pdf; |           Cpdfmetadata.output_info args.encoding pdf; | ||||||
|           Cpdf.output_xmp_info args.encoding pdf |           Cpdfmetadata.output_xmp_info args.encoding pdf | ||||||
|   | Some PageInfo -> |   | Some PageInfo -> | ||||||
|       begin match args.inputs, args.out with |       begin match args.inputs, args.out with | ||||||
|       | (_, pagespec, _, _, _, _)::_, _ -> |       | (_, pagespec, _, _, _, _)::_, _ -> | ||||||
| @@ -3157,7 +3157,7 @@ let go () = | |||||||
|       | _ -> error "list-bookmarks: bad command line" |       | _ -> error "list-bookmarks: bad command line" | ||||||
|       end |       end | ||||||
|   | Some Metadata -> |   | Some Metadata -> | ||||||
|       Cpdf.print_metadata (get_single_pdf (Some Metadata) true) |       Cpdfmetadata.print_metadata (get_single_pdf (Some Metadata) true) | ||||||
|   | Some Fonts -> |   | Some Fonts -> | ||||||
|       begin match args.inputs, args.out with |       begin match args.inputs, args.out with | ||||||
|       | (_, pagespec, _, _, _, _)::_, _ -> |       | (_, pagespec, _, _, _, _)::_, _ -> | ||||||
| @@ -3357,14 +3357,14 @@ let go () = | |||||||
|           | SetCreate _ | SetModify _ | SetCreator _ | SetProducer _ |           | SetCreate _ | SetModify _ | SetCreator _ | SetProducer _ | ||||||
|           | SetTrapped | SetUntrapped) as op) -> |           | SetTrapped | SetUntrapped) as op) -> | ||||||
|       let key, value, version  = |       let key, value, version  = | ||||||
|         let f s = if args.encoding <> Cpdf.Raw then Pdftext.pdfdocstring_of_utf8 s else unescape_octals s in |         let f s = if args.encoding <> Cpdfmetadata.Raw then Pdftext.pdfdocstring_of_utf8 s else unescape_octals s in | ||||||
|           match op with |           match op with | ||||||
|           | SetAuthor s -> "/Author", Pdf.String (f s), 0 |           | SetAuthor s -> "/Author", Pdf.String (f s), 0 | ||||||
|           | SetTitle s -> "/Title", Pdf.String (f s), 1 |           | SetTitle s -> "/Title", Pdf.String (f s), 1 | ||||||
|           | SetSubject s -> "/Subject", Pdf.String (f s), 1 |           | SetSubject s -> "/Subject", Pdf.String (f s), 1 | ||||||
|           | SetKeywords s -> "/Keywords", Pdf.String (f s), 1 |           | SetKeywords s -> "/Keywords", Pdf.String (f s), 1 | ||||||
|           | SetCreate s -> "/CreationDate", Pdf.String (Cpdf.expand_date s), 0 |           | SetCreate s -> "/CreationDate", Pdf.String (Cpdfmetadata.expand_date s), 0 | ||||||
|           | SetModify s -> "/ModDate", Pdf.String (Cpdf.expand_date s), 0 |           | SetModify s -> "/ModDate", Pdf.String (Cpdfmetadata.expand_date s), 0 | ||||||
|           | SetCreator s -> "/Creator", Pdf.String (f s), 0 |           | SetCreator s -> "/Creator", Pdf.String (f s), 0 | ||||||
|           | SetProducer s -> "/Producer", Pdf.String (f s), 0 |           | SetProducer s -> "/Producer", Pdf.String (f s), 0 | ||||||
|           | SetTrapped -> "/Trapped", Pdf.Boolean true, 3 |           | SetTrapped -> "/Trapped", Pdf.Boolean true, 3 | ||||||
| @@ -3374,12 +3374,12 @@ let go () = | |||||||
|         let pdf = get_single_pdf args.op false in |         let pdf = get_single_pdf args.op false in | ||||||
|           let version = if args.keepversion then pdf.Pdf.minor else version in |           let version = if args.keepversion then pdf.Pdf.minor else version in | ||||||
|             write_pdf false |             write_pdf false | ||||||
|               (Cpdf.set_pdf_info  |               (Cpdfmetadata.set_pdf_info  | ||||||
|                  ~xmp_also:args.alsosetxml |                  ~xmp_also:args.alsosetxml | ||||||
|                  ~xmp_just_set:args.justsetxml |                  ~xmp_just_set:args.justsetxml | ||||||
|                  (key, value, version) pdf) |                  (key, value, version) pdf) | ||||||
|   | Some (SetMetadataDate date) -> |   | Some (SetMetadataDate date) -> | ||||||
|       write_pdf false (Cpdf.set_metadata_date (get_single_pdf args.op false) date) |       write_pdf false (Cpdfmetadata.set_metadata_date (get_single_pdf args.op false) date) | ||||||
|   | Some ((HideToolbar _ | HideMenubar _ | HideWindowUI _ |   | Some ((HideToolbar _ | HideMenubar _ | HideWindowUI _ | ||||||
|           | FitWindow _ | CenterWindow _ | DisplayDocTitle _) as op) -> |           | FitWindow _ | CenterWindow _ | DisplayDocTitle _) as op) -> | ||||||
|       begin match args.out with |       begin match args.out with | ||||||
| @@ -3396,20 +3396,20 @@ let go () = | |||||||
|         in |         in | ||||||
|       let pdf = get_single_pdf args.op false in |       let pdf = get_single_pdf args.op false in | ||||||
|      let version = if args.keepversion then pdf.Pdf.minor else version in |      let version = if args.keepversion then pdf.Pdf.minor else version in | ||||||
|           write_pdf false (Cpdf.set_viewer_preference (key, value, version) pdf) |           write_pdf false (Cpdfmetadata.set_viewer_preference (key, value, version) pdf) | ||||||
|       end |       end | ||||||
|   | Some (OpenAtPage str) -> |   | Some (OpenAtPage str) -> | ||||||
|       let pdf = get_single_pdf args.op false in |       let pdf = get_single_pdf args.op false in | ||||||
|       let range = parse_pagespec_allow_empty pdf str in |       let range = parse_pagespec_allow_empty pdf str in | ||||||
|       let n = match range with [x] -> x | _ -> error "open_at_page: range does not specify single page" in |       let n = match range with [x] -> x | _ -> error "open_at_page: range does not specify single page" in | ||||||
|         write_pdf false (Cpdf.set_open_action pdf false n) |         write_pdf false (Cpdfmetadata.set_open_action pdf false n) | ||||||
|   | Some (OpenAtPageFit str) -> |   | Some (OpenAtPageFit str) -> | ||||||
|       let pdf = get_single_pdf args.op false in |       let pdf = get_single_pdf args.op false in | ||||||
|       let range = parse_pagespec_allow_empty pdf str in |       let range = parse_pagespec_allow_empty pdf str in | ||||||
|       let n = match range with [x] -> x | _ -> error "open_at_page: range does not specify single page" in |       let n = match range with [x] -> x | _ -> error "open_at_page: range does not specify single page" in | ||||||
|         write_pdf false (Cpdf.set_open_action pdf true n) |         write_pdf false (Cpdfmetadata.set_open_action pdf true n) | ||||||
|   | Some (SetMetadata metadata_file) -> |   | Some (SetMetadata metadata_file) -> | ||||||
|       write_pdf false (Cpdf.set_metadata args.keepversion metadata_file (get_single_pdf args.op false)) |       write_pdf false (Cpdfmetadata.set_metadata args.keepversion metadata_file (get_single_pdf args.op false)) | ||||||
|   | Some (SetVersion v) -> |   | Some (SetVersion v) -> | ||||||
|       let pdf = get_single_pdf args.op false in |       let pdf = get_single_pdf args.op false in | ||||||
|       let pdf = |       let pdf = | ||||||
| @@ -3419,9 +3419,9 @@ let go () = | |||||||
|       in |       in | ||||||
|          write_pdf false pdf |          write_pdf false pdf | ||||||
|   | Some (SetPageLayout s) -> |   | Some (SetPageLayout s) -> | ||||||
|       write_pdf false (Cpdf.set_page_layout (get_single_pdf args.op false) s) |       write_pdf false (Cpdfmetadata.set_page_layout (get_single_pdf args.op false) s) | ||||||
|   | Some (SetPageMode s) -> |   | Some (SetPageMode s) -> | ||||||
|       write_pdf false (Cpdf.set_page_mode (get_single_pdf args.op false) s) |       write_pdf false (Cpdfmetadata.set_page_mode (get_single_pdf args.op false) s) | ||||||
|   | Some Split -> |   | Some Split -> | ||||||
|       begin match args.inputs, args.out with |       begin match args.inputs, args.out with | ||||||
|         | [(f, ranges, _, _, _, _)], File output_spec -> |         | [(f, ranges, _, _, _, _)], File output_spec -> | ||||||
| @@ -3514,7 +3514,7 @@ let go () = | |||||||
|       begin match args.inputs with |       begin match args.inputs with | ||||||
|       | [(k, _, u, o, _, _) as input] -> |       | [(k, _, u, o, _, _) as input] -> | ||||||
|           let pdf = |           let pdf = | ||||||
|             Cpdf.copy_id |             Cpdfmetadata.copy_id | ||||||
|               args.keepversion |               args.keepversion | ||||||
|               (pdfread_pdf_of_file (optstring u) (optstring o) getfrom) |               (pdfread_pdf_of_file (optstring u) (optstring o) getfrom) | ||||||
|               (get_pdf_from_input_kind input args.op k) |               (get_pdf_from_input_kind input args.op k) | ||||||
| @@ -3765,7 +3765,7 @@ let go () = | |||||||
|       args.recrypt <- false; |       args.recrypt <- false; | ||||||
|       write_pdf false (get_single_pdf args.op false) |       write_pdf false (get_single_pdf args.op false) | ||||||
|   | Some RemoveMetadata -> |   | Some RemoveMetadata -> | ||||||
|       write_pdf false (Cpdf.remove_metadata (get_single_pdf args.op false)) |       write_pdf false (Cpdfmetadata.remove_metadata (get_single_pdf args.op false)) | ||||||
|   | Some ExtractImages -> |   | Some ExtractImages -> | ||||||
|       let output_spec = |       let output_spec = | ||||||
|         begin match args.out with |         begin match args.out with | ||||||
| @@ -3838,7 +3838,7 @@ let go () = | |||||||
|           write_pdf false (Cpdf.remove_clipping pdf range) |           write_pdf false (Cpdf.remove_clipping pdf range) | ||||||
|   | Some CreateMetadata -> |   | Some CreateMetadata -> | ||||||
|       let pdf = get_single_pdf args.op false in |       let pdf = get_single_pdf args.op false in | ||||||
|         write_pdf false (Cpdf.create_metadata pdf) |         write_pdf false (Cpdfmetadata.create_metadata pdf) | ||||||
|   | Some EmbedMissingFonts -> |   | Some EmbedMissingFonts -> | ||||||
|       let fi = |       let fi = | ||||||
|         match args.inputs with |         match args.inputs with | ||||||
|   | |||||||
							
								
								
									
										661
									
								
								cpdfmetadata.ml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										661
									
								
								cpdfmetadata.ml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,661 @@ | |||||||
|  | open Pdfutil | ||||||
|  | open Pdfio | ||||||
|  | open Cpdferror | ||||||
|  |  | ||||||
|  | type encoding = | ||||||
|  |   | Raw | ||||||
|  |   | UTF8 | ||||||
|  |   | Stripped | ||||||
|  |  | ||||||
|  | (* Just strip everything which isn't 7 bit ASCII *) | ||||||
|  | let crude_de_unicode s = | ||||||
|  |   implode (map char_of_int (lose (fun x -> x > 127) (Pdftext.codepoints_of_pdfdocstring s))) | ||||||
|  |  | ||||||
|  | let encode_output enc s = | ||||||
|  |   match enc with | ||||||
|  |   | Raw -> s | ||||||
|  |   | UTF8 -> Pdftext.utf8_of_pdfdocstring s | ||||||
|  |   | Stripped -> crude_de_unicode s | ||||||
|  |  | ||||||
|  | let xmp_template = | ||||||
|  | {|<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?> | ||||||
|  |  | ||||||
|  | <rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' | ||||||
|  |  xmlns:iX='http://ns.adobe.com/iX/1.0/'> | ||||||
|  |  | ||||||
|  |  <rdf:Description about='' | ||||||
|  |   xmlns='http://ns.adobe.com/pdf/1.3/' | ||||||
|  |   xmlns:pdf='http://ns.adobe.com/pdf/1.3/'> | ||||||
|  |   <pdf:CreationDate>CREATEDATE</pdf:CreationDate> | ||||||
|  |   <pdf:ModDate>MODDATE</pdf:ModDate> | ||||||
|  |   <pdf:Producer>PRODUCER</pdf:Producer> | ||||||
|  |   <pdf:Creator>CREATOR</pdf:Creator> | ||||||
|  |   <pdf:Title>TITLE</pdf:Title> | ||||||
|  |   <pdf:Subject>SUBJECT</pdf:Subject> | ||||||
|  |   <pdf:Author>AUTHOR</pdf:Author> | ||||||
|  |   <pdf:Keywords>KEYWORDS</pdf:Keywords> | ||||||
|  |   <pdf:Trapped>TRAPPED</pdf:Trapped> | ||||||
|  |  </rdf:Description> | ||||||
|  |  | ||||||
|  |  <rdf:Description about='' | ||||||
|  |   xmlns='http://ns.adobe.com/xap/1.0/' | ||||||
|  |   xmlns:xap='http://ns.adobe.com/xap/1.0/'> | ||||||
|  |    <xap:CreateDate>CREATEDATE</xap:CreateDate> | ||||||
|  |    <xap:CreatorTool>CREATOR</xap:CreatorTool> | ||||||
|  |    <xap:ModifyDate>MODDATE</xap:ModifyDate> | ||||||
|  |    <xap:MetadataDate>METADATADATE</xap:MetadataDate> | ||||||
|  |  </rdf:Description> | ||||||
|  |  | ||||||
|  |  <rdf:Description about='' | ||||||
|  |   xmlns='http://purl.org/dc/elements/1.1/' | ||||||
|  |   xmlns:dc='http://purl.org/dc/elements/1.1/'> | ||||||
|  |    <dc:title>TITLE</dc:title> | ||||||
|  |  </rdf:Description> | ||||||
|  |  | ||||||
|  | </rdf:RDF> | ||||||
|  |  | ||||||
|  | <?xpacket end='r'?>|} | ||||||
|  |  | ||||||
|  | (* \section{Set or replace metadata} *) | ||||||
|  | let set_metadata_from_bytes keepversion data pdf = | ||||||
|  |   let metadata_stream = | ||||||
|  |     Pdf.Stream | ||||||
|  |       {contents = | ||||||
|  |         (Pdf.Dictionary | ||||||
|  |          ["/Length", Pdf.Integer (bytes_size data); | ||||||
|  |           "/Type", Pdf.Name "/Metadata"; | ||||||
|  |           "/Subtype", Pdf.Name "/XML"], | ||||||
|  |          Pdf.Got data)} | ||||||
|  |   in | ||||||
|  |     let objnum = Pdf.addobj pdf metadata_stream in  | ||||||
|  |       let document_catalog = | ||||||
|  |         match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with | ||||||
|  |         | Some s -> s | ||||||
|  |         | None -> error "Malformed PDF: No root." | ||||||
|  |       in | ||||||
|  |         let document_catalog' = | ||||||
|  |           Pdf.add_dict_entry document_catalog "/Metadata" (Pdf.Indirect objnum) | ||||||
|  |         in | ||||||
|  |           let rootnum = Pdf.addobj pdf document_catalog' in | ||||||
|  |             let trailerdict = | ||||||
|  |               Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootnum) | ||||||
|  |             in | ||||||
|  |             {pdf with | ||||||
|  |                Pdf.trailerdict = trailerdict; | ||||||
|  |                Pdf.root = rootnum; | ||||||
|  |                Pdf.minor = | ||||||
|  |                  if keepversion then pdf.Pdf.minor else max 4 pdf.Pdf.minor} | ||||||
|  |  | ||||||
|  | let set_metadata keepversion filename pdf = | ||||||
|  |   let ch = open_in_bin filename in | ||||||
|  |     let data = mkbytes (in_channel_length ch) in | ||||||
|  |       for x = 0 to bytes_size data - 1 do | ||||||
|  |         bset data x (input_byte ch) | ||||||
|  |       done; | ||||||
|  |       set_metadata_from_bytes keepversion data pdf | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | (* \section{Remove metadata} *) | ||||||
|  | let remove_metadata pdf = | ||||||
|  |   match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with | ||||||
|  |   | None -> error "malformed file"  | ||||||
|  |   | Some root -> | ||||||
|  |       let root' = Pdf.remove_dict_entry root "/Metadata" in | ||||||
|  |         let rootnum = Pdf.addobj pdf root' in | ||||||
|  |           {pdf with | ||||||
|  |              Pdf.trailerdict = | ||||||
|  |                Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootnum); | ||||||
|  |              Pdf.root = | ||||||
|  |                rootnum} | ||||||
|  | (* Print metadata *) | ||||||
|  | let get_metadata pdf = | ||||||
|  |   match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with | ||||||
|  |   | None -> error "malformed file" | ||||||
|  |   | Some root -> | ||||||
|  |       match Pdf.lookup_direct pdf "/Metadata" root with | ||||||
|  |       | Some ((Pdf.Stream _) as s) -> | ||||||
|  |           Pdfcodec.decode_pdfstream pdf s; | ||||||
|  |           begin match s with | ||||||
|  |           | Pdf.Stream {contents = (_, Pdf.Got data)} -> Some data  | ||||||
|  |           | _ -> assert false | ||||||
|  |           end | ||||||
|  |       | _ -> None | ||||||
|  |  | ||||||
|  | let print_metadata pdf = | ||||||
|  |   match get_metadata pdf with | ||||||
|  |     None -> () | ||||||
|  |   | Some data -> | ||||||
|  |       for x = 0 to bytes_size data - 1 do | ||||||
|  |         Printf.printf "%c" (char_of_int (bget data x)) | ||||||
|  |       done | ||||||
|  |  | ||||||
|  |  | ||||||
|  | let get_info raw pdf = | ||||||
|  |   let infodict = | ||||||
|  |     match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with | ||||||
|  |     | Some infodict -> infodict | ||||||
|  |     | _ -> Pdf.Dictionary [] | ||||||
|  |   in | ||||||
|  |     let getstring name = | ||||||
|  |       match Pdf.lookup_direct pdf name infodict with | ||||||
|  |       | Some (Pdf.String s) -> | ||||||
|  |           if raw then s else crude_de_unicode s | ||||||
|  |       | Some (Pdf.Boolean false) -> "False" | ||||||
|  |       | Some (Pdf.Boolean true) -> "True" | ||||||
|  |       | _ -> if name = "/Trapped" then "False" else "" | ||||||
|  |     in | ||||||
|  |       getstring | ||||||
|  |         | ||||||
|  | let get_info_utf8 pdf = | ||||||
|  |   let infodict = | ||||||
|  |     match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with | ||||||
|  |     | Some infodict -> infodict | ||||||
|  |     | _ -> Pdf.Dictionary [] | ||||||
|  |   in | ||||||
|  |     (function name -> | ||||||
|  |       match Pdf.lookup_direct pdf name infodict with | ||||||
|  |       | Some (Pdf.String s) -> Pdftext.utf8_of_pdfdocstring s | ||||||
|  |       | Some (Pdf.Boolean false) -> "False" | ||||||
|  |       | Some (Pdf.Boolean true) -> "True" | ||||||
|  |       | _ -> if name = "/Trapped" then "False" else "") | ||||||
|  |  | ||||||
|  | let getstring encoding pdf = | ||||||
|  |   match encoding with | ||||||
|  |   | Raw -> get_info true pdf | ||||||
|  |   | Stripped -> get_info false pdf | ||||||
|  |   | UTF8 -> get_info_utf8 pdf | ||||||
|  |  | ||||||
|  | let output_info encoding pdf = | ||||||
|  |   let getstring = getstring encoding pdf in | ||||||
|  |     Printf.printf "Version: %i.%i\n" pdf.Pdf.major pdf.Pdf.minor; | ||||||
|  |     Printf.printf "Pages: %i\n" (Pdfpage.endpage pdf); | ||||||
|  |     Printf.printf "Title: %s\n" (getstring "/Title"); | ||||||
|  |     Printf.printf "Author: %s\n" (getstring "/Author"); | ||||||
|  |     Printf.printf "Subject: %s\n" (getstring "/Subject"); | ||||||
|  |     Printf.printf "Keywords: %s\n" (getstring "/Keywords"); | ||||||
|  |     Printf.printf "Creator: %s\n" (getstring "/Creator"); | ||||||
|  |     Printf.printf "Producer: %s\n" (getstring "/Producer"); | ||||||
|  |     Printf.printf "Created: %s\n" (getstring "/CreationDate"); | ||||||
|  |     Printf.printf "Modified: %s\n" (getstring "/ModDate"); | ||||||
|  |     Printf.printf "Trapped: %s\n" (getstring "/Trapped") | ||||||
|  |  | ||||||
|  | type xmltree = | ||||||
|  |     E of Cpdfxmlm.tag * xmltree list | ||||||
|  |   | D of string | ||||||
|  |  | ||||||
|  | let xmltree_of_bytes b = | ||||||
|  |   let i = Cpdfxmlm.make_input (`String (0, string_of_bytes b)) in | ||||||
|  |     let el tag childs = E (tag, childs) | ||||||
|  |     and data d = D d in | ||||||
|  |       Cpdfxmlm.input_doc_tree ~el ~data i | ||||||
|  |  | ||||||
|  | let bytes_of_xmltree t = | ||||||
|  |   let buf = Buffer.create 1024 in | ||||||
|  |   let o = Cpdfxmlm.make_output (`Buffer buf) in | ||||||
|  |   let frag = function | ||||||
|  |       E (tag, childs) -> `El (tag, childs) | ||||||
|  |     | D d -> `Data d | ||||||
|  |   in | ||||||
|  |     Cpdfxmlm.output_doc_tree frag o t; | ||||||
|  |     bytes_of_string (Buffer.contents buf) | ||||||
|  |  | ||||||
|  | let rec string_of_xmltree = function | ||||||
|  |    D d -> | ||||||
|  |      Printf.sprintf "DATA {%s}" d | ||||||
|  |  | E (tag, trees) -> | ||||||
|  |      Printf.sprintf "ELT (%s, %s)" | ||||||
|  |        (string_of_tag tag) | ||||||
|  |        (string_of_xmltrees trees) | ||||||
|  |  | ||||||
|  | and string_of_tag ((n, n'), attributes) = | ||||||
|  |   Printf.sprintf | ||||||
|  |     "NAME |%s| |%s|, ATTRIBUTES {%s}" n n' | ||||||
|  |     (string_of_attributes attributes) | ||||||
|  |  | ||||||
|  | and string_of_attribute ((n, n'), str) = | ||||||
|  |   Printf.sprintf "ATTRNAME |%s| |%s|, STR {%s}" n n' str | ||||||
|  |  | ||||||
|  | and string_of_attributes attrs = | ||||||
|  |   fold_left | ||||||
|  |     (fun a b -> a ^ " " ^ b) "" (map string_of_attribute attrs) | ||||||
|  |  | ||||||
|  | and string_of_xmltrees trees = | ||||||
|  |   fold_left | ||||||
|  |     (fun a b -> a ^ " " ^ b) "" (map string_of_xmltree trees) | ||||||
|  |  | ||||||
|  | let adobe = "http://ns.adobe.com/pdf/1.3/" | ||||||
|  |  | ||||||
|  | let xmp = "http://ns.adobe.com/xap/1.0/" | ||||||
|  |  | ||||||
|  | let dc = "http://purl.org/dc/elements/1.1/" | ||||||
|  |  | ||||||
|  | let rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" | ||||||
|  |  | ||||||
|  | let combine_with_spaces strs = | ||||||
|  |   String.trim | ||||||
|  |     (fold_left (fun x y -> x ^ (if x <> "" then ", " else "") ^ y) "" strs) | ||||||
|  |  | ||||||
|  | (* Collect all <li> elements inside a seq, bag, or alt. Combine with commas. If | ||||||
|  | none found, return empty string instead. *) | ||||||
|  | let collect_list_items = function | ||||||
|  |    E (((n, n'), _), elts) when | ||||||
|  |      n = rdf && (n' = "Alt" || n' = "Seq" || n' = "Bag") | ||||||
|  |    -> | ||||||
|  |      combine_with_spaces | ||||||
|  |        (option_map | ||||||
|  |          (function | ||||||
|  |              E (((n, n'), _), [D d]) when n = rdf && n' = "li" -> | ||||||
|  |                Some d | ||||||
|  |            | _ -> None) | ||||||
|  |          elts) | ||||||
|  |  | _ -> "" | ||||||
|  |  | ||||||
|  | let collect_list_items_all all = | ||||||
|  |   match keep (function E _ -> true | _ -> false) all with | ||||||
|  |     h::_ -> Some (collect_list_items h) | ||||||
|  |   | [] -> None | ||||||
|  |  | ||||||
|  | let rec get_data_for namespace name = function | ||||||
|  |    D _ -> None | ||||||
|  |  | E (((n, n'), _), [D d]) when n = namespace && n' = name -> | ||||||
|  |      Some d | ||||||
|  |  | E (((n, n'), _), e) when n = namespace && n' = name -> | ||||||
|  |      collect_list_items_all e | ||||||
|  |  | E (_, l) -> | ||||||
|  |      match option_map (get_data_for namespace name) l with | ||||||
|  |        x :: _ -> Some x | ||||||
|  |      | _ -> None | ||||||
|  |  | ||||||
|  | let output_xmp_info encoding pdf = | ||||||
|  |   let print_out tree title namespace name = | ||||||
|  |     match get_data_for namespace name tree with | ||||||
|  |       None -> () | ||||||
|  |     | Some data -> | ||||||
|  |         Printf.printf "%s: " title; | ||||||
|  |         print_endline data | ||||||
|  |   in | ||||||
|  |     match get_metadata pdf with | ||||||
|  |       None -> () | ||||||
|  |     | Some metadata -> | ||||||
|  |         try | ||||||
|  |           let dtd, tree = xmltree_of_bytes metadata in | ||||||
|  |             print_out tree "XMP pdf:Keywords" adobe "Keywords"; | ||||||
|  |             print_out tree "XMP pdf:Producer" adobe "Producer"; | ||||||
|  |             print_out tree "XMP pdf:Trapped" adobe "Trapped"; | ||||||
|  |             print_out tree "XMP pdf:Title" adobe "Title"; | ||||||
|  |             print_out tree "XMP pdf:Creator" adobe "Creator"; | ||||||
|  |             print_out tree "XMP pdf:Subject" adobe "Subject"; | ||||||
|  |             print_out tree "XMP pdf:Author" adobe "Author"; | ||||||
|  |             print_out tree "XMP pdf:CreationDate" adobe "CreationDate"; | ||||||
|  |             print_out tree "XMP pdf:ModDate" adobe "ModDate"; | ||||||
|  |             print_out tree "XMP xmp:CreateDate" xmp "CreateDate"; | ||||||
|  |             print_out tree "XMP xmp:CreatorTool" xmp "CreatorTool"; | ||||||
|  |             print_out tree "XMP xmp:MetadataDate" xmp "MetadataDate"; | ||||||
|  |             print_out tree "XMP xmp:ModifyDate" xmp "ModifyDate"; | ||||||
|  |             print_out tree "XMP dc:title" dc "title"; | ||||||
|  |             print_out tree "XMP dc:creator" dc "creator"; | ||||||
|  |             print_out tree "XMP dc:subject" dc "subject"; | ||||||
|  |             print_out tree "XMP dc:description" dc "description" | ||||||
|  |         with | ||||||
|  |           _ -> () | ||||||
|  |  | ||||||
|  | (* Get XMP info equivalent of an old metadata field *) | ||||||
|  | let check = function | ||||||
|  |   "/Title" -> [(adobe, "Title"); (dc, "title")] | ||||||
|  | | "/Author" -> [(adobe, "Author"); (dc, "creator")] | ||||||
|  | | "/Subject" -> [(adobe, "Subject"); (dc, "subject")] | ||||||
|  | | "/Keywords" -> [(adobe, "Keywords")] | ||||||
|  | | "/Creator" -> [(adobe, "Creator"); (xmp, "CreatorTool")] | ||||||
|  | | "/Producer" -> [(adobe, "Producer")] | ||||||
|  | | "/CreationDate" -> [(adobe, "CreationDate"); (xmp, "CreateDate")] | ||||||
|  | | "/ModDate" -> [(adobe, "ModificationDate"); (xmp, "ModifyDate")] | ||||||
|  | | _ -> failwith "Cpdf.check_name not known" | ||||||
|  |  | ||||||
|  | let get_xmp_info pdf name = | ||||||
|  |   let tocheck = check name in | ||||||
|  |   match get_metadata pdf with | ||||||
|  |     None -> "" | ||||||
|  |   | Some metadata -> | ||||||
|  |       try | ||||||
|  |         let _, tree = xmltree_of_bytes metadata in | ||||||
|  |           let results = map (fun (kind, key) -> match get_data_for kind key tree with Some x -> x | None -> "") tocheck in | ||||||
|  |             match lose (eq "") results with | ||||||
|  |              x::_ -> x | ||||||
|  |            | [] -> "" | ||||||
|  |       with | ||||||
|  |         _ -> "" | ||||||
|  |  | ||||||
|  | (* Set XMP info *) | ||||||
|  | let rec set_xml_field kind fieldname value = function | ||||||
|  |   D data -> D data | ||||||
|  | | E (((n, n'), m), _ (*[D _]*)) when n = kind && n' = fieldname -> (* Replace anything inside, including nothing i.e <tag/> *) | ||||||
|  |     E (((n, n'), m), [D value]) | ||||||
|  | | E (x, ts) -> E (x, map (set_xml_field kind fieldname value) ts) | ||||||
|  |  | ||||||
|  | let set_pdf_info_xml kind fieldname value xmldata pdf = | ||||||
|  |   let dtd, tree = xmltree_of_bytes xmldata in | ||||||
|  |   let str = | ||||||
|  |     match value with | ||||||
|  |       Pdf.String s -> s | ||||||
|  |     | Pdf.Boolean true -> "True" | ||||||
|  |     | Pdf.Boolean false -> "False" | ||||||
|  |     | _ -> failwith "set_pdf_info_xml: not a string" | ||||||
|  |   in | ||||||
|  |   let newtree = set_xml_field kind fieldname str tree in | ||||||
|  |     bytes_of_xmltree (dtd, newtree) | ||||||
|  |  | ||||||
|  | let set_pdf_info_xml_many changes value xmldata pdf = | ||||||
|  |   let xmldata = ref xmldata in | ||||||
|  |     iter | ||||||
|  |       (fun (kind, fieldname) -> | ||||||
|  |          xmldata := set_pdf_info_xml kind fieldname value !xmldata pdf) | ||||||
|  |       changes; | ||||||
|  |     !xmldata | ||||||
|  |  | ||||||
|  |  | ||||||
|  | (* \section{Set an entry in the /Info dictionary} *) | ||||||
|  |  | ||||||
|  | (* We must parse the date to get its components, then use strftime to build the | ||||||
|  |  * new string in XMP format *) | ||||||
|  |  | ||||||
|  | type date = | ||||||
|  |   {mutable year : int; | ||||||
|  |    mutable month : int; (* 1 - 12 *) | ||||||
|  |    mutable day : int; (* 1 - 31 *) | ||||||
|  |    mutable hour : int; (* 0 - 23 *) | ||||||
|  |    mutable minute : int; (* 0 - 59 *) | ||||||
|  |    mutable second : int; (* 0 - 59 *) | ||||||
|  |    mutable ut_relationship : int; (* -1, 0, +1 *) | ||||||
|  |    mutable offset_hours : int; (* 0 - 59 *) | ||||||
|  |    mutable offset_minutes : int (* 0 - 59 *)} | ||||||
|  |  | ||||||
|  | let default_date () = | ||||||
|  |   {year = 0; | ||||||
|  |    month = 1; | ||||||
|  |    day = 1; | ||||||
|  |    hour = 0; | ||||||
|  |    minute = 0; | ||||||
|  |    second = 0; | ||||||
|  |    ut_relationship = 0; | ||||||
|  |    offset_hours = 0; | ||||||
|  |    offset_minutes = 0} | ||||||
|  |  | ||||||
|  | (* XMP date format is YYYY-MM-DDThh:mm:ssTZD *) | ||||||
|  | let make_xmp_date_from_components d = | ||||||
|  |   let tzd = | ||||||
|  |     if d.ut_relationship = 0 && d.offset_hours = 0 && d.offset_minutes = 0 then "Z" else | ||||||
|  |     (if d.ut_relationship >=0 then "+" else "-") ^ | ||||||
|  |     Printf.sprintf "%02i" d.offset_hours ^ | ||||||
|  |     ":" ^ | ||||||
|  |     Printf.sprintf "%02i" d.offset_minutes | ||||||
|  |   in  | ||||||
|  |     Cpdfstrftime.strftime | ||||||
|  |       ~time:{Cpdfstrftime._tm_sec = d.second; | ||||||
|  |              Cpdfstrftime._tm_min = d.minute; | ||||||
|  |              Cpdfstrftime._tm_hour = d.hour; | ||||||
|  |              Cpdfstrftime._tm_mday = d.day; | ||||||
|  |              Cpdfstrftime._tm_mon = d.month - 1; | ||||||
|  |              Cpdfstrftime._tm_year = d.year - 1900; | ||||||
|  |              Cpdfstrftime._tm_wday = 0; | ||||||
|  |              Cpdfstrftime._tm_yday = 0; | ||||||
|  |              Cpdfstrftime._tm_isdst = false} | ||||||
|  |       "%Y-%m-%dT%H:%M:%S" | ||||||
|  |   ^ | ||||||
|  |     tzd | ||||||
|  |  | ||||||
|  | let xmp_date date = | ||||||
|  |   let d = default_date () in | ||||||
|  |   try | ||||||
|  |     match explode date with | ||||||
|  |       'D'::':'::r -> | ||||||
|  |         begin match r with | ||||||
|  |           y1::y2::y3::y4::r -> | ||||||
|  |             d.year <- int_of_string (implode [y1; y2; y3; y4]); | ||||||
|  |             begin match r with | ||||||
|  |               m1::m2::r -> | ||||||
|  |                 d.month <- int_of_string (implode [m1; m2]); | ||||||
|  |                 begin match r with | ||||||
|  |                   d1::d2::r -> | ||||||
|  |                   d.day <- int_of_string (implode [d1; d2]); | ||||||
|  |                   begin match r with | ||||||
|  |                     h1::h2::r -> | ||||||
|  |                     d.hour <- int_of_string (implode [h1; h2]); | ||||||
|  |                     begin match r with | ||||||
|  |                       m1::m2::r -> | ||||||
|  |                       d.minute <- int_of_string (implode [m1; m2]); | ||||||
|  |                       begin match r with | ||||||
|  |                        s1::s2::r -> | ||||||
|  |                        d.second <- int_of_string (implode [s1; s2]); | ||||||
|  |                          begin match r with | ||||||
|  |                            o::r -> | ||||||
|  |                            d.ut_relationship <- | ||||||
|  |                              if o = '+' then 1 else | ||||||
|  |                              if o = '-' then -1 else | ||||||
|  |                              0; | ||||||
|  |                            begin match r with | ||||||
|  |                              h1::h2::'\''::r -> | ||||||
|  |                              d.offset_hours <- int_of_string (implode [h1; h2]); | ||||||
|  |                              begin match r with | ||||||
|  |                                m1::m2::_ -> | ||||||
|  |                                d.offset_minutes <- int_of_string (implode [m1; m2]); | ||||||
|  |                                raise Exit | ||||||
|  |                              | _ -> raise Exit | ||||||
|  |                              end | ||||||
|  |                            | _ -> raise Exit | ||||||
|  |                            end | ||||||
|  |                          | _ -> raise Exit | ||||||
|  |                          end | ||||||
|  |                       | _ -> raise Exit | ||||||
|  |                       end | ||||||
|  |                     | _ -> raise Exit | ||||||
|  |                     end | ||||||
|  |                   | _ -> raise Exit | ||||||
|  |                   end | ||||||
|  |                 | _ -> raise Exit | ||||||
|  |                 end | ||||||
|  |             | _ -> raise Exit   | ||||||
|  |             end | ||||||
|  |         | _ -> | ||||||
|  |           Printf.eprintf "xmp_date: Malformed date string (no year): %s\n%!" date; | ||||||
|  |           make_xmp_date_from_components d | ||||||
|  |         end | ||||||
|  |     | _ -> | ||||||
|  |         Printf.eprintf "xmp_date: Malformed date string (no prefix): %s\n%!" date; | ||||||
|  |         make_xmp_date_from_components d | ||||||
|  |   with | ||||||
|  |     Exit -> make_xmp_date_from_components d | ||||||
|  |  | ||||||
|  | let set_pdf_info ?(xmp_also=false) ?(xmp_just_set=false) (key, value, version) pdf = | ||||||
|  |   let infodict = | ||||||
|  |     match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with | ||||||
|  |     | Some d -> d | ||||||
|  |     | None -> Pdf.Dictionary [] | ||||||
|  |   in | ||||||
|  |     let infodict' = Pdf.add_dict_entry infodict key value in | ||||||
|  |       let objnum = Pdf.addobj pdf infodict' in | ||||||
|  |         if not xmp_just_set then | ||||||
|  |           begin | ||||||
|  |             pdf.Pdf.trailerdict <- | ||||||
|  |               Pdf.add_dict_entry pdf.Pdf.trailerdict "/Info" (Pdf.Indirect objnum); | ||||||
|  |             pdf.Pdf.minor <- | ||||||
|  |               max pdf.Pdf.minor version | ||||||
|  |           end; | ||||||
|  |         if xmp_also then | ||||||
|  |           begin match get_metadata pdf with | ||||||
|  |             None -> pdf | ||||||
|  |           | Some xmldata -> | ||||||
|  |               let xmp_date = function Pdf.String s -> Pdf.String (xmp_date s) | _ -> failwith "xmp_date not a string" in | ||||||
|  |               let changes, value = | ||||||
|  |                 match key with | ||||||
|  |                 | "/Producer" -> [(adobe, "Producer")], value | ||||||
|  |                 | "/Creator" -> [(adobe, "Creator"); (xmp, "CreatorTool"); (dc, "creator")], value | ||||||
|  |                 | "/Author" -> [(adobe, "Author")], value | ||||||
|  |                 | "/Title" -> [(adobe, "Title"); (dc, "title")], value | ||||||
|  |                 | "/Subject" -> [(adobe, "Subject"); (dc, "subject")], value | ||||||
|  |                 | "/Keywords" -> [(adobe, "Keywords")], value | ||||||
|  |                 | "/CreationDate" -> [(adobe, "CreationDate"); (xmp, "CreateDate")], xmp_date value | ||||||
|  |                 | "/ModDate" -> [(adobe, "ModDate"); (xmp, "ModifyDate")], xmp_date value | ||||||
|  |                 | "/Trapped" -> [(adobe, "Trapped")], value | ||||||
|  |                 | _ -> failwith "Unknown call to set_pdf_info" | ||||||
|  |               in | ||||||
|  |                 set_metadata_from_bytes | ||||||
|  |                   true | ||||||
|  |                   (set_pdf_info_xml_many changes value xmldata pdf) | ||||||
|  |                   pdf | ||||||
|  |           end | ||||||
|  |        else | ||||||
|  |          pdf | ||||||
|  |  | ||||||
|  | let expand_date = function | ||||||
|  |   | "now" -> | ||||||
|  |       begin match Sys.getenv_opt "CPDF_REPRODUCIBLE_DATES" with | ||||||
|  |       | Some "true" -> Cpdfstrftime.strftime ~time:Cpdfstrftime.dummy "D:%Y%m%d%H%M%S"  | ||||||
|  |       | _ -> Cpdfstrftime.strftime "D:%Y%m%d%H%M%S" | ||||||
|  |       end | ||||||
|  |   | x -> x | ||||||
|  |  | ||||||
|  | (* Set metadata date *) | ||||||
|  | let set_metadata_date pdf date = | ||||||
|  |   match get_metadata pdf with | ||||||
|  |     None -> pdf | ||||||
|  |   | Some xmldata -> | ||||||
|  |       let changes= [(xmp, "MetadataDate")] in | ||||||
|  |       let value = match date with "now" -> xmp_date (expand_date "now") | x -> x in | ||||||
|  |         set_metadata_from_bytes | ||||||
|  |           true | ||||||
|  |           (set_pdf_info_xml_many changes (Pdf.String value) xmldata pdf) | ||||||
|  |           pdf | ||||||
|  |  | ||||||
|  |  | ||||||
|  | (* \section{Copy an /ID from one file to another} *) | ||||||
|  | let copy_id keepversion copyfrom copyto = | ||||||
|  |   match Pdf.lookup_direct copyfrom "/ID" copyfrom.Pdf.trailerdict with | ||||||
|  |   | None -> copyto (* error "Source PDF file has no /ID entry to copy from" *) | ||||||
|  |   | Some id -> | ||||||
|  |       copyto.Pdf.trailerdict <- | ||||||
|  |         Pdf.add_dict_entry copyto.Pdf.trailerdict "/ID" id; | ||||||
|  |       copyto.Pdf.minor <- | ||||||
|  |         if keepversion then copyto.Pdf.minor else max copyto.Pdf.minor 1; | ||||||
|  |       copyto | ||||||
|  |  | ||||||
|  | let replacements pdf = | ||||||
|  |   let info = get_info_utf8 pdf in | ||||||
|  |     [("CREATEDATE", xmp_date (let i = info "/CreationDate" in if i = "" then expand_date "now" else i)); | ||||||
|  |      ("MODDATE", xmp_date (let i = info "/ModDate" in if i = "" then expand_date "now" else i)); | ||||||
|  |      ("PRODUCER", info "/Producer"); | ||||||
|  |      ("CREATOR", info "/Creator"); | ||||||
|  |      ("TITLE", info "/Title"); | ||||||
|  |      ("SUBJECT", info "/Subject"); | ||||||
|  |      ("AUTHOR", info "/Author"); | ||||||
|  |      ("KEYWORDS", info "/Keywords"); | ||||||
|  |      ("TRAPPED", info "/Trapped"); | ||||||
|  |      ("METADATADATE", xmp_date (expand_date "now"))] | ||||||
|  |  | ||||||
|  | let create_metadata pdf = | ||||||
|  |   let xmp = ref xmp_template in | ||||||
|  |   iter | ||||||
|  |     (fun (s, r) -> xmp := string_replace_all s r !xmp) | ||||||
|  |     (replacements pdf); | ||||||
|  |   set_metadata_from_bytes false (bytes_of_string !xmp) pdf | ||||||
|  |  | ||||||
|  | (* \section{Set viewer preferences} *) | ||||||
|  | let set_viewer_preference (key, value, version) pdf = | ||||||
|  |   match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with | ||||||
|  |   | Some catalog -> | ||||||
|  |       let viewer_preferences = | ||||||
|  |         match Pdf.lookup_direct pdf "/ViewerPreferences" catalog with | ||||||
|  |         | Some d -> d | ||||||
|  |         | None -> Pdf.Dictionary [] | ||||||
|  |       in | ||||||
|  |         let viewer_preferences' = | ||||||
|  |           Pdf.add_dict_entry viewer_preferences key value | ||||||
|  |         in | ||||||
|  |           let catalog' = | ||||||
|  |             Pdf.add_dict_entry catalog "/ViewerPreferences" viewer_preferences' | ||||||
|  |           in | ||||||
|  |             let catalognum = Pdf.addobj pdf catalog' in | ||||||
|  |               let trailerdict' = | ||||||
|  |                 Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum) | ||||||
|  |               in | ||||||
|  |                 {pdf with | ||||||
|  |                   Pdf.minor = max pdf.Pdf.minor version; | ||||||
|  |                   Pdf.root = catalognum; | ||||||
|  |                   Pdf.trailerdict = trailerdict'} | ||||||
|  |   | None -> error "bad root" | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | (* \section{Set page layout} *) | ||||||
|  | let set_page_layout pdf s = | ||||||
|  |   match s with | ||||||
|  |   | "SinglePage" | "OneColumn" | "TwoColumnLeft" | ||||||
|  |   | "TwoColumnRight" | "TwoPageLeft" | "TwoPageRight" -> | ||||||
|  |       begin match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with | ||||||
|  |       | Some catalog -> | ||||||
|  |           let catalog' = | ||||||
|  |             Pdf.add_dict_entry catalog "/PageLayout" (Pdf.Name ("/" ^ s)) | ||||||
|  |           in | ||||||
|  |             let catalognum = Pdf.addobj pdf catalog' in | ||||||
|  |               let trailerdict' = | ||||||
|  |                 Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum) | ||||||
|  |               in | ||||||
|  |                 {pdf with | ||||||
|  |                   Pdf.root = catalognum; | ||||||
|  |                   Pdf.trailerdict = trailerdict'} | ||||||
|  |       | None -> error "bad root" | ||||||
|  |       end | ||||||
|  |   | _ -> error "Unknown page layout" | ||||||
|  |  | ||||||
|  |  | ||||||
|  | (* \section{Set page mode} *) | ||||||
|  | let set_page_mode pdf s = | ||||||
|  |   match s with | ||||||
|  |   | "UseNone" | "UseOutlines" | "UseThumbs" | ||||||
|  |   | "FullScreen" | "UseOC" | "UseAttachments" -> | ||||||
|  |       begin match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with | ||||||
|  |       | Some catalog -> | ||||||
|  |           let catalog' = | ||||||
|  |             Pdf.add_dict_entry catalog "/PageMode" (Pdf.Name ("/" ^ s)) | ||||||
|  |           in | ||||||
|  |             let catalognum = Pdf.addobj pdf catalog' in | ||||||
|  |               let trailerdict' = | ||||||
|  |                 Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum) | ||||||
|  |               in | ||||||
|  |                 {pdf with | ||||||
|  |                   Pdf.root = catalognum; | ||||||
|  |                   Pdf.trailerdict = trailerdict'} | ||||||
|  |       | None -> error "bad root" | ||||||
|  |       end | ||||||
|  |   | _ -> error "Unknown page mode" | ||||||
|  |  | ||||||
|  | (* Set open action *) | ||||||
|  | let set_open_action pdf fit pagenumber = | ||||||
|  |   if pagenumber > Pdfpage.endpage pdf || pagenumber < 0 then | ||||||
|  |     raise (error "set_open_action: invalid page number") | ||||||
|  |   else | ||||||
|  |     let pageobjectnumber = select pagenumber (Pdf.page_reference_numbers pdf) in | ||||||
|  |       let destination = | ||||||
|  |         if fit then | ||||||
|  |           Pdf.Array [Pdf.Indirect pageobjectnumber; Pdf.Name "/Fit"] | ||||||
|  |         else | ||||||
|  |           Pdf.Array [Pdf.Indirect pageobjectnumber; Pdf.Name "/XYZ"; Pdf.Null; Pdf.Null; Pdf.Null] | ||||||
|  |       in | ||||||
|  |         let open_action = | ||||||
|  |           Pdf.Dictionary [("/D", destination); ("/S", Pdf.Name "/GoTo")] | ||||||
|  |         in | ||||||
|  |           match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with | ||||||
|  |           | Some catalog -> | ||||||
|  |               let catalog' = | ||||||
|  |                 Pdf.add_dict_entry catalog "/OpenAction" open_action | ||||||
|  |               in | ||||||
|  |                 let catalognum = Pdf.addobj pdf catalog' in | ||||||
|  |                   let trailerdict' = | ||||||
|  |                     Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum) | ||||||
|  |                   in | ||||||
|  |                     {pdf with Pdf.root = catalognum; Pdf.trailerdict = trailerdict'} | ||||||
|  |           | None -> error "bad root" | ||||||
|  |  | ||||||
|  |  | ||||||
|  | let set_version v pdf = | ||||||
|  |   pdf.Pdf.minor <- v | ||||||
							
								
								
									
										74
									
								
								cpdfmetadata.mli
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										74
									
								
								cpdfmetadata.mli
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,74 @@ | |||||||
|  |  | ||||||
|  | (** {2 Types and Exceptions} *) | ||||||
|  |  | ||||||
|  | (** Possible output encodings for some function. [Raw] does no processing at | ||||||
|  | all - the PDF string is output as-is. [UTF8] converts loslessly to UTF8. | ||||||
|  | [Stripped] extracts the unicode codepoints and returns only those which | ||||||
|  | correspond to 7 bit ASCII. *) | ||||||
|  | type encoding = Raw | UTF8 | Stripped | ||||||
|  |  | ||||||
|  | val encode_output : encoding -> string -> string | ||||||
|  |  | ||||||
|  | (** {2 Metadata and settings} *) | ||||||
|  |  | ||||||
|  | (** [copy_id keepversion copyfrom copyto] copies the ID, if any, from | ||||||
|  | [copyfrom] to [copyto]. If [keepversion] is true, the PDF version of [copyto] | ||||||
|  | won't be affected. *) | ||||||
|  | val copy_id : bool -> Pdf.t -> Pdf.t -> Pdf.t | ||||||
|  |  | ||||||
|  | (** [set_pdf_info (key, value, version)] sets the entry [key] in the /Info directory, updating | ||||||
|  | the PDF minor version to [version].*) | ||||||
|  | val set_pdf_info : ?xmp_also:bool -> ?xmp_just_set:bool -> (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t | ||||||
|  |  | ||||||
|  | val get_xmp_info : Pdf.t -> string -> string | ||||||
|  |  | ||||||
|  | (** [set_pdf_info (key, value, version)] sets the entry [key] in the | ||||||
|  | /ViewerPreferences directory, updating the PDF minor version to [version].*) | ||||||
|  | val set_viewer_preference : (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t | ||||||
|  |  | ||||||
|  | (** Set the page layout to the given name (sans slash) e.g SinglePage *) | ||||||
|  | val set_page_layout : Pdf.t -> string -> Pdf.t | ||||||
|  |  | ||||||
|  | (** Set the page layout to the given name (sans slash) e.g SinglePage *) | ||||||
|  | val set_page_mode : Pdf.t -> string -> Pdf.t | ||||||
|  |  | ||||||
|  | (** Set the open action. If the boolean is true, /Fit will be used, otherwise /XYZ *) | ||||||
|  | val set_open_action : Pdf.t -> bool -> int -> Pdf.t | ||||||
|  |  | ||||||
|  | (** Set the PDF version number *) | ||||||
|  | val set_version : int -> Pdf.t -> unit | ||||||
|  |  | ||||||
|  | (** Given a PDF, returns a function which can lookup a given dictionary entry | ||||||
|  | from the /Info dictionary, returning it as a UTF8 string *) | ||||||
|  | val get_info_utf8 : Pdf.t -> string -> string | ||||||
|  |  | ||||||
|  | (** Output to standard output general information about a PDF. *) | ||||||
|  | val output_info : encoding -> Pdf.t -> unit | ||||||
|  |  | ||||||
|  | (** Output to standard output information from any XMP metadata stream in a PDF. *) | ||||||
|  | val output_xmp_info : encoding -> Pdf.t -> unit | ||||||
|  |  | ||||||
|  | (** Create XMP metadata from scratch *) | ||||||
|  | val create_metadata : Pdf.t -> Pdf.t | ||||||
|  |  | ||||||
|  | (** {2 XML Metadata} *) | ||||||
|  |  | ||||||
|  | (** [set_metadata keepversion filename pdf] sets the XML metadata of a PDF to the contents of [filename]. If [keepversion] is true, the PDF version will not be altered. *)  | ||||||
|  | val set_metadata : bool -> string -> Pdf.t -> Pdf.t | ||||||
|  |  | ||||||
|  | (** The same, but the content comes from [bytes]. *) | ||||||
|  | val set_metadata_from_bytes : bool -> Pdfio.bytes -> Pdf.t -> Pdf.t | ||||||
|  |  | ||||||
|  | (** Remove the metadata from a file *) | ||||||
|  | val remove_metadata : Pdf.t -> Pdf.t | ||||||
|  |  | ||||||
|  | (** Extract metadata to a [Pdfio.bytes] *) | ||||||
|  | val get_metadata : Pdf.t -> Pdfio.bytes option | ||||||
|  |  | ||||||
|  | (** Print metadate to stdout *) | ||||||
|  | val print_metadata : Pdf.t -> unit | ||||||
|  |  | ||||||
|  | (** Set the metadata date *) | ||||||
|  | val set_metadata_date : Pdf.t -> string -> Pdf.t | ||||||
|  |  | ||||||
|  | val expand_date : string -> string | ||||||
		Reference in New Issue
	
	Block a user