mirror of
				https://github.com/johnwhitington/cpdf-source.git
				synced 2025-06-05 22:09:39 +02:00 
			
		
		
		
	more
This commit is contained in:
		
							
								
								
									
										4
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								Makefile
									
									
									
									
									
								
							| @@ -1,8 +1,8 @@ | |||||||
| # Build the cpdf command line tools and top level | # Build the cpdf command line tools and top level | ||||||
| MODS = cpdfyojson cpdfxmlm \ | MODS = cpdfyojson cpdfxmlm \ | ||||||
|        cpdfunicodedata cpdferror cpdfdebug cpdfjson cpdfstrftime cpdfcoord cpdfattach \ |        cpdfunicodedata cpdferror cpdfdebug cpdfjson cpdfstrftime cpdfcoord cpdfattach \ | ||||||
|        cpdfpagespec cpdfposition cpdfpresent cpdfmetadata cpdf cpdffont cpdftype \ |        cpdfpagespec cpdfposition cpdfpresent cpdfmetadata cpdfbookmarks cpdfpage cpdf cpdffont cpdftype \ | ||||||
|        cpdftexttopdf cpdftoc cpdfpad cpdfocg cpdfsqueeze cpdfspot cpdfpagelabels cpdfcreate cpdfcommand |        cpdftexttopdf cpdftoc cpdfpad cpdfocg cpdfsqueeze cpdfspot cpdfpagelabels cpdfcreate cpdfannot cpdfcommand | ||||||
|  |  | ||||||
| SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml | SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										650
									
								
								cpdf.ml
									
									
									
									
									
								
							
							
						
						
									
										650
									
								
								cpdf.ml
									
									
									
									
									
								
							| @@ -23,30 +23,6 @@ let rec process_text time text m = | |||||||
|    function transforms into one which returns the identity matrix *) |    function transforms into one which returns the identity matrix *) | ||||||
| let ppstub f n p = (f n p, n, Pdftransform.i_matrix) | let ppstub f n p = (f n p, n, Pdftransform.i_matrix) | ||||||
|  |  | ||||||
| let process_pages f pdf range = |  | ||||||
|   let pages = Pdfpage.pages_of_pagetree pdf in |  | ||||||
|     let pages', pagenumbers, matrices = (* new page objects, page number, matrix *) |  | ||||||
|       split3 |  | ||||||
|         (map2 |  | ||||||
|           (fun n p -> if mem n range then f n p else (p, n, Pdftransform.i_matrix)) |  | ||||||
|           (ilist 1 (length pages)) |  | ||||||
|           pages) |  | ||||||
|     in |  | ||||||
|       Pdfpage.change_pages ~matrices:(combine pagenumbers matrices) true pdf pages' |  | ||||||
|  |  | ||||||
| let iter_pages f pdf range = |  | ||||||
|   let pages = Pdfpage.pages_of_pagetree pdf in |  | ||||||
|     iter2 |  | ||||||
|       (fun n p -> if mem n range then f n p) |  | ||||||
|       (ilist 1 (length pages)) |  | ||||||
|       pages |  | ||||||
|  |  | ||||||
| let map_pages f pdf range = |  | ||||||
|   let pages = Pdfpage.pages_of_pagetree pdf in |  | ||||||
|     option_map2 |  | ||||||
|       (fun n p -> if mem n range then Some (f n p) else None) |  | ||||||
|       (ilist 1 (length pages)) |  | ||||||
|       pages |  | ||||||
|  |  | ||||||
| (* Add stack operators to a content stream to ensure it is composable. On | (* Add stack operators to a content stream to ensure it is composable. On | ||||||
| -fast, we don't check for Q deficit, assuming PDF is ISO. *) | -fast, we don't check for Q deficit, assuming PDF is ISO. *) | ||||||
| @@ -67,7 +43,7 @@ let protect fast pdf resources content = | |||||||
|  |  | ||||||
| (* If a cropbox exists, make it the mediabox. If not, change nothing. *) | (* If a cropbox exists, make it the mediabox. If not, change nothing. *) | ||||||
| let copy_cropbox_to_mediabox pdf range = | let copy_cropbox_to_mediabox pdf range = | ||||||
|   process_pages |   Cpdfpage.process_pages | ||||||
|     (ppstub (fun _ page -> |     (ppstub (fun _ page -> | ||||||
|        match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with |        match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with | ||||||
|        | Some pdfobject -> {page with Pdfpage.mediabox = Pdf.direct pdf pdfobject} |        | Some pdfobject -> {page with Pdfpage.mediabox = Pdf.direct pdf pdfobject} | ||||||
| @@ -113,333 +89,6 @@ let combine_pdf_resources pdf a b = | |||||||
|             (Pdf.Dictionary []) |             (Pdf.Dictionary []) | ||||||
|             (unknown_keys_a @ unknown_keys_b @ combined_known_entries) |             (unknown_keys_a @ unknown_keys_b @ combined_known_entries) | ||||||
|  |  | ||||||
| (* \section{Remove bookmarks} *) |  | ||||||
|  |  | ||||||
| (* \section{Add bookmarks} *) |  | ||||||
| let read_lines input = |  | ||||||
|   let lines = ref [] in |  | ||||||
|    try |  | ||||||
|      while true do |  | ||||||
|        let c = read_line input in |  | ||||||
|          lines =| c |  | ||||||
|     done; [] |  | ||||||
|    with |  | ||||||
|      _ -> rev !lines |  | ||||||
|  |  | ||||||
| (* Verify a list of bookmarks. Positive jumps of > 1 not allowed, no numbers |  | ||||||
| smaller than 0. *) |  | ||||||
| let rec verify_bookmarks pdf lastlevel fastrefnums endpage = function |  | ||||||
|   | [] -> true |  | ||||||
|   | {Pdfmarks.level = level; Pdfmarks.target = target}::more -> |  | ||||||
|       let page = Pdfpage.pagenumber_of_target pdf ~fastrefnums target in |  | ||||||
|         level < lastlevel + 2 && |  | ||||||
|         level >= 0 && |  | ||||||
|         page <= endpage && |  | ||||||
|         page >= 0 && |  | ||||||
|         verify_bookmarks pdf level fastrefnums endpage more |  | ||||||
|  |  | ||||||
| let verify_bookmarks pdf lastlevel endpage marks = |  | ||||||
|   let refnums = Pdf.page_reference_numbers pdf in |  | ||||||
|   let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in |  | ||||||
|     match marks with |  | ||||||
|     | [] -> true |  | ||||||
|     | m::more -> m.Pdfmarks.level = 0 && verify_bookmarks pdf lastlevel fastrefnums endpage more |  | ||||||
|  |  | ||||||
| (* Parse a line of the bookmarks file. *) |  | ||||||
|  |  | ||||||
| (* Un-escape things which are escaped. Quotes, newlines and backslashes *) |  | ||||||
| let rec fixup_characters prev = function |  | ||||||
|   | [] -> rev prev |  | ||||||
|   | '\\'::'\\'::t -> fixup_characters ('\\'::prev) t |  | ||||||
|   | '\\'::'"'::t -> fixup_characters ('"'::prev) t |  | ||||||
|   | '\\'::'n'::t -> fixup_characters ('\n'::prev) t |  | ||||||
|   | h::t -> fixup_characters (h::prev) t |  | ||||||
|  |  | ||||||
| let debug_bookmark_string s = |  | ||||||
|   Printf.printf "STR: %s\n" s |  | ||||||
|  |  | ||||||
| (* If optionaldest = [Pdfgenlex.LexString s], we parse the string, convert the |  | ||||||
|  * integer to an indirect of the real page target, and then put it in. *) |  | ||||||
| let target_of_markfile_obj pdf i' pdfobj = |  | ||||||
|   (*Printf.printf "Parsed %s\n" (Pdfwrite.string_of_pdf pdfobj);*) |  | ||||||
|   match pdfobj with |  | ||||||
|     Pdf.Array (Pdf.Integer x::more) -> |  | ||||||
|       let pageobjnum = Pdfpage.page_object_number pdf i' in |  | ||||||
|         begin match pageobjnum with |  | ||||||
|           None -> |  | ||||||
|             raise (Pdf.PDFError "bookmark_of_data: page obj num not found") |  | ||||||
|         | Some p -> |  | ||||||
|             Pdfdest.read_destination pdf (Pdf.Array (Pdf.Indirect p::more)) |  | ||||||
|         end |  | ||||||
|   (* Need to deal with "null", "(string)", and "<<other thing like action" *)  |  | ||||||
|   | Pdf.Null -> Pdfdest.NullDestination |  | ||||||
|   | Pdf.String s -> Pdfdest.read_destination pdf (Pdf.String s) |  | ||||||
|   | x -> Pdfdest.Action x |  | ||||||
|  |  | ||||||
| let target_of_markfile_target pdf i' = function |  | ||||||
|   | [Pdfgenlex.LexString s] -> |  | ||||||
|       let pdfobj = Pdfread.parse_single_object s in |  | ||||||
|         target_of_markfile_obj pdf i' pdfobj |  | ||||||
|   | _ -> Pdfpage.target_of_pagenumber pdf i' |  | ||||||
|  |  | ||||||
| let bookmark_of_data pdf i s i' isopen optionaldest = |  | ||||||
|     (*debug_bookmark_string s; |  | ||||||
|     debug_bookmark_string (implode (fixup_characters [] (explode s))); |  | ||||||
|     debug_bookmark_string (Pdftext.pdfdocstring_of_utf8 (implode (fixup_characters [] (explode s))));*) |  | ||||||
|     {Pdfmarks.level = i; |  | ||||||
|      Pdfmarks.text = Pdftext.pdfdocstring_of_utf8 (implode (fixup_characters [] (explode s))); |  | ||||||
|      Pdfmarks.target = target_of_markfile_target pdf i' optionaldest; |  | ||||||
|      Pdfmarks.isopen = isopen} |  | ||||||
|  |  | ||||||
| let target_of_json_target pdf pagenumber target =  |  | ||||||
|   target_of_markfile_obj pdf pagenumber (Cpdfjson.object_of_json target) |  | ||||||
|  |  | ||||||
| let mark_of_json pdf = function |  | ||||||
|   | `Assoc [("level", `Int level); |  | ||||||
|             ("text", `String text); |  | ||||||
|             ("page", `Int pagenumber); |  | ||||||
|             ("open", `Bool openstatus); |  | ||||||
|             ("target", target)] -> |  | ||||||
|        {Pdfmarks.level = level; |  | ||||||
|         Pdfmarks.text = Pdftext.pdfdocstring_of_utf8 text; |  | ||||||
|         Pdfmarks.target = target_of_json_target pdf pagenumber target; |  | ||||||
|         Pdfmarks.isopen = openstatus} |  | ||||||
|   | _ -> error "malformed mark in mark_of_json" |  | ||||||
|  |  | ||||||
| let marks_of_json pdf = function |  | ||||||
|   | `List ms -> map (mark_of_json pdf) ms |  | ||||||
|   | _ -> error "top level of JSON boomark file not a list" |  | ||||||
|  |  | ||||||
| let parse_bookmark_file_json verify pdf i = |  | ||||||
|   let module J = Cpdfyojson.Safe in |  | ||||||
|     try |  | ||||||
|       let json = |  | ||||||
|         match i.Pdfio.caml_channel with |  | ||||||
|         | Some ch -> J.from_channel ch |  | ||||||
|         | None -> |  | ||||||
|           let content = Pdfio.string_of_bytes (Pdfio.bytes_of_input i 0 i.Pdfio.in_channel_length) in |  | ||||||
|             J.from_string content |  | ||||||
|       in |  | ||||||
|       let marks = marks_of_json pdf json in |  | ||||||
|         if verify then |  | ||||||
|           if verify_bookmarks pdf 0 (Pdfpage.endpage pdf) marks then marks else |  | ||||||
|             error "Bad bookmark file (References non-existant pages or is malformed)" |  | ||||||
|         else |  | ||||||
|           marks |  | ||||||
|     with |  | ||||||
|       e -> |  | ||||||
|         error (Printf.sprintf "Malformed JSON bookmark file (%s)" (Printexc.to_string e))  |  | ||||||
|  |  | ||||||
| let parse_bookmark_file verify pdf input = |  | ||||||
|   let currline = ref 0 in |  | ||||||
|   try |  | ||||||
|     let lines = Pdfio.read_lines input in |  | ||||||
|       let currline = ref 0 in |  | ||||||
|       let bookmarks = ref [] in |  | ||||||
|         iter |  | ||||||
|           (function line -> |  | ||||||
|              match |  | ||||||
|                incr currline; |  | ||||||
|                Pdfgenlex.lex_string line |  | ||||||
|              with |  | ||||||
|              | Pdfgenlex.LexInt i::Pdfgenlex.LexString s::Pdfgenlex.LexInt i'::Pdfgenlex.LexName "open"::optionaldest -> |  | ||||||
|                  bookmarks =| bookmark_of_data pdf i s i' true optionaldest |  | ||||||
|              | Pdfgenlex.LexInt i::Pdfgenlex.LexString s::Pdfgenlex.LexInt i'::optionaldest -> |  | ||||||
|                  bookmarks =| bookmark_of_data pdf i s i' false optionaldest |  | ||||||
|              | [] -> () (* ignore blank lines *) |  | ||||||
|              | _ -> |  | ||||||
|                  error ("Bad bookmark file, line " ^ (string_of_int !currline))) |  | ||||||
|           lines; |  | ||||||
|         let bookmarks = rev !bookmarks in |  | ||||||
|           if verify then |  | ||||||
|             if verify_bookmarks pdf 0 (Pdfpage.endpage pdf) bookmarks |  | ||||||
|                 then bookmarks |  | ||||||
|                 else |  | ||||||
|                   error |  | ||||||
|                     "Bad bookmark file (References non-existant pages or is malformed)" |  | ||||||
|             else |  | ||||||
|               bookmarks |  | ||||||
|   with |  | ||||||
|     e -> |  | ||||||
|       error |  | ||||||
|         (Printf.sprintf  |  | ||||||
|            "Bad bookmark file (syntax) at line %i (error was %s)" |  | ||||||
|            !currline |  | ||||||
|            (Printexc.to_string e)) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| let add_bookmarks ~json verify input pdf = |  | ||||||
|   let parsed = |  | ||||||
|     (if json then parse_bookmark_file_json else parse_bookmark_file) verify pdf input in |  | ||||||
|     (*iter (fun b -> flprint (Pdfmarks.string_of_bookmark b); flprint "\n") parsed;*) |  | ||||||
|     Pdfmarks.add_bookmarks parsed pdf  |  | ||||||
|  |  | ||||||
| (* List bookmarks *) |  | ||||||
| let output_string_of_target pdf fastrefnums x = |  | ||||||
|   match Pdfdest.pdfobject_of_destination x with |  | ||||||
|   | Pdf.Array (_::more) -> |  | ||||||
|       let a = |  | ||||||
|         Pdf.Array (Pdf.Integer (Pdfpage.pagenumber_of_target ~fastrefnums pdf x)::more) |  | ||||||
|       in |  | ||||||
|         "\"" ^ Pdfwrite.string_of_pdf a ^ "\""  |  | ||||||
|   | x -> "\"" ^ Pdfwrite.string_of_pdf x ^ "\"" |  | ||||||
|  |  | ||||||
| let json_of_target pdf fastrefnums x = |  | ||||||
|   match Pdfdest.pdfobject_of_destination x with |  | ||||||
|   | Pdf.Array (_::more) -> |  | ||||||
|       let a = |  | ||||||
|         Pdf.Array (Pdf.Integer (Pdfpage.pagenumber_of_target ~fastrefnums pdf x)::more) |  | ||||||
|       in |  | ||||||
|         Cpdfjson.json_of_object pdf (fun _ -> ()) false false a |  | ||||||
|   | x -> Cpdfjson.json_of_object pdf (fun _ -> ()) false false x |  | ||||||
|  |  | ||||||
| let output_json_marks ch calculate_page_number pdf fastrefnums marks = |  | ||||||
|   let module J = Cpdfyojson.Safe in |  | ||||||
|   let json_of_mark m = |  | ||||||
|     `Assoc |  | ||||||
|        [("level", `Int m.Pdfmarks.level); |  | ||||||
|         ("text", `String (Pdftext.utf8_of_pdfdocstring m.Pdfmarks.text)); |  | ||||||
|         ("page", `Int (calculate_page_number m)); |  | ||||||
|         ("open", `Bool m.Pdfmarks.isopen); |  | ||||||
|         ("target", json_of_target pdf fastrefnums m.Pdfmarks.target)] |  | ||||||
|   in |  | ||||||
|   let json = `List (map json_of_mark marks) in |  | ||||||
|     J.pretty_to_channel ch json |  | ||||||
|  |  | ||||||
| (* List the bookmarks in the given range to the given output *) |  | ||||||
| let list_bookmarks ~json encoding range pdf output = |  | ||||||
|   let process_stripped escaped = |  | ||||||
|     let b = Buffer.create 200 in |  | ||||||
|       iter |  | ||||||
|         (fun x -> |  | ||||||
|            if x <= 127 then Buffer.add_char b (char_of_int x)) |  | ||||||
|         escaped; |  | ||||||
|       Buffer.contents b |  | ||||||
|   in |  | ||||||
|   let process_string s = |  | ||||||
|     let rec replace c x y = function |  | ||||||
|     | [] -> [] |  | ||||||
|     | h::t when h = c -> x::y::replace c x y t |  | ||||||
|     | h::t -> h::replace c x y t |  | ||||||
|     in |  | ||||||
|       (* Convert to UTF8, raw, or stripped, and escape backslashes and quotation marks *) |  | ||||||
|       let codepoints = Pdftext.codepoints_of_pdfdocstring s in |  | ||||||
|         let escaped = |  | ||||||
|           let bs = int_of_char '\\' |  | ||||||
|           and nl = int_of_char '\n' |  | ||||||
|           and n = int_of_char 'n' |  | ||||||
|           and q = int_of_char '\"' in |  | ||||||
|             replace q bs q (replace nl bs n (replace bs bs bs codepoints)) |  | ||||||
|         in |  | ||||||
|           match encoding with |  | ||||||
|           | Cpdfmetadata.UTF8 -> Pdftext.utf8_of_codepoints escaped |  | ||||||
|           | Cpdfmetadata.Stripped -> process_stripped escaped |  | ||||||
|           | Cpdfmetadata.Raw -> s |  | ||||||
|     in |  | ||||||
|       let bookmarks = Pdfmarks.read_bookmarks pdf in |  | ||||||
|       let refnums = Pdf.page_reference_numbers pdf in |  | ||||||
|       let rangetable = hashset_of_list range in |  | ||||||
|       let range_is_all = range = ilist 1 (Pdfpage.endpage pdf) in |  | ||||||
|       let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in |  | ||||||
|         (* Find the pagenumber of each bookmark target. If it is in the range, |  | ||||||
|          * keep that bookmark. Also keep the bookmark if its target is the null |  | ||||||
|          * destination. *) |  | ||||||
|         let inrange = |  | ||||||
|           keep |  | ||||||
|             (function x -> |  | ||||||
|                range_is_all ||  |  | ||||||
|                x.Pdfmarks.target = Pdfdest.NullDestination || |  | ||||||
|                (match x.Pdfmarks.target with Pdfdest.NamedDestinationElsewhere _ -> true | _ -> false) || |  | ||||||
|                Hashtbl.mem rangetable (Pdfpage.pagenumber_of_target ~fastrefnums pdf x.Pdfmarks.target)) bookmarks |  | ||||||
|         in |  | ||||||
|           let calculate_page_number mark = |  | ||||||
|             (* Some buggy PDFs use integers for page numbers instead of page |  | ||||||
|              * object references. Adobe Reader and Preview seem to support |  | ||||||
|              * this, for presumably historical reasons. So if we see an |  | ||||||
|              * OtherDocPageNumber (which is what Pdfdest parses these as, |  | ||||||
|              * because that's what they are legitimately), we use this as the |  | ||||||
|              * page number. It is zero based, though, and we are one-based, so |  | ||||||
|              * we add one. Pdfpage.pagenumber_of_target has been modified to support this.*) |  | ||||||
|             Pdfpage.pagenumber_of_target ~fastrefnums pdf mark.Pdfmarks.target |  | ||||||
|           in |  | ||||||
|             if json then |  | ||||||
|               output_json_marks stdout calculate_page_number pdf fastrefnums inrange |  | ||||||
|             else |  | ||||||
|               iter |  | ||||||
|                 (function mark -> |  | ||||||
|                    output.Pdfio.output_string |  | ||||||
|                      (Printf.sprintf "%i \"%s\" %i%s %s\n" |  | ||||||
|                        mark.Pdfmarks.level |  | ||||||
|                        (process_string mark.Pdfmarks.text) |  | ||||||
|                        (calculate_page_number mark) |  | ||||||
|                        (if mark.Pdfmarks.isopen then " open" else "") |  | ||||||
|                        (output_string_of_target pdf fastrefnums mark.Pdfmarks.target))) |  | ||||||
|                 inrange |  | ||||||
|  |  | ||||||
| (* o is the stamp, u is the main pdf page *) |  | ||||||
|  |  | ||||||
| (* \section{Split at bookmarks} *) |  | ||||||
|  |  | ||||||
| let get_bookmark_name pdf marks splitlevel n _ = |  | ||||||
|   let refnums = Pdf.page_reference_numbers pdf in |  | ||||||
|   let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in |  | ||||||
|   match keep (function m -> n = Pdfpage.pagenumber_of_target ~fastrefnums pdf m.Pdfmarks.target && m.Pdfmarks.level <= splitlevel) marks with |  | ||||||
|   | {Pdfmarks.text = title}::_ -> Cpdfattach.remove_unsafe_characters Cpdfmetadata.UTF8 title |  | ||||||
|   | _ -> "" |  | ||||||
|  |  | ||||||
| (* Find the stem of a filename *) |  | ||||||
| (*let stem s = |  | ||||||
|   implode (rev (tail_no_fail (dropwhile (neq '.') (rev (explode (Filename.basename s))))))*) |  | ||||||
|  |  | ||||||
| (* Return, in order, a duplicate-free list (a *set*) of the page numbers of bookmarks at a given level *) |  | ||||||
| let bookmark_pages level pdf = |  | ||||||
|   let refnums = Pdf.page_reference_numbers pdf in |  | ||||||
|   let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in |  | ||||||
|   setify_preserving_order |  | ||||||
|     (option_map |  | ||||||
|       (function l when l.Pdfmarks.level = level -> Some (Pdfpage.pagenumber_of_target ~fastrefnums pdf l.Pdfmarks.target) | _ -> None) |  | ||||||
|       (Pdfmarks.read_bookmarks pdf)) |  | ||||||
|  |  | ||||||
| (* Called from cpdflib.ml - different from above *) |  | ||||||
| let split_on_bookmarks pdf level = |  | ||||||
|   let points = lose (eq 0) (map pred (bookmark_pages level pdf)) |  | ||||||
|   in let pdf_pages = Pdfpage.pages_of_pagetree pdf in |  | ||||||
|     let ranges = splitat points (indx pdf_pages) in |  | ||||||
|       map (fun rs -> Pdfpage.pdf_of_pages pdf rs) ranges |  | ||||||
|  |  | ||||||
| (* Output information for each page *) |  | ||||||
| let output_page_info pdf range = |  | ||||||
|   let pages = Pdfpage.pages_of_pagetree pdf |  | ||||||
|   and labels = Pdfpagelabels.read pdf in |  | ||||||
|     let getbox page box = |  | ||||||
|       if box = "/MediaBox" then |  | ||||||
|         match page.Pdfpage.mediabox with |  | ||||||
|         | Pdf.Array [a; b; c; d] -> |  | ||||||
|            Printf.sprintf "%f %f %f %f" |  | ||||||
|              (Pdf.getnum a) (Pdf.getnum b) (Pdf.getnum c) (Pdf.getnum d) |  | ||||||
|         | _ -> "" |  | ||||||
|       else |  | ||||||
|         match Pdf.lookup_direct pdf box page.Pdfpage.rest with |  | ||||||
|         | Some (Pdf.Array [a; b; c; d]) -> |  | ||||||
|            Printf.sprintf "%f %f %f %f" |  | ||||||
|              (Pdf.getnum a) (Pdf.getnum b) (Pdf.getnum c) (Pdf.getnum d) |  | ||||||
|         | _ -> "" |  | ||||||
|     and rotation page = |  | ||||||
|       Pdfpage.int_of_rotation page.Pdfpage.rotate |  | ||||||
|     in |  | ||||||
|       iter |  | ||||||
|         (fun pnum -> |  | ||||||
|            let page = select pnum pages in |  | ||||||
|              Printf.printf "Page %i:\n" pnum; |  | ||||||
|              Printf.printf "Label: %s\n" |  | ||||||
|                (try Pdfpagelabels.pagelabeltext_of_pagenumber pnum labels with Not_found -> ""); |  | ||||||
|              Printf.printf "MediaBox: %s\n" (getbox page "/MediaBox"); |  | ||||||
|              Printf.printf "CropBox: %s\n" (getbox page "/CropBox"); |  | ||||||
|              Printf.printf "BleedBox: %s\n" (getbox page "/BleedBox"); |  | ||||||
|              Printf.printf "TrimBox: %s\n" (getbox page "/TrimBox"); |  | ||||||
|              Printf.printf "ArtBox: %s\n" (getbox page "/ArtBox"); |  | ||||||
|              Printf.printf "Rotation: %i\n" (rotation page)) |  | ||||||
|         range |  | ||||||
|  |  | ||||||
| (* Does the page have a defined box e.g "/CropBox" *) | (* Does the page have a defined box e.g "/CropBox" *) | ||||||
| let hasbox pdf page boxname = | let hasbox pdf page boxname = | ||||||
| @@ -450,47 +99,6 @@ let hasbox pdf page boxname = | |||||||
|         | Some _ -> true |         | Some _ -> true | ||||||
|         | _ -> false |         | _ -> false | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| (* List fonts *) |  | ||||||
| let list_font pdf page (name, dict) = |  | ||||||
|   let subtype = |  | ||||||
|     match Pdf.lookup_direct pdf "/Subtype" dict with |  | ||||||
|     | Some (Pdf.Name n) -> Pdfwrite.string_of_pdf (Pdf.Name n) |  | ||||||
|     | _ -> "" |  | ||||||
|   in let basefont = |  | ||||||
|     match Pdf.lookup_direct pdf "/BaseFont" dict with |  | ||||||
|     | Some (Pdf.Name n) -> Pdfwrite.string_of_pdf (Pdf.Name n) |  | ||||||
|     | _ -> "" |  | ||||||
|   in let encoding = |  | ||||||
|    match Pdf.lookup_direct pdf "/Encoding" dict with |  | ||||||
|     | Some (Pdf.Name n) -> Pdfwrite.string_of_pdf (Pdf.Name n) |  | ||||||
|     | _ -> "" |  | ||||||
|   in  |  | ||||||
|     (page, name, subtype, basefont, encoding) |  | ||||||
|  |  | ||||||
| let list_fonts pdf range = |  | ||||||
|   let pages = Pdfpage.pages_of_pagetree pdf in |  | ||||||
|     flatten |  | ||||||
|       (map |  | ||||||
|         (fun (num, page) -> |  | ||||||
|            if mem num range then |  | ||||||
|              begin match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with |  | ||||||
|              | Some (Pdf.Dictionary fontdict) -> |  | ||||||
|                  map (list_font pdf num) fontdict |  | ||||||
|              | _ -> [] |  | ||||||
|              end |  | ||||||
|            else |  | ||||||
|              []) |  | ||||||
|         (combine (ilist 1 (length pages)) pages)) |  | ||||||
|  |  | ||||||
| let string_of_font (p, n, s, b, e) = |  | ||||||
|   Printf.sprintf "%i %s %s %s %s\n" p n s b e |  | ||||||
|  |  | ||||||
| let print_fonts pdf range = |  | ||||||
|   flprint |  | ||||||
|     (fold_left ( ^ ) "" (map string_of_font (list_fonts pdf range))) |  | ||||||
|  |  | ||||||
| (* \section{Superimpose text, page numbers etc.} *) | (* \section{Superimpose text, page numbers etc.} *) | ||||||
|  |  | ||||||
| (* Process UTF8 text to /WinAnsiEncoding string (for standard 14) or whatever | (* Process UTF8 text to /WinAnsiEncoding string (for standard 14) or whatever | ||||||
| @@ -744,9 +352,7 @@ let extract_page_text only_fontsize pdf _ page = | |||||||
| (* For each page, extract all the ops with text in them, and concatenate it all together *) | (* For each page, extract all the ops with text in them, and concatenate it all together *) | ||||||
| let extract_text extract_text_font_size pdf range = | let extract_text extract_text_font_size pdf range = | ||||||
|   fold_left (fun x y -> x ^ (if x <> "" && y <> "" then "\n" else "") ^ y) "" |   fold_left (fun x y -> x ^ (if x <> "" && y <> "" then "\n" else "") ^ y) "" | ||||||
|     (map_pages (extract_page_text extract_text_font_size pdf) pdf range) |     (Cpdfpage.map_pages (extract_page_text extract_text_font_size pdf) pdf range) | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| let addtext | let addtext | ||||||
|   metrics lines linewidth outline fast colour fontname embed bates batespad fontsize font |   metrics lines linewidth outline fast colour fontname embed bates batespad fontsize font | ||||||
| @@ -883,9 +489,9 @@ let addtext | |||||||
|                 else Pdfpage.postpend_operators pdf ops ~fast:fast page |                 else Pdfpage.postpend_operators pdf ops ~fast:fast page | ||||||
|   in |   in | ||||||
|     if metrics then |     if metrics then | ||||||
|       (ignore (iter_pages (fun a b -> ignore (addtext_page a b)) pdf pages); pdf) |       (ignore (Cpdfpage.iter_pages (fun a b -> ignore (addtext_page a b)) pdf pages); pdf) | ||||||
|     else |     else | ||||||
|       process_pages (ppstub addtext_page) pdf pages |       Cpdfpage.process_pages (ppstub addtext_page) pdf pages | ||||||
|  |  | ||||||
| (* Prev is a list of lists of characters *) | (* Prev is a list of lists of characters *) | ||||||
| let split_at_newline t = | let split_at_newline t = | ||||||
| @@ -1050,8 +656,7 @@ let removetext range pdf = | |||||||
|              let ops = Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content in |              let ops = Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content in | ||||||
|                [Pdfops.stream_of_ops (remove_stamps [] ops)]} |                [Pdfops.stream_of_ops (remove_stamps [] ops)]} | ||||||
|       in |       in | ||||||
|         process_pages (ppstub removetext_page) pdf range |         Cpdfpage.process_pages (ppstub removetext_page) pdf range | ||||||
|  |  | ||||||
|  |  | ||||||
| (* \section{Shift page data} *) | (* \section{Shift page data} *) | ||||||
| let make_mediabox (xmin, ymin, xmax, ymax) = | let make_mediabox (xmin, ymin, xmax, ymax) = | ||||||
| @@ -1255,7 +860,7 @@ let shift_page ?(fast=false) dxdylist pdf pnum page = | |||||||
|         (Pdfpage.prepend_operators pdf [transform_op] ~fast page, pnum, Pdftransform.mktranslate dx dy) |         (Pdfpage.prepend_operators pdf [transform_op] ~fast page, pnum, Pdftransform.mktranslate dx dy) | ||||||
|  |  | ||||||
| let shift_pdf ?(fast=false) dxdylist pdf range = | let shift_pdf ?(fast=false) dxdylist pdf range = | ||||||
|   process_pages (shift_page ~fast dxdylist pdf) pdf range |   Cpdfpage.process_pages (shift_page ~fast dxdylist pdf) pdf range | ||||||
|  |  | ||||||
| (* Change a page's media box so its minimum x and y are 0, making other | (* Change a page's media box so its minimum x and y are 0, making other | ||||||
| operations simpler to think about. Any shift that is done is reflected in | operations simpler to think about. Any shift that is done is reflected in | ||||||
| @@ -1288,14 +893,14 @@ let vflip_pdf ?(fast=false) pdf range = | |||||||
|     Pdftransform.matrix_of_op |     Pdftransform.matrix_of_op | ||||||
|       (Pdftransform.Scale ((0., ((miny +. maxy) /. 2.)), 1., -.1.)) |       (Pdftransform.Scale ((0., ((miny +. maxy) /. 2.)), 1., -.1.)) | ||||||
|   in |   in | ||||||
|     process_pages (flip_page ~fast transform_op pdf) pdf range |     Cpdfpage.process_pages (flip_page ~fast transform_op pdf) pdf range | ||||||
|  |  | ||||||
| let hflip_pdf ?(fast=false) pdf range = | let hflip_pdf ?(fast=false) pdf range = | ||||||
|   let transform_op minx _ maxx _ = |   let transform_op minx _ maxx _ = | ||||||
|     Pdftransform.matrix_of_op |     Pdftransform.matrix_of_op | ||||||
|       (Pdftransform.Scale (((minx +. maxx) /. 2., 0.), -.1., 1.)) |       (Pdftransform.Scale (((minx +. maxx) /. 2., 0.), -.1., 1.)) | ||||||
|   in |   in | ||||||
|     process_pages (flip_page ~fast transform_op pdf) pdf range |     Cpdfpage.process_pages (flip_page ~fast transform_op pdf) pdf range | ||||||
|  |  | ||||||
| let stamp_shift_of_position topline midline sw sh w h p = | let stamp_shift_of_position topline midline sw sh w h p = | ||||||
|   let half x = x /. 2. |   let half x = x /. 2. | ||||||
| @@ -1625,7 +1230,7 @@ let set_mediabox xywhlist pdf range = | |||||||
|            [Pdf.Real x; Pdf.Real y; |            [Pdf.Real x; Pdf.Real y; | ||||||
|             Pdf.Real (x +.  w); Pdf.Real (y +. h)])} |             Pdf.Real (x +.  w); Pdf.Real (y +. h)])} | ||||||
|   in |   in | ||||||
|     process_pages (ppstub crop_page) pdf range |     Cpdfpage.process_pages (ppstub crop_page) pdf range | ||||||
|  |  | ||||||
| (* Just used by cpdflib for historical reasons *) | (* Just used by cpdflib for historical reasons *) | ||||||
| let setBox box minx maxx miny maxy pdf range = | let setBox box minx maxx miny maxy pdf range = | ||||||
| @@ -1636,7 +1241,7 @@ let setBox box minx maxx miny maxy pdf range = | |||||||
|            page.Pdfpage.rest box |            page.Pdfpage.rest box | ||||||
|            (Pdf.Array [Pdf.Real minx; Pdf.Real miny; Pdf.Real maxx; Pdf.Real maxy])} |            (Pdf.Array [Pdf.Real minx; Pdf.Real miny; Pdf.Real maxx; Pdf.Real maxy])} | ||||||
|   in |   in | ||||||
|     process_pages (ppstub set_box_page) pdf range |     Cpdfpage.process_pages (ppstub set_box_page) pdf range | ||||||
|  |  | ||||||
| (* \section{Cropping} *) | (* \section{Cropping} *) | ||||||
| let crop_pdf ?(box="/CropBox") xywhlist pdf range = | let crop_pdf ?(box="/CropBox") xywhlist pdf range = | ||||||
| @@ -1651,14 +1256,14 @@ let crop_pdf ?(box="/CropBox") xywhlist pdf range = | |||||||
|                  [Pdf.Real x; Pdf.Real y; |                  [Pdf.Real x; Pdf.Real y; | ||||||
|                   Pdf.Real (x +.  w); Pdf.Real (y +. h)])))} |                   Pdf.Real (x +.  w); Pdf.Real (y +. h)])))} | ||||||
|   in |   in | ||||||
|     process_pages (ppstub crop_page) pdf range |     Cpdfpage.process_pages (ppstub crop_page) pdf range | ||||||
|  |  | ||||||
| (* Clip a page to one of its boxes, or the media box if that box is not | (* Clip a page to one of its boxes, or the media box if that box is not | ||||||
|  * present. This is a hard clip, done by using a clipping rectangle, so that |  * present. This is a hard clip, done by using a clipping rectangle, so that | ||||||
|  * the page may then be used as a stamp without extraneous material reappearing. |  * the page may then be used as a stamp without extraneous material reappearing. | ||||||
|  * *) |  * *) | ||||||
| let hard_box pdf range boxname mediabox_if_missing fast = | let hard_box pdf range boxname mediabox_if_missing fast = | ||||||
|   process_pages |   Cpdfpage.process_pages | ||||||
|     (ppstub (fun pagenum page -> |     (ppstub (fun pagenum page -> | ||||||
|        let minx, miny, maxx, maxy = |        let minx, miny, maxx, maxy = | ||||||
|          if boxname = "/MediaBox" then |          if boxname = "/MediaBox" then | ||||||
| @@ -1682,7 +1287,7 @@ let remove_cropping_pdf pdf range = | |||||||
|        Pdfpage.rest = |        Pdfpage.rest = | ||||||
|          (Pdf.remove_dict_entry page.Pdfpage.rest "/CropBox")} |          (Pdf.remove_dict_entry page.Pdfpage.rest "/CropBox")} | ||||||
|   in |   in | ||||||
|     process_pages (ppstub remove_cropping_page) pdf range |     Cpdfpage.process_pages (ppstub remove_cropping_page) pdf range | ||||||
|  |  | ||||||
| let remove_trim_pdf pdf range = | let remove_trim_pdf pdf range = | ||||||
|   let remove_trim_page _ page = |   let remove_trim_page _ page = | ||||||
| @@ -1690,7 +1295,7 @@ let remove_trim_pdf pdf range = | |||||||
|        Pdfpage.rest = |        Pdfpage.rest = | ||||||
|          (Pdf.remove_dict_entry page.Pdfpage.rest "/TrimBox")} |          (Pdf.remove_dict_entry page.Pdfpage.rest "/TrimBox")} | ||||||
|   in |   in | ||||||
|     process_pages (ppstub remove_trim_page) pdf range |     Cpdfpage.process_pages (ppstub remove_trim_page) pdf range | ||||||
|  |  | ||||||
| let remove_art_pdf pdf range = | let remove_art_pdf pdf range = | ||||||
|   let remove_art_page _ page = |   let remove_art_page _ page = | ||||||
| @@ -1698,7 +1303,7 @@ let remove_art_pdf pdf range = | |||||||
|        Pdfpage.rest = |        Pdfpage.rest = | ||||||
|          (Pdf.remove_dict_entry page.Pdfpage.rest "/ArtBox")} |          (Pdf.remove_dict_entry page.Pdfpage.rest "/ArtBox")} | ||||||
|   in |   in | ||||||
|     process_pages (ppstub remove_art_page) pdf range |     Cpdfpage.process_pages (ppstub remove_art_page) pdf range | ||||||
|  |  | ||||||
| let remove_bleed_pdf pdf range = | let remove_bleed_pdf pdf range = | ||||||
|   let remove_bleed_page _ page = |   let remove_bleed_page _ page = | ||||||
| @@ -1706,7 +1311,7 @@ let remove_bleed_pdf pdf range = | |||||||
|        Pdfpage.rest = |        Pdfpage.rest = | ||||||
|          (Pdf.remove_dict_entry page.Pdfpage.rest "/BleedBox")} |          (Pdf.remove_dict_entry page.Pdfpage.rest "/BleedBox")} | ||||||
|   in |   in | ||||||
|     process_pages (ppstub remove_bleed_page) pdf range |     Cpdfpage.process_pages (ppstub remove_bleed_page) pdf range | ||||||
|  |  | ||||||
| (* \section{Rotating pages} *) | (* \section{Rotating pages} *) | ||||||
| let rotate_pdf r pdf range = | let rotate_pdf r pdf range = | ||||||
| @@ -1714,14 +1319,14 @@ let rotate_pdf r pdf range = | |||||||
|     {page with Pdfpage.rotate = |     {page with Pdfpage.rotate = | ||||||
|        Pdfpage.rotation_of_int r} |        Pdfpage.rotation_of_int r} | ||||||
|   in |   in | ||||||
|     process_pages (ppstub rotate_page) pdf range |     Cpdfpage.process_pages (ppstub rotate_page) pdf range | ||||||
|  |  | ||||||
| let rotate_pdf_by r pdf range = | let rotate_pdf_by r pdf range = | ||||||
|   let rotate_page_by _ page = |   let rotate_page_by _ page = | ||||||
|     {page with Pdfpage.rotate = |     {page with Pdfpage.rotate = | ||||||
|        Pdfpage.rotation_of_int ((Pdfpage.int_of_rotation page.Pdfpage.rotate + r) mod 360)} |        Pdfpage.rotation_of_int ((Pdfpage.int_of_rotation page.Pdfpage.rotate + r) mod 360)} | ||||||
|   in |   in | ||||||
|     process_pages (ppstub rotate_page_by) pdf range |     Cpdfpage.process_pages (ppstub rotate_page_by) pdf range | ||||||
|  |  | ||||||
| let rotate_page_contents ~fast rotpoint r pdf pnum page = | let rotate_page_contents ~fast rotpoint r pdf pnum page = | ||||||
|   let rotation_point = |   let rotation_point = | ||||||
| @@ -1744,7 +1349,7 @@ let rotate_page_contents ~fast rotpoint r pdf pnum page = | |||||||
|         (Pdfpage.prepend_operators pdf [transform_op] ~fast page, pnum, tr) |         (Pdfpage.prepend_operators pdf [transform_op] ~fast page, pnum, tr) | ||||||
|  |  | ||||||
| let rotate_contents ?(fast=false) r pdf range = | let rotate_contents ?(fast=false) r pdf range = | ||||||
|   process_pages (rotate_page_contents ~fast None r pdf) pdf range |   Cpdfpage.process_pages (rotate_page_contents ~fast None r pdf) pdf range | ||||||
|  |  | ||||||
| (* Return the pages from the pdf in the range, unordered. *) | (* Return the pages from the pdf in the range, unordered. *) | ||||||
| let select_pages range pdf = | let select_pages range pdf = | ||||||
| @@ -1795,7 +1400,7 @@ let upright ?(fast=false) range pdf = | |||||||
|           let page = transform_contents ~fast tr pdf page in |           let page = transform_contents ~fast tr pdf page in | ||||||
|             (rectify_boxes ~fast pdf {page with Pdfpage.rotate = Pdfpage.Rotate0}, pnum, tr) |             (rectify_boxes ~fast pdf {page with Pdfpage.rotate = Pdfpage.Rotate0}, pnum, tr) | ||||||
|     in |     in | ||||||
|       process_pages (upright_page pdf) pdf range |       Cpdfpage.process_pages (upright_page pdf) pdf range | ||||||
|  |  | ||||||
| (* \section{Scale page data} *) | (* \section{Scale page data} *) | ||||||
| let scale_pdf ?(fast=false) sxsylist pdf range = | let scale_pdf ?(fast=false) sxsylist pdf range = | ||||||
| @@ -1814,7 +1419,7 @@ let scale_pdf ?(fast=false) sxsylist pdf range = | |||||||
|            transform_annotations pdf matrix page.Pdfpage.rest; |            transform_annotations pdf matrix page.Pdfpage.rest; | ||||||
|            (Pdfpage.prepend_operators pdf ~fast [transform_op] page, pnum, matrix) |            (Pdfpage.prepend_operators pdf ~fast [transform_op] page, pnum, matrix) | ||||||
|       in |       in | ||||||
|         process_pages scale_page pdf range |         Cpdfpage.process_pages scale_page pdf range | ||||||
|  |  | ||||||
| (* Scale to fit page of size x * y *) | (* Scale to fit page of size x * y *) | ||||||
| let scale_to_fit_pdf ?(fast=false) position input_scale xylist op pdf range = | let scale_to_fit_pdf ?(fast=false) position input_scale xylist op pdf range = | ||||||
| @@ -1855,7 +1460,7 @@ let scale_to_fit_pdf ?(fast=false) position input_scale xylist op pdf range = | |||||||
|         (Pdfpage.prepend_operators pdf [Pdfops.Op_cm matrix] ~fast |         (Pdfpage.prepend_operators pdf [Pdfops.Op_cm matrix] ~fast | ||||||
|          (change_pattern_matrices_page pdf (Pdftransform.matrix_invert matrix) page), pnum, matrix) |          (change_pattern_matrices_page pdf (Pdftransform.matrix_invert matrix) page), pnum, matrix) | ||||||
|   in |   in | ||||||
|     process_pages scale_page_to_fit pdf range |     Cpdfpage.process_pages scale_page_to_fit pdf range | ||||||
|  |  | ||||||
| (* Scale contents *) | (* Scale contents *) | ||||||
| let scale_page_contents ?(fast=false) scale position pdf pnum page = | let scale_page_contents ?(fast=false) scale position pdf pnum page = | ||||||
| @@ -1891,177 +1496,8 @@ let scale_page_contents ?(fast=false) scale position pdf pnum page = | |||||||
|           (Pdfpage.prepend_operators pdf [transform_op] ~fast page, pnum, transform) |           (Pdfpage.prepend_operators pdf [transform_op] ~fast page, pnum, transform) | ||||||
|  |  | ||||||
| let scale_contents ?(fast=false) position scale pdf range = | let scale_contents ?(fast=false) position scale pdf range = | ||||||
|   process_pages (scale_page_contents ~fast scale position pdf) pdf range |   Cpdfpage.process_pages (scale_page_contents ~fast scale position pdf) pdf range | ||||||
|  |  | ||||||
| (* \section{List annotations} *) |  | ||||||
| let get_annotation_string encoding pdf annot = |  | ||||||
|   match Pdf.lookup_direct pdf "/Contents" annot with |  | ||||||
|   | Some (Pdf.String s) -> Cpdfmetadata.encode_output encoding s |  | ||||||
|   | _ -> "" |  | ||||||
|  |  | ||||||
| let print_annotation encoding pdf num s = |  | ||||||
|   let s = get_annotation_string encoding pdf s in |  | ||||||
|   match s with |  | ||||||
|   | "" -> () |  | ||||||
|   | s -> |  | ||||||
|     flprint (Printf.sprintf "Page %d: " num); |  | ||||||
|     flprint s; |  | ||||||
|     flprint "\n" |  | ||||||
|  |  | ||||||
| let list_page_annotations encoding pdf num page = |  | ||||||
|   match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with |  | ||||||
|   | Some (Pdf.Array annots) -> |  | ||||||
|       iter (print_annotation encoding pdf num) (map (Pdf.direct pdf) annots) |  | ||||||
|   | _ -> () |  | ||||||
|  |  | ||||||
| let annotations_json_page pdf page pagenum = |  | ||||||
|   match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with |  | ||||||
|   | Some (Pdf.Array annots) -> |  | ||||||
|       map |  | ||||||
|         (fun annot -> |  | ||||||
|            `List [`Int pagenum; Cpdfjson.json_of_object pdf (fun _ -> ()) false false annot]) |  | ||||||
|         (map (Pdf.direct pdf) annots) |  | ||||||
|   | _ -> [] |  | ||||||
|  |  | ||||||
| let list_annotations_json pdf = |  | ||||||
|   let module J = Cpdfyojson.Safe in |  | ||||||
|   let pages = Pdfpage.pages_of_pagetree pdf in |  | ||||||
|   let pagenums = indx pages in |  | ||||||
|   let json = `List (flatten (map2 (annotations_json_page pdf) pages pagenums)) in |  | ||||||
|     J.pretty_to_channel stdout json |  | ||||||
|  |  | ||||||
| let list_annotations ~json encoding pdf = |  | ||||||
|   let range = Cpdfpagespec.parse_pagespec pdf "all" in |  | ||||||
|   if json |  | ||||||
|     then list_annotations_json pdf |  | ||||||
|     else iter_pages (list_page_annotations encoding pdf) pdf range |  | ||||||
|  |  | ||||||
| let get_annotations encoding pdf = |  | ||||||
|   let pages = Pdfpage.pages_of_pagetree pdf in |  | ||||||
|     flatten |  | ||||||
|       (map2 |  | ||||||
|        (fun page pagenumber -> |  | ||||||
|          match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with |  | ||||||
|          | Some (Pdf.Array annots) -> |  | ||||||
|              let strings = |  | ||||||
|                map (get_annotation_string encoding pdf) (map (Pdf.direct pdf) annots) |  | ||||||
|              in |  | ||||||
|                combine (many pagenumber (length strings)) strings |  | ||||||
|          | _ -> []) |  | ||||||
|         pages |  | ||||||
|         (ilist 1 (length pages)))  |  | ||||||
|  |  | ||||||
| (* Equalise the page lengths of two PDFs by chopping or extending the first one. |  | ||||||
| *) |  | ||||||
| let equalise_lengths a b = |  | ||||||
|   let a' = |  | ||||||
|     if Pdfpage.endpage a < Pdfpage.endpage b then |  | ||||||
|       Pdfpage.change_pages false a |  | ||||||
|         (Pdfpage.pages_of_pagetree a @ |  | ||||||
|            many (Pdfpage.blankpage Pdfpaper.a4) (Pdfpage.endpage b - Pdfpage.endpage a)) |  | ||||||
|     else if Pdfpage.endpage a > Pdfpage.endpage b then |  | ||||||
|       Pdfpage.change_pages false a |  | ||||||
|         (take (Pdfpage.pages_of_pagetree a) (Pdfpage.endpage b)) |  | ||||||
|     else a  |  | ||||||
|   in |  | ||||||
|     a', b |  | ||||||
|  |  | ||||||
| (* Copy annotations *) |  | ||||||
|  |  | ||||||
| (* FIXME: Why does this chop the files to the same length? Should be able to |  | ||||||
| apply annotations from a longer file to a shorter? *) |  | ||||||
|  |  | ||||||
| (* Rewrite any annotation destinations to point to pages in the |  | ||||||
| destination file. This prevents pages being copied, and ensures the links are |  | ||||||
| correct. Any Indirect link inside a /Dest is rewritten if in the table. If not |  | ||||||
| inside a /Dest, nothing is rewritten. *) |  | ||||||
| let rec renumber_in_dest table indest = function |  | ||||||
|     Pdf.Indirect i ->  |  | ||||||
|       begin |  | ||||||
|         try Pdf.Indirect (Hashtbl.find table i) with _ -> Pdf.Indirect i |  | ||||||
|       end |  | ||||||
|   | Pdf.Array a -> |  | ||||||
|       Pdf.recurse_array (renumber_in_dest table indest) a |  | ||||||
|   | Pdf.Dictionary d -> |  | ||||||
|       Pdf.Dictionary |  | ||||||
|         (map |  | ||||||
|           (function |  | ||||||
|              ("/Dest", v) -> ("/Dest", renumber_in_dest table true v) |  | ||||||
|            | (k, v) -> (k, renumber_in_dest table indest v)) |  | ||||||
|           d) |  | ||||||
|   | x -> x  |  | ||||||
|  |  | ||||||
| let renumber_in_object pdf objnum table = |  | ||||||
|   Pdf.addobj_given_num |  | ||||||
|     pdf (objnum, (renumber_in_dest table false (Pdf.lookup_obj pdf objnum))) |  | ||||||
|  |  | ||||||
| let copy_annotations_page topdf frompdf frompage topage = |  | ||||||
|   match Pdf.lookup_direct frompdf "/Annots" frompage.Pdfpage.rest with |  | ||||||
|     Some (Pdf.Array frompage_annots as annots) -> |  | ||||||
|       let table = |  | ||||||
|         hashtable_of_dictionary |  | ||||||
|           (combine |  | ||||||
|              (Pdf.page_reference_numbers frompdf) |  | ||||||
|              (Pdf.page_reference_numbers topdf)) |  | ||||||
|       in |  | ||||||
|         iter |  | ||||||
|          (function |  | ||||||
|             (* FIXME: We assume they are indirects. Must also do direct, though rare.*) |  | ||||||
|             Pdf.Indirect x -> |  | ||||||
|               (*Printf.printf "Copying annotation %s which is\n%s\n" |  | ||||||
|                 (Pdfwrite.string_of_pdf (Pdf.Indirect x)) |  | ||||||
|                 (Pdfwrite.string_of_pdf (Pdf.direct frompdf (Pdf.Indirect |  | ||||||
|                 x)));*) |  | ||||||
|               renumber_in_object frompdf x table |  | ||||||
|           | _ -> ()) |  | ||||||
|          frompage_annots; |  | ||||||
|         let objects_to_copy = Pdf.objects_referenced [] [] frompdf annots in |  | ||||||
|           iter |  | ||||||
|             (fun n -> |  | ||||||
|                ignore (Pdf.addobj_given_num topdf (n, Pdf.lookup_obj frompdf n))) |  | ||||||
|             objects_to_copy; |  | ||||||
|           let topage_annots = |  | ||||||
|             match Pdf.lookup_direct frompdf "/Annots" topage.Pdfpage.rest with |  | ||||||
|             | Some (Pdf.Array annots) -> annots |  | ||||||
|             | _ -> [] |  | ||||||
|           in |  | ||||||
|             let merged_dict = Pdf.Array (frompage_annots @ topage_annots) in |  | ||||||
|               let topage' = |  | ||||||
|                 {topage with Pdfpage.rest = |  | ||||||
|                    Pdf.add_dict_entry topage.Pdfpage.rest "/Annots" merged_dict} |  | ||||||
|               in |  | ||||||
|                 topdf, topage' |  | ||||||
|   | Some x -> topdf, topage |  | ||||||
|   | None -> topdf, topage |  | ||||||
|  |  | ||||||
| let copy_annotations range frompdf topdf = |  | ||||||
|   let frompdf, topdf = equalise_lengths frompdf topdf in |  | ||||||
|     match Pdf.renumber_pdfs [frompdf; topdf] with  |  | ||||||
|     | [frompdf; topdf] -> |  | ||||||
|         let frompdf_pages = Pdfpage.pages_of_pagetree frompdf in |  | ||||||
|         let topdf_pages = Pdfpage.pages_of_pagetree topdf in |  | ||||||
|           let pdf = ref topdf |  | ||||||
|           and pages = ref [] |  | ||||||
|           and pnum = ref 1 |  | ||||||
|           and frompdf_pages = ref frompdf_pages |  | ||||||
|           and topdf_pages = ref topdf_pages in |  | ||||||
|             (* Go through, updating pdf and collecting new pages. *) |  | ||||||
|             while not (isnull !frompdf_pages) do |  | ||||||
|               let frompdf_page = hd !frompdf_pages |  | ||||||
|               and topdf_page = hd !topdf_pages in |  | ||||||
|                 let pdf', page = |  | ||||||
|                   if mem !pnum range |  | ||||||
|                     then copy_annotations_page !pdf frompdf frompdf_page topdf_page |  | ||||||
|                     else !pdf, topdf_page |  | ||||||
|                 in |  | ||||||
|                   pdf := pdf'; |  | ||||||
|                   pages =| page; |  | ||||||
|                   incr pnum; |  | ||||||
|                   frompdf_pages := tl !frompdf_pages; |  | ||||||
|                   topdf_pages := tl !topdf_pages |  | ||||||
|             done; |  | ||||||
|             Pdfpage.change_pages true !pdf (rev !pages) |  | ||||||
|     | _ -> assert false |  | ||||||
|  |  | ||||||
| let addrectangle | let addrectangle | ||||||
|   fast (w, h) colour outline linewidth opacity position relative_to_cropbox |   fast (w, h) colour outline linewidth opacity position relative_to_cropbox | ||||||
| @@ -2125,7 +1561,7 @@ let addrectangle | |||||||
|           then Pdfpage.prepend_operators pdf ops ~fast:fast page |           then Pdfpage.prepend_operators pdf ops ~fast:fast page | ||||||
|           else Pdfpage.postpend_operators pdf ops ~fast:fast page |           else Pdfpage.postpend_operators pdf ops ~fast:fast page | ||||||
|   in |   in | ||||||
|     process_pages (ppstub addrectangle_page) pdf range |     Cpdfpage.process_pages (ppstub addrectangle_page) pdf range | ||||||
|  |  | ||||||
|  |  | ||||||
| (* Imposition *) | (* Imposition *) | ||||||
| @@ -2497,7 +1933,7 @@ let blacktext c range pdf = | |||||||
|       process_xobjects pdf page (blacktext_ops c); |       process_xobjects pdf page (blacktext_ops c); | ||||||
|       {page with Pdfpage.content = content'} |       {page with Pdfpage.content = content'} | ||||||
|   in |   in | ||||||
|     process_pages (ppstub blacktext_page) pdf range |     Cpdfpage.process_pages (ppstub blacktext_page) pdf range | ||||||
|  |  | ||||||
| (* \section{Blacken lines} *) | (* \section{Blacken lines} *) | ||||||
| let blacklines_ops c pdf resources content = | let blacklines_ops c pdf resources content = | ||||||
| @@ -2523,7 +1959,7 @@ let blacklines c range pdf = | |||||||
|       process_xobjects pdf page (blacklines_ops c); |       process_xobjects pdf page (blacklines_ops c); | ||||||
|       {page with Pdfpage.content = content'} |       {page with Pdfpage.content = content'} | ||||||
|   in |   in | ||||||
|     process_pages (ppstub blacklines_page) pdf range |     Cpdfpage.process_pages (ppstub blacklines_page) pdf range | ||||||
|  |  | ||||||
| (* \section{Blacken Fills} *) | (* \section{Blacken Fills} *) | ||||||
| let blackfills_ops c pdf resources content = | let blackfills_ops c pdf resources content = | ||||||
| @@ -2549,7 +1985,7 @@ let blackfills c range pdf = | |||||||
|       process_xobjects pdf page (blackfills_ops c); |       process_xobjects pdf page (blackfills_ops c); | ||||||
|       {page with Pdfpage.content = content'} |       {page with Pdfpage.content = content'} | ||||||
|   in |   in | ||||||
|     process_pages (ppstub blackfills_page) pdf range |     Cpdfpage.process_pages (ppstub blackfills_page) pdf range | ||||||
|  |  | ||||||
| (* \section{Set a minimum line width to avoid dropout} *) | (* \section{Set a minimum line width to avoid dropout} *) | ||||||
| let thinlines range width pdf = | let thinlines range width pdf = | ||||||
| @@ -2624,20 +2060,8 @@ let thinlines range width pdf = | |||||||
|                 let content' = [Pdfops.stream_of_ops operators] in |                 let content' = [Pdfops.stream_of_ops operators] in | ||||||
|                   {page with Pdfpage.content = content'}  |                   {page with Pdfpage.content = content'}  | ||||||
|   in |   in | ||||||
|     process_pages (ppstub thinpage) pdf range |     Cpdfpage.process_pages (ppstub thinpage) pdf range | ||||||
|  |  | ||||||
| (* \section{Remove annotations} *) |  | ||||||
| let remove_annotations range pdf = |  | ||||||
|   let remove_annotations_page pagenum page = |  | ||||||
|     if mem pagenum range then |  | ||||||
|       let rest' = |  | ||||||
|         Pdf.remove_dict_entry page.Pdfpage.rest "/Annots" |  | ||||||
|       in |  | ||||||
|         {page with Pdfpage.rest = rest'} |  | ||||||
|     else |  | ||||||
|       page |  | ||||||
|   in |  | ||||||
|     process_pages (ppstub remove_annotations_page) pdf range |  | ||||||
|  |  | ||||||
| (* \section{Making draft documents} *) | (* \section{Making draft documents} *) | ||||||
|  |  | ||||||
| @@ -2809,7 +2233,7 @@ let append_page_content_page fast s before pdf n page = | |||||||
|     pdf ops ~fast page |     pdf ops ~fast page | ||||||
|  |  | ||||||
| let append_page_content s before fast range pdf = | let append_page_content s before fast range pdf = | ||||||
|   process_pages (ppstub (append_page_content_page fast s before pdf)) pdf range |   Cpdfpage.process_pages (ppstub (append_page_content_page fast s before pdf)) pdf range | ||||||
|  |  | ||||||
| (* Add rectangles on top of pages to show Media, Crop, Art, Trim, Bleed boxes. | (* Add rectangles on top of pages to show Media, Crop, Art, Trim, Bleed boxes. | ||||||
|  * |  * | ||||||
| @@ -2855,7 +2279,9 @@ let show_boxes_page fast pdf _ page = | |||||||
|       Pdfpage.postpend_operators pdf ops ~fast page |       Pdfpage.postpend_operators pdf ops ~fast page | ||||||
|  |  | ||||||
| let show_boxes ?(fast=false) pdf range = | let show_boxes ?(fast=false) pdf range = | ||||||
|   process_pages (ppstub (show_boxes_page fast pdf)) pdf range |   Cpdfpage.process_pages (ppstub (show_boxes_page fast pdf)) pdf range | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| let allowance = 9. | let allowance = 9. | ||||||
|  |  | ||||||
| @@ -2887,7 +2313,7 @@ let trim_marks_page fast pdf n page = | |||||||
|       page |       page | ||||||
|  |  | ||||||
| let trim_marks ?(fast=false) pdf range = | let trim_marks ?(fast=false) pdf range = | ||||||
|   process_pages (ppstub (trim_marks_page fast pdf)) pdf range |   Cpdfpage.process_pages (ppstub (trim_marks_page fast pdf)) pdf range | ||||||
|  |  | ||||||
| let rec remove_all_text_ops pdf resources content = | let rec remove_all_text_ops pdf resources content = | ||||||
|   let is_textop = function |   let is_textop = function | ||||||
| @@ -2989,7 +2415,7 @@ let remove_clipping pdf range = | |||||||
|       process_xobjects pdf page remove_clipping_ops; |       process_xobjects pdf page remove_clipping_ops; | ||||||
|       {page with Pdfpage.content = content'} |       {page with Pdfpage.content = content'} | ||||||
|   in |   in | ||||||
|     process_pages (ppstub remove_clipping_page) pdf range |     Cpdfpage.process_pages (ppstub remove_clipping_page) pdf range | ||||||
|  |  | ||||||
| (* Image resolution *) | (* Image resolution *) | ||||||
| type xobj = | type xobj = | ||||||
| @@ -3079,7 +2505,7 @@ let rec image_resolution_page pdf page pagenum dpi (images : (int * string * xob | |||||||
|  |  | ||||||
| and image_resolution pdf range dpi = | and image_resolution pdf range dpi = | ||||||
|   let images = ref [] in |   let images = ref [] in | ||||||
|     iter_pages |     Cpdfpage.iter_pages | ||||||
|       (fun pagenum page -> |       (fun pagenum page -> | ||||||
|          (* 1. Get all image names and their native resolutions from resources as string * int * int *) |          (* 1. Get all image names and their native resolutions from resources as string * int * int *) | ||||||
|          match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with |          match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with | ||||||
| @@ -3142,7 +2568,7 @@ let image_resolution pdf range dpi = | |||||||
| the contents of the mediabox will be used if the from box is not available. If | the contents of the mediabox will be used if the from box is not available. If | ||||||
| mediabox_if_missing is false, the page is unaltered. *) | mediabox_if_missing is false, the page is unaltered. *) | ||||||
| let copy_box f t mediabox_if_missing pdf range = | let copy_box f t mediabox_if_missing pdf range = | ||||||
|   process_pages |   Cpdfpage.process_pages | ||||||
|     (ppstub (fun _ page -> |     (ppstub (fun _ page -> | ||||||
|        if f = "/MediaBox" then |        if f = "/MediaBox" then | ||||||
|          {page with Pdfpage.rest = |          {page with Pdfpage.rest = | ||||||
| @@ -3178,7 +2604,7 @@ let remove_unused_resources_page pdf n page = | |||||||
|           {page with Pdfpage.resources = Pdf.add_dict_entry page.Pdfpage.resources  "/XObject" xobjdict} |           {page with Pdfpage.resources = Pdf.add_dict_entry page.Pdfpage.resources  "/XObject" xobjdict} | ||||||
|  |  | ||||||
| let remove_unused_resources pdf = | let remove_unused_resources pdf = | ||||||
|   process_pages (ppstub (remove_unused_resources_page pdf)) pdf (ilist 1 (Pdfpage.endpage pdf)) |   Cpdfpage.process_pages (ppstub (remove_unused_resources_page pdf)) pdf (ilist 1 (Pdfpage.endpage pdf)) | ||||||
|  |  | ||||||
| (* Indent bookmarks in each file by one and add a title bookmark pointing to the first page. *) | (* Indent bookmarks in each file by one and add a title bookmark pointing to the first page. *) | ||||||
| let add_bookmark_title filename use_title pdf = | let add_bookmark_title filename use_title pdf = | ||||||
| @@ -3413,5 +2839,3 @@ let extract_images path_to_p2p path_to_im encoding dedup dedup_per_page pdf rang | |||||||
|                  iter (extract_images_form_xobject path_to_p2p path_to_im encoding dedup dedup_per_page pdf serial stem pnum) forms) |                  iter (extract_images_form_xobject path_to_p2p path_to_im encoding dedup dedup_per_page pdf serial stem pnum) forms) | ||||||
|           pages |           pages | ||||||
|           (indx pages) |           (indx pages) | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										55
									
								
								cpdf.mli
									
									
									
									
									
								
							
							
						
						
									
										55
									
								
								cpdf.mli
									
									
									
									
									
								
							| @@ -8,34 +8,9 @@ type color = | |||||||
|  |  | ||||||
| (** {2 Working with pages} *) | (** {2 Working with pages} *) | ||||||
|  |  | ||||||
| (** Given a function from page number and page to page, a document, and a list |  | ||||||
| of page numbers to apply it to, apply the function to all those pages. *) |  | ||||||
| val process_pages : (int -> Pdfpage.t -> Pdfpage.t * int * Pdftransform.transform_matrix) -> |  | ||||||
|                     Pdf.t -> int list -> Pdf.t |  | ||||||
|  |  | ||||||
| (** Same as [process_pages], but iterate rather than map. *) |  | ||||||
| val iter_pages : (int -> Pdfpage.t -> unit) -> Pdf.t -> int list -> unit |  | ||||||
|  |  | ||||||
| (** Same as [process_pages] but return the list of outputs of the map function. *) |  | ||||||
| val map_pages : (int -> Pdfpage.t -> 'a) -> Pdf.t -> int list -> 'a list |  | ||||||
|  |  | ||||||
| val copy_cropbox_to_mediabox : Pdf.t -> int list -> Pdf.t | val copy_cropbox_to_mediabox : Pdf.t -> int list -> Pdf.t | ||||||
|  |  | ||||||
| (** {2 Bookmarks} *) |  | ||||||
|  |  | ||||||
| (** [parse_bookmark_file verify pdf input] parses the bookmark file in [input]. |  | ||||||
| Details of the bookmark file format can be found in cpdfmanual.pdf *) |  | ||||||
| val parse_bookmark_file : bool -> Pdf.t -> Pdfio.input -> Pdfmarks.t list |  | ||||||
|  |  | ||||||
| (** [add_bookmarks verify input pdf] adds bookmarks from the bookmark file |  | ||||||
| given. If [verify] is given, bookmarks will be verified to ensure, for example, |  | ||||||
| that they are not out of the page range. *)  |  | ||||||
| val add_bookmarks : json:bool -> bool -> Pdfio.input -> Pdf.t -> Pdf.t |  | ||||||
|  |  | ||||||
| (** [list_bookmarks encoding range pdf output] lists the bookmarks to the given |  | ||||||
| output in the format specified in cpdfmanual.pdf *) |  | ||||||
| val list_bookmarks : json:bool -> Cpdfmetadata.encoding -> int list -> Pdf.t -> Pdfio.output -> unit |  | ||||||
|  |  | ||||||
| (** {2 Stamping} *) | (** {2 Stamping} *) | ||||||
|  |  | ||||||
| (** [combine_pages fast under over scaletofit swap equalize] combines the page | (** [combine_pages fast under over scaletofit swap equalize] combines the page | ||||||
| @@ -51,19 +26,6 @@ val combine_pages : bool -> Pdf.t -> Pdf.t -> bool -> bool -> bool -> Pdf.t | |||||||
| [combine_pages]. *) | [combine_pages]. *) | ||||||
| val stamp : bool -> Cpdfposition.position -> bool -> bool -> bool -> bool -> bool -> int list -> Pdf.t -> Pdf.t -> Pdf.t | val stamp : bool -> Cpdfposition.position -> bool -> bool -> bool -> bool -> bool -> int list -> Pdf.t -> Pdf.t -> Pdf.t | ||||||
|  |  | ||||||
| (** {2 Splitting PDFs} *) |  | ||||||
|  |  | ||||||
| (** Split a PDF on bookmarks of a given level or below. Level 0 is top level. *) |  | ||||||
| val split_on_bookmarks : Pdf.t -> int -> Pdf.t list |  | ||||||
|  |  | ||||||
| (** {2 Listing fonts} *) |  | ||||||
|  |  | ||||||
| (** Print font list to stdout *) |  | ||||||
| val print_fonts : Pdf.t -> int list -> unit |  | ||||||
|  |  | ||||||
| (** Return font list. Page number, name, subtype, basefont, encoding.  *) |  | ||||||
| val list_fonts : Pdf.t -> int list -> (int * string * string * string * string) list |  | ||||||
|  |  | ||||||
| (** {2 Adding text} *) | (** {2 Adding text} *) | ||||||
|  |  | ||||||
| (** Justification of multiline text *) | (** Justification of multiline text *) | ||||||
| @@ -128,9 +90,6 @@ val removetext : int list -> Pdf.t -> Pdf.t | |||||||
|  |  | ||||||
| (**  {2 Page geometry} *) | (**  {2 Page geometry} *) | ||||||
|  |  | ||||||
| (** Print page info (Mediabox etc) to standard output. *) |  | ||||||
| val output_page_info : Pdf.t -> int list -> unit |  | ||||||
|  |  | ||||||
| (** True if a given page in a PDF has a given box *) | (** True if a given page in a PDF has a given box *) | ||||||
| val hasbox : Pdf.t -> int -> string -> bool | val hasbox : Pdf.t -> int -> string -> bool | ||||||
|  |  | ||||||
| @@ -195,20 +154,6 @@ val trim_marks : ?fast:bool -> Pdf.t -> int list -> Pdf.t | |||||||
|  |  | ||||||
| val show_boxes : ?fast:bool -> Pdf.t -> int list -> Pdf.t | val show_boxes : ?fast:bool -> Pdf.t -> int list -> Pdf.t | ||||||
|  |  | ||||||
| (** {2 Annotations} *) |  | ||||||
|  |  | ||||||
| (** List the annotations to standard output in a given encoding. See cpdfmanual.pdf for the format details. *) |  | ||||||
| val list_annotations : json:bool -> Cpdfmetadata.encoding -> Pdf.t -> unit |  | ||||||
|  |  | ||||||
| (** Return the annotations as a (pagenumber, content) list *) |  | ||||||
| val get_annotations : Cpdfmetadata.encoding -> Pdf.t -> (int * string) list |  | ||||||
|  |  | ||||||
| (** Copy the annotations on a given set of pages from a to b. b is returned. *) |  | ||||||
| val copy_annotations : int list -> Pdf.t -> Pdf.t -> Pdf.t |  | ||||||
|  |  | ||||||
| (** Remove the annotations on given pages. *) |  | ||||||
| val remove_annotations : int list -> Pdf.t -> Pdf.t |  | ||||||
|  |  | ||||||
| (** {2 Imposition} *) | (** {2 Imposition} *) | ||||||
|  |  | ||||||
| val impose : x:float -> y:float -> fit:bool -> columns:bool -> rtl:bool -> btt:bool -> center:bool -> margin:float -> spacing:float -> linewidth:float -> fast:bool -> Pdf.t -> Pdf.t | val impose : x:float -> y:float -> fit:bool -> columns:bool -> rtl:bool -> btt:bool -> center:bool -> margin:float -> spacing:float -> linewidth:float -> fast:bool -> Pdf.t -> Pdf.t | ||||||
|   | |||||||
							
								
								
									
										188
									
								
								cpdfannot.ml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										188
									
								
								cpdfannot.ml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,188 @@ | |||||||
|  | open Pdfutil | ||||||
|  |  | ||||||
|  | (* Adapt a page function [f], which takes a page number and a page and returns | ||||||
|  |    a page, to the triple form passed to process_pages: the processed page, the | ||||||
|  |    page number unchanged, and the identity matrix. For callers which do not | ||||||
|  |    need to deal with matrices. *) | ||||||
|  | let ppstub f n p = | ||||||
|  |   let processed = f n p in | ||||||
|  |     (processed, n, Pdftransform.i_matrix) | ||||||
|  |  | ||||||
|  | (* \section{List annotations} *) | ||||||
|  | (* The /Contents string of an annotation, converted for output in the given | ||||||
|  |    encoding. The empty string is returned when /Contents is absent or is not a | ||||||
|  |    string. *) | ||||||
|  | let get_annotation_string encoding pdf annot = | ||||||
|  |   let contents = Pdf.lookup_direct pdf "/Contents" annot in | ||||||
|  |     match contents with | ||||||
|  |     | Some (Pdf.String text) -> Cpdfmetadata.encode_output encoding text | ||||||
|  |     | Some _ | None -> "" | ||||||
|  |  | ||||||
|  | (* Print one annotation as "Page N: text" followed by a newline. Annotations | ||||||
|  |    whose text is empty are skipped entirely. *) | ||||||
|  | let print_annotation encoding pdf num annot = | ||||||
|  |   let text = get_annotation_string encoding pdf annot in | ||||||
|  |     if text <> "" then | ||||||
|  |       begin | ||||||
|  |         flprint (Printf.sprintf "Page %d: " num); | ||||||
|  |         flprint text; | ||||||
|  |         flprint "\n" | ||||||
|  |       end | ||||||
|  |  | ||||||
|  | (* Print every annotation in the /Annots array of one page. A page with no | ||||||
|  |    /Annots array prints nothing. *) | ||||||
|  | let list_page_annotations encoding pdf num page = | ||||||
|  |   let annots = | ||||||
|  |     match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with | ||||||
|  |     | Some (Pdf.Array a) -> a | ||||||
|  |     | _ -> [] | ||||||
|  |   in | ||||||
|  |     iter (print_annotation encoding pdf num) (map (Pdf.direct pdf) annots) | ||||||
|  |  | ||||||
|  | (* The annotations of one page as a list of JSON values, each a two-element | ||||||
|  |    list holding the page number and the annotation object. *) | ||||||
|  | let annotations_json_page pdf page pagenum = | ||||||
|  |   let json_of_annot annot = | ||||||
|  |     `List [`Int pagenum; Cpdfjson.json_of_object pdf (fun _ -> ()) false false annot] | ||||||
|  |   in | ||||||
|  |     match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with | ||||||
|  |     | Some (Pdf.Array annots) -> | ||||||
|  |         let direct = map (Pdf.direct pdf) annots in | ||||||
|  |           map json_of_annot direct | ||||||
|  |     | _ -> [] | ||||||
|  |  | ||||||
|  | (* Pretty-print the annotations of every page, as a single JSON list, to | ||||||
|  |    standard output. Pages are numbered from 1 by [indx]. *) | ||||||
|  | let list_annotations_json pdf = | ||||||
|  |   let pages = Pdfpage.pages_of_pagetree pdf in | ||||||
|  |   let per_page = map2 (annotations_json_page pdf) pages (indx pages) in | ||||||
|  |     Cpdfyojson.Safe.pretty_to_channel stdout (`List (flatten per_page)) | ||||||
|  |  | ||||||
|  | (* List the annotations of [pdf] on standard output: as JSON when [json] is | ||||||
|  |    true, otherwise as "Page N: text" lines in the given encoding. The page | ||||||
|  |    range is only needed by the text lister, so it is only parsed on that | ||||||
|  |    path. *) | ||||||
|  | let list_annotations ~json encoding pdf = | ||||||
|  |   if json then list_annotations_json pdf else | ||||||
|  |     let range = Cpdfpagespec.parse_pagespec pdf "all" in | ||||||
|  |       Cpdfpage.iter_pages (list_page_annotations encoding pdf) pdf range | ||||||
|  |  | ||||||
|  | (* Return the annotations of [pdf] as a (page number, text) list, in page | ||||||
|  |    order. Every entry of a page's /Annots array yields one pair; the text is | ||||||
|  |    "" for an annotation without a /Contents string. Pages are numbered with | ||||||
|  |    [indx], as list_annotations_json does, rather than with an explicit | ||||||
|  |    [ilist 1 (length pages)]. | ||||||
|  |    NOTE(review): [indx] is presumably also safe on an empty page list, where | ||||||
|  |    [ilist 1 0] may not be - confirm against Pdfutil. *) | ||||||
|  | let get_annotations encoding pdf = | ||||||
|  |   let pages = Pdfpage.pages_of_pagetree pdf in | ||||||
|  |   let annotations_of_page page pagenumber = | ||||||
|  |     match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with | ||||||
|  |     | Some (Pdf.Array annots) -> | ||||||
|  |         map | ||||||
|  |           (fun annot -> | ||||||
|  |              (pagenumber, get_annotation_string encoding pdf (Pdf.direct pdf annot))) | ||||||
|  |           annots | ||||||
|  |     | _ -> [] | ||||||
|  |   in | ||||||
|  |     flatten (map2 annotations_of_page pages (indx pages)) | ||||||
|  |  | ||||||
|  | (* Give the first PDF the same number of pages as the second. If it is shorter, | ||||||
|  |    blank A4 pages are appended; if it is longer, the extra pages at the end are | ||||||
|  |    dropped. The second PDF is returned alongside, unchanged. *) | ||||||
|  | let equalise_lengths a b = | ||||||
|  |   let len_a = Pdfpage.endpage a | ||||||
|  |   and len_b = Pdfpage.endpage b in | ||||||
|  |     let a' = | ||||||
|  |       if len_a < len_b then | ||||||
|  |         let padding = many (Pdfpage.blankpage Pdfpaper.a4) (len_b - len_a) in | ||||||
|  |           Pdfpage.change_pages false a (Pdfpage.pages_of_pagetree a @ padding) | ||||||
|  |       else if len_a > len_b then | ||||||
|  |         Pdfpage.change_pages false a (take (Pdfpage.pages_of_pagetree a) len_b) | ||||||
|  |       else | ||||||
|  |         a | ||||||
|  |     in | ||||||
|  |       (a', b) | ||||||
|  |  | ||||||
|  | (* Copy annotations *) | ||||||
|  |  | ||||||
|  | (* FIXME: Why does this chop the files to the same length? Should be able to | ||||||
|  | apply annotations from a longer file to a shorter? *) | ||||||
|  |  | ||||||
|  | (* Rewrite indirect references so that they point to pages in the destination | ||||||
|  |    file. This prevents pages being copied, and ensures the links are correct. | ||||||
|  |    An Indirect whose object number is a key of [table] is replaced by the | ||||||
|  |    number it maps to; any other Indirect is left alone. Arrays and | ||||||
|  |    dictionaries are traversed recursively; all other objects are returned | ||||||
|  |    as they are. | ||||||
|  |  | ||||||
|  |    NOTE(review): [indest] is set to true underneath a /Dest key but is never | ||||||
|  |    consulted, so references outside a /Dest are rewritten as well. Confirm | ||||||
|  |    whether rewriting should be restricted to destinations before relying on | ||||||
|  |    the flag. *) | ||||||
|  | let rec renumber_in_dest table indest = function | ||||||
|  |   | Pdf.Indirect i -> | ||||||
|  |       (* Hashtbl.find reports a missing key with Not_found. Catch only that, | ||||||
|  |          so that no unrelated exception is silently swallowed here. *) | ||||||
|  |       begin | ||||||
|  |         try Pdf.Indirect (Hashtbl.find table i) with Not_found -> Pdf.Indirect i | ||||||
|  |       end | ||||||
|  |   | Pdf.Array a -> | ||||||
|  |       Pdf.recurse_array (renumber_in_dest table indest) a | ||||||
|  |   | Pdf.Dictionary d -> | ||||||
|  |       Pdf.Dictionary | ||||||
|  |         (map | ||||||
|  |           (function | ||||||
|  |            | ("/Dest", v) -> ("/Dest", renumber_in_dest table true v) | ||||||
|  |            | (k, v) -> (k, renumber_in_dest table indest v)) | ||||||
|  |           d) | ||||||
|  |   | x -> x | ||||||
|  |  | ||||||
|  | (* Apply renumber_in_dest to object [objnum] of [pdf] and store the result back | ||||||
|  |    under the same object number. *) | ||||||
|  | let renumber_in_object pdf objnum table = | ||||||
|  |   let original = Pdf.lookup_obj pdf objnum in | ||||||
|  |   let rewritten = renumber_in_dest table false original in | ||||||
|  |     Pdf.addobj_given_num pdf (objnum, rewritten) | ||||||
|  |  | ||||||
|  | (* Copy the annotations of [frompage] (a page of [frompdf]) onto [topage] (a | ||||||
|  |    page of [topdf]). Returns [topdf] together with the updated page, whose | ||||||
|  |    /Annots array holds the copied annotations followed by any it already had. | ||||||
|  |    If [frompage] has no /Annots array, [topage] is returned as it was. | ||||||
|  |  | ||||||
|  |    The annotation objects are first rewritten inside [frompdf] so that page | ||||||
|  |    references point at the corresponding pages of [topdf]; then every object | ||||||
|  |    they reference is added to [topdf] under the same object number. | ||||||
|  |    NOTE(review): this relies on the two files not sharing object numbers; | ||||||
|  |    the caller is assumed to have renumbered them. *) | ||||||
|  | let copy_annotations_page topdf frompdf frompage topage = | ||||||
|  |   match Pdf.lookup_direct frompdf "/Annots" frompage.Pdfpage.rest with | ||||||
|  |   | Some (Pdf.Array frompage_annots as annots) -> | ||||||
|  |       (* Maps each page object number of frompdf to that of the page in the | ||||||
|  |          same position in topdf. *) | ||||||
|  |       let table = | ||||||
|  |         hashtable_of_dictionary | ||||||
|  |           (combine | ||||||
|  |              (Pdf.page_reference_numbers frompdf) | ||||||
|  |              (Pdf.page_reference_numbers topdf)) | ||||||
|  |       in | ||||||
|  |         iter | ||||||
|  |           (function | ||||||
|  |            (* FIXME: We assume they are indirects. Must also do direct, though rare.*) | ||||||
|  |            | Pdf.Indirect x -> renumber_in_object frompdf x table | ||||||
|  |            | _ -> ()) | ||||||
|  |           frompage_annots; | ||||||
|  |         let objects_to_copy = Pdf.objects_referenced [] [] frompdf annots in | ||||||
|  |           iter | ||||||
|  |             (fun n -> | ||||||
|  |                ignore (Pdf.addobj_given_num topdf (n, Pdf.lookup_obj frompdf n))) | ||||||
|  |             objects_to_copy; | ||||||
|  |           (* The existing annotations belong to the destination page, so they | ||||||
|  |              must be resolved in topdf. Resolving them in frompdf misses an | ||||||
|  |              /Annots entry which is an indirect reference into topdf. *) | ||||||
|  |           let topage_annots = | ||||||
|  |             match Pdf.lookup_direct topdf "/Annots" topage.Pdfpage.rest with | ||||||
|  |             | Some (Pdf.Array existing) -> existing | ||||||
|  |             | _ -> [] | ||||||
|  |           in | ||||||
|  |           let merged = Pdf.Array (frompage_annots @ topage_annots) in | ||||||
|  |           let topage' = | ||||||
|  |             {topage with Pdfpage.rest = | ||||||
|  |                Pdf.add_dict_entry topage.Pdfpage.rest "/Annots" merged} | ||||||
|  |           in | ||||||
|  |             topdf, topage' | ||||||
|  |   | _ -> topdf, topage | ||||||
|  |  | ||||||
|  | (* Copy the annotations on the pages in [range] from [frompdf] to [topdf], and | ||||||
|  |    return the updated [topdf]. [frompdf] is first padded or truncated to the | ||||||
|  |    page count of [topdf], so the two page lists are walked in step. *) | ||||||
|  | let copy_annotations range frompdf topdf = | ||||||
|  |   let frompdf, topdf = equalise_lengths frompdf topdf in | ||||||
|  |     (* NOTE(review): presumably renumber_pdfs makes the object numbers of the | ||||||
|  |        two files disjoint, so objects can be copied across under their own | ||||||
|  |        numbers - confirm against Pdf. *) | ||||||
|  |     match Pdf.renumber_pdfs [frompdf; topdf] with  | ||||||
|  |     | [frompdf; topdf] -> | ||||||
|  |         let frompdf_pages = Pdfpage.pages_of_pagetree frompdf in | ||||||
|  |         let topdf_pages = Pdfpage.pages_of_pagetree topdf in | ||||||
|  |           (* pdf: the destination so far. pages: new destination pages, most | ||||||
|  |              recent first. pnum: 1-based number of the current page. *) | ||||||
|  |           let pdf = ref topdf | ||||||
|  |           and pages = ref [] | ||||||
|  |           and pnum = ref 1 | ||||||
|  |           and frompdf_pages = ref frompdf_pages | ||||||
|  |           and topdf_pages = ref topdf_pages in | ||||||
|  |             (* Go through, updating pdf and collecting new pages. *) | ||||||
|  |             while not (isnull !frompdf_pages) do | ||||||
|  |               let frompdf_page = hd !frompdf_pages | ||||||
|  |               and topdf_page = hd !topdf_pages in | ||||||
|  |                 (* Pages outside the range keep their destination page as is. *) | ||||||
|  |                 let pdf', page = | ||||||
|  |                   if mem !pnum range | ||||||
|  |                     then copy_annotations_page !pdf frompdf frompdf_page topdf_page | ||||||
|  |                     else !pdf, topdf_page | ||||||
|  |                 in | ||||||
|  |                   pdf := pdf'; | ||||||
|  |                   pages =| page; | ||||||
|  |                   incr pnum; | ||||||
|  |                   frompdf_pages := tl !frompdf_pages; | ||||||
|  |                   topdf_pages := tl !topdf_pages | ||||||
|  |             done; | ||||||
|  |             (* pages was built in reverse, so restore page order. *) | ||||||
|  |             Pdfpage.change_pages true !pdf (rev !pages) | ||||||
|  |     (* Two documents were passed in, so two are expected back. *) | ||||||
|  |     | _ -> assert false | ||||||
|  |  | ||||||
|  | (* \section{Remove annotations} *) | ||||||
|  | (* Remove the /Annots entry from every page whose number is in [range]. Other | ||||||
|  |    pages are left untouched. *) | ||||||
|  | let remove_annotations range pdf = | ||||||
|  |   let strip pagenum page = | ||||||
|  |     if not (mem pagenum range) then page else | ||||||
|  |       {page with Pdfpage.rest = Pdf.remove_dict_entry page.Pdfpage.rest "/Annots"} | ||||||
|  |   in | ||||||
|  |     Cpdfpage.process_pages (ppstub strip) pdf range | ||||||
							
								
								
									
										13
									
								
								cpdfannot.mli
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								cpdfannot.mli
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,13 @@ | |||||||
|  | (** {2 Annotations} *) | ||||||
|  |  | ||||||
|  | (** [list_annotations ~json encoding pdf] lists the annotations of [pdf] to | ||||||
|  | standard output. When [json] is true the output is JSON; otherwise it is text | ||||||
|  | in the given encoding. See cpdfmanual.pdf for the format details. *) | ||||||
|  | val list_annotations : json:bool -> Cpdfmetadata.encoding -> Pdf.t -> unit | ||||||
|  |  | ||||||
|  | (** [get_annotations encoding pdf] returns the annotations as a | ||||||
|  | (pagenumber, content) list, with the content in the given encoding. *) | ||||||
|  | val get_annotations : Cpdfmetadata.encoding -> Pdf.t -> (int * string) list | ||||||
|  |  | ||||||
|  | (** [copy_annotations range a b] copies the annotations on the pages in [range] | ||||||
|  | from [a] to [b]. The updated [b] is returned. *) | ||||||
|  | val copy_annotations : int list -> Pdf.t -> Pdf.t -> Pdf.t | ||||||
|  |  | ||||||
|  | (** [remove_annotations range pdf] removes the annotations on the pages in | ||||||
|  | [range]. *) | ||||||
|  | val remove_annotations : int list -> Pdf.t -> Pdf.t | ||||||
							
								
								
									
										294
									
								
								cpdfbookmarks.ml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										294
									
								
								cpdfbookmarks.ml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,294 @@ | |||||||
|  | open Pdfutil | ||||||
|  | open Cpdferror | ||||||
|  |  | ||||||
|  | (* \section{Add bookmarks} *) | ||||||
|  | (* Collect the results of successive [read_line input] calls, in order, until | ||||||
|  |    one of them raises; whatever was read up to that point is returned. | ||||||
|  |    NOTE(review): any exception ends the loop, not just end of input. *) | ||||||
|  | let read_lines input = | ||||||
|  |   let rec gather acc = | ||||||
|  |     match read_line input with | ||||||
|  |     | line -> gather (line::acc) | ||||||
|  |     | exception _ -> rev acc | ||||||
|  |   in | ||||||
|  |     gather [] | ||||||
|  |  | ||||||
|  | (* Verify a list of bookmarks. A level may exceed the previous level by at most | ||||||
|  |    one and may not be negative; the page number of each target must lie between | ||||||
|  |    0 and [endpage] inclusive. *) | ||||||
|  | let rec verify_bookmarks pdf lastlevel fastrefnums endpage marks = | ||||||
|  |   match marks with | ||||||
|  |   | [] -> true | ||||||
|  |   | mark::rest -> | ||||||
|  |       let level = mark.Pdfmarks.level in | ||||||
|  |       let page = Pdfpage.pagenumber_of_target pdf ~fastrefnums mark.Pdfmarks.target in | ||||||
|  |       let level_ok = level < lastlevel + 2 && level >= 0 in | ||||||
|  |       let page_ok = page <= endpage && page >= 0 in | ||||||
|  |         level_ok && page_ok && verify_bookmarks pdf level fastrefnums endpage rest | ||||||
|  |  | ||||||
|  | (* Entry point for verification: builds the table from page object number to | ||||||
|  |    page number, requires the first bookmark to be at level 0, and checks the | ||||||
|  |    remaining ones with the recursive verifier defined before this one. | ||||||
|  |    NOTE(review): the target page of the first bookmark is not range-checked | ||||||
|  |    here - confirm whether that is intended. *) | ||||||
|  | let verify_bookmarks pdf lastlevel endpage marks = | ||||||
|  |   let objnums = Pdf.page_reference_numbers pdf in | ||||||
|  |   let fastrefnums = hashtable_of_dictionary (combine objnums (indx objnums)) in | ||||||
|  |     match marks with | ||||||
|  |     | first::rest -> | ||||||
|  |         first.Pdfmarks.level = 0 | ||||||
|  |         && verify_bookmarks pdf lastlevel fastrefnums endpage rest | ||||||
|  |     | [] -> true | ||||||
|  |  | ||||||
|  | (* Parse a line of the bookmarks file. *) | ||||||
|  |  | ||||||
|  | (* Undo the escaping used in bookmark files on a list of characters: | ||||||
|  |    backslash-backslash becomes a backslash, backslash-quote a quote and | ||||||
|  |    backslash-n a newline. Any other character, including a lone backslash, is | ||||||
|  |    kept. [prev] holds the output so far, in reverse. *) | ||||||
|  | let rec fixup_characters prev chars = | ||||||
|  |   match chars with | ||||||
|  |   | '\\'::c::rest when c = '\\' || c = '"' -> fixup_characters (c::prev) rest | ||||||
|  |   | '\\'::'n'::rest -> fixup_characters ('\n'::prev) rest | ||||||
|  |   | c::rest -> fixup_characters (c::prev) rest | ||||||
|  |   | [] -> List.rev prev | ||||||
|  |  | ||||||
|  | (* Debugging aid: print a string on standard output after the tag "STR: ". *) | ||||||
|  | let debug_bookmark_string s = | ||||||
|  |   print_string (Printf.sprintf "STR: %s\n" s) | ||||||
|  |  | ||||||
|  | (* Turn a destination object read from a bookmark file into a target. | ||||||
|  |    - An array starting with an integer: the integer is replaced by an indirect | ||||||
|  |      reference to the page object of page [i'], and the array is read as a | ||||||
|  |      destination. It is an error if page [i'] has no page object. | ||||||
|  |    - null: the null destination. | ||||||
|  |    - A string: read as a destination. | ||||||
|  |    - Anything else: kept as an action. *) | ||||||
|  | let target_of_markfile_obj pdf i' pdfobj = | ||||||
|  |   match pdfobj with | ||||||
|  |   | Pdf.Array (Pdf.Integer _::more) -> | ||||||
|  |       begin match Pdfpage.page_object_number pdf i' with | ||||||
|  |       | Some p -> Pdfdest.read_destination pdf (Pdf.Array (Pdf.Indirect p::more)) | ||||||
|  |       | None -> raise (Pdf.PDFError "bookmark_of_data: page obj num not found") | ||||||
|  |       end | ||||||
|  |   | Pdf.Null -> Pdfdest.NullDestination | ||||||
|  |   | Pdf.String _ as str -> Pdfdest.read_destination pdf str | ||||||
|  |   | other -> Pdfdest.Action other | ||||||
|  |  | ||||||
|  | (* The target of a bookmark line. If the optional destination is exactly one | ||||||
|  |    lexed string, it is parsed as a PDF object and converted; otherwise the | ||||||
|  |    target is simply page [i']. *) | ||||||
|  | let target_of_markfile_target pdf i' optionaldest = | ||||||
|  |   match optionaldest with | ||||||
|  |   | [Pdfgenlex.LexString s] -> | ||||||
|  |       target_of_markfile_obj pdf i' (Pdfread.parse_single_object s) | ||||||
|  |   | _ -> Pdfpage.target_of_pagenumber pdf i' | ||||||
|  |  | ||||||
|  | (* Build a bookmark from the fields of one bookmark-file line: level [i], | ||||||
|  |    escaped UTF8 text [s], page number [i'], open flag and optional | ||||||
|  |    destination. The text is un-escaped and then converted to a PDFDocString. *) | ||||||
|  | let bookmark_of_data pdf i s i' isopen optionaldest = | ||||||
|  |   let unescaped = implode (fixup_characters [] (explode s)) in | ||||||
|  |     {Pdfmarks.level = i; | ||||||
|  |      Pdfmarks.text = Pdftext.pdfdocstring_of_utf8 unescaped; | ||||||
|  |      Pdfmarks.target = target_of_markfile_target pdf i' optionaldest; | ||||||
|  |      Pdfmarks.isopen = isopen} | ||||||
|  |  | ||||||
|  | (* As target_of_markfile_obj, but starting from the JSON form of the object. *) | ||||||
|  | let target_of_json_target pdf pagenumber target = | ||||||
|  |   let obj = Cpdfjson.object_of_json target in | ||||||
|  |     target_of_markfile_obj pdf pagenumber obj | ||||||
|  |  | ||||||
|  | (* Build a bookmark from one JSON object with the keys "level" (int), "text" | ||||||
|  |    (string, UTF8), "page" (int), "open" (bool) and "target". The keys are | ||||||
|  |    looked up by name, so they may come in any order - the members of a JSON | ||||||
|  |    object are unordered - and further keys are ignored. A missing key, a value | ||||||
|  |    of the wrong JSON type, or a non-object is reported as a malformed mark. *) | ||||||
|  | let mark_of_json pdf = function | ||||||
|  |   | `Assoc fields -> | ||||||
|  |       let field k = List.assoc_opt k fields in | ||||||
|  |         begin match field "level", field "text", field "page", field "open", field "target" with | ||||||
|  |         | Some (`Int level), Some (`String text), Some (`Int pagenumber), | ||||||
|  |           Some (`Bool openstatus), Some target -> | ||||||
|  |             {Pdfmarks.level = level; | ||||||
|  |              Pdfmarks.text = Pdftext.pdfdocstring_of_utf8 text; | ||||||
|  |              Pdfmarks.target = target_of_json_target pdf pagenumber target; | ||||||
|  |              Pdfmarks.isopen = openstatus} | ||||||
|  |         | _ -> error "malformed mark in mark_of_json" | ||||||
|  |         end | ||||||
|  |   | _ -> error "malformed mark in mark_of_json" | ||||||
|  |  | ||||||
|  | (* Convert the top level of a JSON bookmark file, which must be a list of | ||||||
|  |    marks, into bookmarks. *) | ||||||
|  | let marks_of_json pdf = function | ||||||
|  |   | `List ms -> map (mark_of_json pdf) ms | ||||||
|  |   | _ -> error "top level of JSON bookmark file not a list" | ||||||
|  |  | ||||||
|  | (* Parse a JSON bookmark file from the input [i]. The JSON is read from the | ||||||
|  |    underlying channel when the input has one, and from the bytes of the input | ||||||
|  |    otherwise. If [verify] is true the bookmarks are checked with | ||||||
|  |    verify_bookmarks. Every exception raised along the way, including the | ||||||
|  |    verification error, is reported as a malformed JSON bookmark file. *) | ||||||
|  | let parse_bookmark_file_json verify pdf i = | ||||||
|  |   let module J = Cpdfyojson.Safe in | ||||||
|  |   let read_json () = | ||||||
|  |     match i.Pdfio.caml_channel with | ||||||
|  |     | Some ch -> J.from_channel ch | ||||||
|  |     | None -> | ||||||
|  |         J.from_string | ||||||
|  |           (Pdfio.string_of_bytes (Pdfio.bytes_of_input i 0 i.Pdfio.in_channel_length)) | ||||||
|  |   in | ||||||
|  |     try | ||||||
|  |       let marks = marks_of_json pdf (read_json ()) in | ||||||
|  |         if not verify || verify_bookmarks pdf 0 (Pdfpage.endpage pdf) marks | ||||||
|  |           then marks | ||||||
|  |           else error "Bad bookmark file (References non-existant pages or is malformed)" | ||||||
|  |     with | ||||||
|  |       e -> | ||||||
|  |         error (Printf.sprintf "Malformed JSON bookmark file (%s)" (Printexc.to_string e)) | ||||||
|  |  | ||||||
|  | (* Parse a textual bookmark file from [input]. Each line lexes to an integer | ||||||
|  |    level, a string (the text), an integer page number, optionally the name | ||||||
|  |    "open", and then an optional destination. Lines which lex to nothing are | ||||||
|  |    ignored; any other shape is an error. If [verify] is true the bookmarks | ||||||
|  |    are checked with verify_bookmarks. | ||||||
|  |  | ||||||
|  |    Every exception raised along the way is reported through [error], together | ||||||
|  |    with the number of the last line read. *) | ||||||
|  | let parse_bookmark_file verify pdf input = | ||||||
|  |   (* One counter only. It must be the same reference that the handler at the | ||||||
|  |      bottom reads: a second binding inside the [try] would shadow it and leave | ||||||
|  |      the handler reporting line 0 for every error. *) | ||||||
|  |   let currline = ref 0 in | ||||||
|  |   try | ||||||
|  |     let lines = Pdfio.read_lines input in | ||||||
|  |       let bookmarks = ref [] in | ||||||
|  |         iter | ||||||
|  |           (function line -> | ||||||
|  |              match | ||||||
|  |                incr currline; | ||||||
|  |                Pdfgenlex.lex_string line | ||||||
|  |              with | ||||||
|  |              | Pdfgenlex.LexInt i::Pdfgenlex.LexString s::Pdfgenlex.LexInt i'::Pdfgenlex.LexName "open"::optionaldest -> | ||||||
|  |                  bookmarks =| bookmark_of_data pdf i s i' true optionaldest | ||||||
|  |              | Pdfgenlex.LexInt i::Pdfgenlex.LexString s::Pdfgenlex.LexInt i'::optionaldest -> | ||||||
|  |                  bookmarks =| bookmark_of_data pdf i s i' false optionaldest | ||||||
|  |              | [] -> () (* ignore blank lines *) | ||||||
|  |              | _ -> | ||||||
|  |                  error ("Bad bookmark file, line " ^ (string_of_int !currline))) | ||||||
|  |           lines; | ||||||
|  |         (* The bookmarks were accumulated most recent first. *) | ||||||
|  |         let bookmarks = rev !bookmarks in | ||||||
|  |           if verify then | ||||||
|  |             if verify_bookmarks pdf 0 (Pdfpage.endpage pdf) bookmarks | ||||||
|  |                 then bookmarks | ||||||
|  |                 else | ||||||
|  |                   error | ||||||
|  |                     "Bad bookmark file (References non-existant pages or is malformed)" | ||||||
|  |             else | ||||||
|  |               bookmarks | ||||||
|  |   with | ||||||
|  |     e -> | ||||||
|  |       error | ||||||
|  |         (Printf.sprintf | ||||||
|  |            "Bad bookmark file (syntax) at line %i (error was %s)" | ||||||
|  |            !currline | ||||||
|  |            (Printexc.to_string e)) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | (* Parse the bookmark file in [input], as JSON when [json] is true and in the | ||||||
|  |    textual format otherwise, and add the resulting bookmarks to [pdf]. *) | ||||||
|  | let add_bookmarks ~json verify input pdf = | ||||||
|  |   let parse = if json then parse_bookmark_file_json else parse_bookmark_file in | ||||||
|  |   let marks = parse verify pdf input in | ||||||
|  |     Pdfmarks.add_bookmarks marks pdf | ||||||
|  |  | ||||||
|  | (* List bookmarks *) | ||||||
|  | (* The textual form of a target for the bookmark listing, in double quotes. | ||||||
|  |    When the destination is a non-empty array, its first element is replaced by | ||||||
|  |    the page number of the target. *) | ||||||
|  | let output_string_of_target pdf fastrefnums x = | ||||||
|  |   let quoted obj = "\"" ^ Pdfwrite.string_of_pdf obj ^ "\"" in | ||||||
|  |     match Pdfdest.pdfobject_of_destination x with | ||||||
|  |     | Pdf.Array (_::more) -> | ||||||
|  |         let pagenum = Pdfpage.pagenumber_of_target ~fastrefnums pdf x in | ||||||
|  |           quoted (Pdf.Array (Pdf.Integer pagenum::more)) | ||||||
|  |     | other -> quoted other | ||||||
|  |  | ||||||
|  | (* The JSON form of a target. When the destination is a non-empty array, its | ||||||
|  |    first element is replaced by the page number of the target. *) | ||||||
|  | let json_of_target pdf fastrefnums x = | ||||||
|  |   let to_json obj = Cpdfjson.json_of_object pdf (fun _ -> ()) false false obj in | ||||||
|  |     match Pdfdest.pdfobject_of_destination x with | ||||||
|  |     | Pdf.Array (_::more) -> | ||||||
|  |         let pagenum = Pdfpage.pagenumber_of_target ~fastrefnums pdf x in | ||||||
|  |           to_json (Pdf.Array (Pdf.Integer pagenum::more)) | ||||||
|  |     | other -> to_json other | ||||||
|  |  | ||||||
|  | (* Write the bookmarks [marks] to [output] as a pretty-printed JSON list. Each | ||||||
|  |    bookmark becomes an object with the keys "level", "text" (as UTF8), "page" | ||||||
|  |    (as given by [calculate_page_number]), "open" and "target". *) | ||||||
|  | let output_json_marks output calculate_page_number pdf fastrefnums marks = | ||||||
|  |   let module J = Cpdfyojson.Safe in | ||||||
|  |   let json_of_mark m = | ||||||
|  |     `Assoc | ||||||
|  |        [("level", `Int m.Pdfmarks.level); | ||||||
|  |         ("text", `String (Pdftext.utf8_of_pdfdocstring m.Pdfmarks.text)); | ||||||
|  |         ("page", `Int (calculate_page_number m)); | ||||||
|  |         ("open", `Bool m.Pdfmarks.isopen); | ||||||
|  |         ("target", json_of_target pdf fastrefnums m.Pdfmarks.target)] | ||||||
|  |   in | ||||||
|  |   let json = `List (map json_of_mark marks) in | ||||||
|  |     (* Render to a string first, so that the text goes to [output] whatever | ||||||
|  |        it is attached to, rather than always to standard output. *) | ||||||
|  |     output.Pdfio.output_string (J.pretty_to_string json) | ||||||
|  |  | ||||||
|  | (* List the bookmarks in the given range to the given output, as JSON when | ||||||
|  |    [json] is true and otherwise one bookmark per line in the form | ||||||
|  |    level "text" pagenumber [open] "target" *) | ||||||
|  | let list_bookmarks ~json encoding range pdf output = | ||||||
|  |   (* Keep only the code points up to 127, as characters. *) | ||||||
|  |   let process_stripped escaped = | ||||||
|  |     let b = Buffer.create 200 in | ||||||
|  |       iter | ||||||
|  |         (fun x -> | ||||||
|  |            if x <= 127 then Buffer.add_char b (char_of_int x)) | ||||||
|  |         escaped; | ||||||
|  |       Buffer.contents b | ||||||
|  |   in | ||||||
|  |   let process_string s = | ||||||
|  |     (* Replace each occurrence of [c] by the two elements [x] and [y]. *) | ||||||
|  |     let rec replace c x y = function | ||||||
|  |     | [] -> [] | ||||||
|  |     | h::t when h = c -> x::y::replace c x y t | ||||||
|  |     | h::t -> h::replace c x y t | ||||||
|  |     in | ||||||
|  |       (* Convert to UTF8, raw, or stripped, and escape backslashes, newlines | ||||||
|  |          and quotation marks. Backslashes are escaped first so that the ones | ||||||
|  |          introduced by the other two escapes are not doubled. The raw form is | ||||||
|  |          the string as it stands, without escaping. *) | ||||||
|  |       let codepoints = Pdftext.codepoints_of_pdfdocstring s in | ||||||
|  |         let escaped = | ||||||
|  |           let bs = int_of_char '\\' | ||||||
|  |           and nl = int_of_char '\n' | ||||||
|  |           and n = int_of_char 'n' | ||||||
|  |           and q = int_of_char '\"' in | ||||||
|  |             replace q bs q (replace nl bs n (replace bs bs bs codepoints)) | ||||||
|  |         in | ||||||
|  |           match encoding with | ||||||
|  |           | Cpdfmetadata.UTF8 -> Pdftext.utf8_of_codepoints escaped | ||||||
|  |           | Cpdfmetadata.Stripped -> process_stripped escaped | ||||||
|  |           | Cpdfmetadata.Raw -> s | ||||||
|  |     in | ||||||
|  |       let bookmarks = Pdfmarks.read_bookmarks pdf in | ||||||
|  |       let refnums = Pdf.page_reference_numbers pdf in | ||||||
|  |       let rangetable = hashset_of_list range in | ||||||
|  |       let range_is_all = range = ilist 1 (Pdfpage.endpage pdf) in | ||||||
|  |       let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in | ||||||
|  |         (* Find the pagenumber of each bookmark target. If it is in the range, | ||||||
|  |          * keep that bookmark. Also keep the bookmark if its target is the null | ||||||
|  |          * destination or a named destination elsewhere, and keep everything | ||||||
|  |          * when the range covers all the pages. *) | ||||||
|  |         let inrange = | ||||||
|  |           keep | ||||||
|  |             (function x -> | ||||||
|  |                range_is_all || | ||||||
|  |                x.Pdfmarks.target = Pdfdest.NullDestination || | ||||||
|  |                (match x.Pdfmarks.target with Pdfdest.NamedDestinationElsewhere _ -> true | _ -> false) || | ||||||
|  |                Hashtbl.mem rangetable (Pdfpage.pagenumber_of_target ~fastrefnums pdf x.Pdfmarks.target)) bookmarks | ||||||
|  |         in | ||||||
|  |           let calculate_page_number mark = | ||||||
|  |             (* Some buggy PDFs use integers for page numbers instead of page | ||||||
|  |              * object references. Adobe Reader and Preview seem to support | ||||||
|  |              * this, for presumably historical reasons. So if we see a | ||||||
|  |              * OtherDocPageNumber (which is what Pdfdest parses these as, | ||||||
|  |              * because that's what they are legitimately), we use this as the | ||||||
|  |              * page number. It is zero based, though, and we are one-based, so | ||||||
|  |              * we add one. Pdfpage.pagenumber_of_target has been modified to support this.*) | ||||||
|  |             Pdfpage.pagenumber_of_target ~fastrefnums pdf mark.Pdfmarks.target | ||||||
|  |           in | ||||||
|  |             if json then | ||||||
|  |               output_json_marks output calculate_page_number pdf fastrefnums inrange | ||||||
|  |             else | ||||||
|  |               iter | ||||||
|  |                 (function mark -> | ||||||
|  |                    output.Pdfio.output_string | ||||||
|  |                      (Printf.sprintf "%i \"%s\" %i%s %s\n" | ||||||
|  |                        mark.Pdfmarks.level | ||||||
|  |                        (process_string mark.Pdfmarks.text) | ||||||
|  |                        (calculate_page_number mark) | ||||||
|  |                        (if mark.Pdfmarks.isopen then " open" else "") | ||||||
|  |                        (output_string_of_target pdf fastrefnums mark.Pdfmarks.target))) | ||||||
|  |                 inrange | ||||||
|  |  | ||||||
|  | (* o is the stamp, u is the main pdf page *) | ||||||
|  |  | ||||||
|  | (* \section{Split at bookmarks} *) | ||||||
|  |  | ||||||
|  | (* The text of the first bookmark in [marks] which points to page [n] and whose | ||||||
|  |    level is at most [splitlevel], with unsafe characters removed; "" if there | ||||||
|  |    is no such bookmark. The last argument is ignored. *) | ||||||
|  | let get_bookmark_name pdf marks splitlevel n _ = | ||||||
|  |   let objnums = Pdf.page_reference_numbers pdf in | ||||||
|  |   let fastrefnums = hashtable_of_dictionary (combine objnums (indx objnums)) in | ||||||
|  |   let matches m = | ||||||
|  |     n = Pdfpage.pagenumber_of_target ~fastrefnums pdf m.Pdfmarks.target | ||||||
|  |     && m.Pdfmarks.level <= splitlevel | ||||||
|  |   in | ||||||
|  |     match keep matches marks with | ||||||
|  |     | first::_ -> | ||||||
|  |         Cpdfattach.remove_unsafe_characters Cpdfmetadata.UTF8 first.Pdfmarks.text | ||||||
|  |     | [] -> "" | ||||||
|  |  | ||||||
|  | (* Find the stem of a filename *) | ||||||
|  | (*let stem s = | ||||||
|  |   implode (rev (tail_no_fail (dropwhile (neq '.') (rev (explode (Filename.basename s))))))*) | ||||||
|  |  | ||||||
|  | (* The page numbers targeted by the bookmarks at exactly the given level, in | ||||||
|  |    bookmark order and with duplicates removed. *) | ||||||
|  | let bookmark_pages level pdf = | ||||||
|  |   let objnums = Pdf.page_reference_numbers pdf in | ||||||
|  |   let fastrefnums = hashtable_of_dictionary (combine objnums (indx objnums)) in | ||||||
|  |   let page_if_at_level m = | ||||||
|  |     if m.Pdfmarks.level = level | ||||||
|  |       then Some (Pdfpage.pagenumber_of_target ~fastrefnums pdf m.Pdfmarks.target) | ||||||
|  |       else None | ||||||
|  |   in | ||||||
|  |     setify_preserving_order | ||||||
|  |       (option_map page_if_at_level (Pdfmarks.read_bookmarks pdf)) | ||||||
|  |  | ||||||
|  | (* Called from cpdflib.ml - different from above. Split [pdf] into several PDFs | ||||||
|  |    at the pages targeted by the bookmarks of the given level. The split points | ||||||
|  |    are those page numbers less one, with zero discarded. *) | ||||||
|  | let split_on_bookmarks pdf level = | ||||||
|  |   let points = lose (eq 0) (map pred (bookmark_pages level pdf)) in | ||||||
|  |   let pagenumbers = indx (Pdfpage.pages_of_pagetree pdf) in | ||||||
|  |   let ranges = splitat points pagenumbers in | ||||||
|  |     map (fun pagerange -> Pdfpage.pdf_of_pages pdf pagerange) ranges | ||||||
							
								
								
									
										14
									
								
								cpdfbookmarks.mli
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								cpdfbookmarks.mli
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,14 @@ | |||||||
|  | (** {2 Bookmarks} *) | ||||||
|  |  | ||||||
|  | (** [parse_bookmark_file verify pdf input] parses the bookmark file in [input]. | ||||||
|  | Details of the bookmark file format can be found in cpdfmanual.pdf *) | ||||||
|  | val parse_bookmark_file : bool -> Pdf.t -> Pdfio.input -> Pdfmarks.t list | ||||||
|  |  | ||||||
|  | (** [add_bookmarks ~json verify input pdf] adds bookmarks from the bookmark | ||||||
|  | file given. The file is read as JSON if [json] is true, and in the textual | ||||||
|  | format otherwise. If [verify] is true, the bookmarks will be verified to | ||||||
|  | ensure, for example, that they are not out of the page range. *)  | ||||||
|  | val add_bookmarks : json:bool -> bool -> Pdfio.input -> Pdf.t -> Pdf.t | ||||||
|  |  | ||||||
|  | (** [list_bookmarks ~json encoding range pdf output] lists the bookmarks in | ||||||
|  | the format specified in cpdfmanual.pdf, or as JSON if [json] is true. *) | ||||||
|  | val list_bookmarks : json:bool -> Cpdfmetadata.encoding -> int list -> Pdf.t -> Pdfio.output -> unit | ||||||
| @@ -3152,7 +3152,7 @@ let go () = | |||||||
|       | (_, pagespec, _, _, _, _)::_, _ -> |       | (_, pagespec, _, _, _, _)::_, _ -> | ||||||
|           let pdf = get_single_pdf args.op true in |           let pdf = get_single_pdf args.op true in | ||||||
|             let range = parse_pagespec_allow_empty pdf pagespec in |             let range = parse_pagespec_allow_empty pdf pagespec in | ||||||
|               Cpdf.output_page_info pdf range |               Cpdfpage.output_page_info pdf range | ||||||
|       | _ -> error "list-bookmarks: bad command line" |       | _ -> error "list-bookmarks: bad command line" | ||||||
|       end |       end | ||||||
|   | Some Metadata -> |   | Some Metadata -> | ||||||
| @@ -3162,7 +3162,7 @@ let go () = | |||||||
|       | (_, pagespec, _, _, _, _)::_, _ -> |       | (_, pagespec, _, _, _, _)::_, _ -> | ||||||
|           let pdf = get_single_pdf (Some Fonts) true in |           let pdf = get_single_pdf (Some Fonts) true in | ||||||
|           let range = parse_pagespec_allow_empty pdf pagespec in |           let range = parse_pagespec_allow_empty pdf pagespec in | ||||||
|             Cpdf.print_fonts pdf range |             Cpdffont.print_fonts pdf range | ||||||
|       | _ -> error "-list-fonts: bad command line" |       | _ -> error "-list-fonts: bad command line" | ||||||
|       end |       end | ||||||
|   | Some ListBookmarks -> |   | Some ListBookmarks -> | ||||||
| @@ -3170,7 +3170,7 @@ let go () = | |||||||
|       | (_, pagespec, _, _, _, _)::_, _ -> |       | (_, pagespec, _, _, _, _)::_, _ -> | ||||||
|         let pdf = get_single_pdf args.op true in |         let pdf = get_single_pdf args.op true in | ||||||
|           let range = parse_pagespec_allow_empty pdf pagespec in |           let range = parse_pagespec_allow_empty pdf pagespec in | ||||||
|             Cpdf.list_bookmarks ~json:args.format_json args.encoding range pdf (Pdfio.output_of_channel stdout); |             Cpdfbookmarks.list_bookmarks ~json:args.format_json args.encoding range pdf (Pdfio.output_of_channel stdout); | ||||||
|             flush stdout |             flush stdout | ||||||
|       | _ -> error "list-bookmarks: bad command line" |       | _ -> error "list-bookmarks: bad command line" | ||||||
|       end |       end | ||||||
| @@ -3540,14 +3540,14 @@ let go () = | |||||||
|   | Some RemoveAnnotations -> |   | Some RemoveAnnotations -> | ||||||
|       let pdf = get_single_pdf args.op false in |       let pdf = get_single_pdf args.op false in | ||||||
|         let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in |         let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in | ||||||
|           write_pdf false (Cpdf.remove_annotations range pdf) |           write_pdf false (Cpdfannot.remove_annotations range pdf) | ||||||
|   | Some (CopyAnnotations getfrom) -> |   | Some (CopyAnnotations getfrom) -> | ||||||
|       begin match args.inputs with |       begin match args.inputs with | ||||||
|       | [(k, _, u, o, _, _) as input] -> |       | [(k, _, u, o, _, _) as input] -> | ||||||
|         let input_pdf = get_pdf_from_input_kind input args.op k in |         let input_pdf = get_pdf_from_input_kind input args.op k in | ||||||
|           let range = parse_pagespec_allow_empty input_pdf (get_pagespec ()) in |           let range = parse_pagespec_allow_empty input_pdf (get_pagespec ()) in | ||||||
|             let pdf = |             let pdf = | ||||||
|               Cpdf.copy_annotations |               Cpdfannot.copy_annotations | ||||||
|                 range |                 range | ||||||
|                 (pdfread_pdf_of_file (optstring u) (optstring o) getfrom) |                 (pdfread_pdf_of_file (optstring u) (optstring o) getfrom) | ||||||
|                 input_pdf |                 input_pdf | ||||||
| @@ -3556,7 +3556,7 @@ let go () = | |||||||
|       | _ -> error "copy-annotations: No input file specified" |       | _ -> error "copy-annotations: No input file specified" | ||||||
|       end |       end | ||||||
|   | Some ListAnnotations -> |   | Some ListAnnotations -> | ||||||
|       Cpdf.list_annotations ~json:args.format_json args.encoding (get_single_pdf args.op true) |       Cpdfannot.list_annotations ~json:args.format_json args.encoding (get_single_pdf args.op true) | ||||||
|   | Some Shift -> |   | Some Shift -> | ||||||
|       let pdf = get_single_pdf args.op false in |       let pdf = get_single_pdf args.op false in | ||||||
|         let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in |         let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in | ||||||
| @@ -3687,7 +3687,7 @@ let go () = | |||||||
|                args.relative_to_cropbox args.underneath range pdf) |                args.relative_to_cropbox args.underneath range pdf) | ||||||
|   | Some (AddBookmarks file) -> |   | Some (AddBookmarks file) -> | ||||||
|       write_pdf false |       write_pdf false | ||||||
|         (Cpdf.add_bookmarks ~json:args.format_json true (Pdfio.input_of_channel (open_in_bin file)) |         (Cpdfbookmarks.add_bookmarks ~json:args.format_json true (Pdfio.input_of_channel (open_in_bin file)) | ||||||
|           (get_single_pdf args.op false)) |           (get_single_pdf args.op false)) | ||||||
|   | Some RemoveBookmarks -> |   | Some RemoveBookmarks -> | ||||||
|       write_pdf false (Pdfmarks.remove_bookmarks (get_single_pdf args.op false)) |       write_pdf false (Pdfmarks.remove_bookmarks (get_single_pdf args.op false)) | ||||||
|   | |||||||
							
								
								
									
										42
									
								
								cpdffont.ml
									
									
									
									
									
								
							
							
						
						
									
										42
									
								
								cpdffont.ml
									
									
									
									
									
								
							| @@ -112,7 +112,7 @@ let missing_font pdf page (name, dict) = | |||||||
|       Printf.printf "%i, %s, %s, %s, %s\n" page name subtype basefont encoding |       Printf.printf "%i, %s, %s, %s, %s\n" page name subtype basefont encoding | ||||||
|  |  | ||||||
| let missing_fonts pdf range = | let missing_fonts pdf range = | ||||||
|   Cpdf.iter_pages |   Cpdfpage.iter_pages | ||||||
|     (fun num page -> |     (fun num page -> | ||||||
|        match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with |        match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with | ||||||
|        | Some (Pdf.Dictionary fontdict) -> |        | Some (Pdf.Dictionary fontdict) -> | ||||||
| @@ -257,3 +257,43 @@ let remove_fontdescriptor pdf = function | |||||||
| let remove_fonts pdf = | let remove_fonts pdf = | ||||||
|   Pdf.objiter (fun k v -> ignore (Pdf.addobj_given_num pdf (k, remove_fontdescriptor pdf v))) pdf; |   Pdf.objiter (fun k v -> ignore (Pdf.addobj_given_num pdf (k, remove_fontdescriptor pdf v))) pdf; | ||||||
|   pdf |   pdf | ||||||
|  |  | ||||||
|  | (* List fonts *) | ||||||
|  | (* Describe one entry of a page's /Font dictionary as the tuple | ||||||
|  |    (page, name, subtype, basefont, encoding). A field is the empty string | ||||||
|  |    when its key is not found in [dict] or its value is not a PDF name. *) | ||||||
|  | let list_font pdf page (name, dict) = | ||||||
|  |   let name_or_empty key = | ||||||
|  |     match Pdf.lookup_direct pdf key dict with | ||||||
|  |     | Some (Pdf.Name _ as n) -> Pdfwrite.string_of_pdf n | ||||||
|  |     | _ -> "" | ||||||
|  |   in | ||||||
|  |     let subtype = name_or_empty "/Subtype" in | ||||||
|  |     let basefont = name_or_empty "/BaseFont" in | ||||||
|  |     let encoding = name_or_empty "/Encoding" in | ||||||
|  |       (page, name, subtype, basefont, encoding) | ||||||
|  |  | ||||||
|  | (* Collect the [list_font] tuple of every /Font resource entry on the pages | ||||||
|  |    whose number is in [range]; pages outside [range] contribute nothing. *) | ||||||
|  | let list_fonts pdf range = | ||||||
|  |   let fonts_on (num, page) = | ||||||
|  |     if not (mem num range) then [] else | ||||||
|  |       match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with | ||||||
|  |       | Some (Pdf.Dictionary fontdict) -> map (list_font pdf num) fontdict | ||||||
|  |       | _ -> [] | ||||||
|  |   in | ||||||
|  |     let pages = Pdfpage.pages_of_pagetree pdf in | ||||||
|  |       let numbered = combine (ilist 1 (length pages)) pages in | ||||||
|  |         flatten (map fonts_on numbered) | ||||||
|  |  | ||||||
|  | (* One output line for a [list_fonts] tuple: the five fields separated by | ||||||
|  |    single spaces and terminated by a newline. *) | ||||||
|  | let string_of_font (p, n, s, b, e) = | ||||||
|  |   Printf.sprintf "%i %s %s %s %s\n" p n s b e | ||||||
|  |  | ||||||
|  | (* Print the fonts of the pages in [range], one per line. The lines are joined | ||||||
|  |    with [String.concat] rather than folded with [^]: folding re-copies the | ||||||
|  |    accumulated text for every font, which is quadratic in the output size. *) | ||||||
|  | let print_fonts pdf range = | ||||||
|  |   flprint | ||||||
|  |     (String.concat "" (map string_of_font (list_fonts pdf range))) | ||||||
|  |  | ||||||
|   | |||||||
| @@ -9,3 +9,12 @@ val print_font_table : Pdf.t -> string -> int -> unit | |||||||
| val extract_fontfile : int -> string -> Pdf.t -> unit | val extract_fontfile : int -> string -> Pdf.t -> unit | ||||||
|  |  | ||||||
| val remove_fonts : Pdf.t -> Pdf.t | val remove_fonts : Pdf.t -> Pdf.t | ||||||
|  |  | ||||||
|  | (** {2 Listing fonts} *) | ||||||
|  |  | ||||||
|  | (** [print_fonts pdf range] prints the fonts listed in the /Font resources of the pages in [range] to stdout, one per line: page number, name, subtype, base font and encoding, separated by spaces. *) | ||||||
|  | val print_fonts : Pdf.t -> int list -> unit | ||||||
|  |  | ||||||
|  | (** [list_fonts pdf range] returns one tuple (page number, name, subtype, basefont, encoding) per /Font resource entry of each page in [range]; subtype, basefont and encoding are [""] when not present as a PDF name. *) | ||||||
|  | val list_fonts : Pdf.t -> int list -> (int * string * string * string * string) list | ||||||
|  |  | ||||||
|   | |||||||
| @@ -656,6 +656,5 @@ let set_open_action pdf fit pagenumber = | |||||||
|                     {pdf with Pdf.root = catalognum; Pdf.trailerdict = trailerdict'} |                     {pdf with Pdf.root = catalognum; Pdf.trailerdict = trailerdict'} | ||||||
|           | None -> error "bad root" |           | None -> error "bad root" | ||||||
|  |  | ||||||
|  |  | ||||||
| let set_version v pdf = | let set_version v pdf = | ||||||
|   pdf.Pdf.minor <- v |   pdf.Pdf.minor <- v | ||||||
|   | |||||||
							
								
								
									
										61
									
								
								cpdfpage.ml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										61
									
								
								cpdfpage.ml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,61 @@ | |||||||
|  | open Pdfutil | ||||||
|  |  | ||||||
|  | (* Print, for every page number in [range], the page's label, its five boxes | ||||||
|  |    and its rotation to standard output. A box which is not found, or is not an | ||||||
|  |    array of exactly four elements, is printed as an empty string. *) | ||||||
|  | let output_page_info pdf range = | ||||||
|  |   let pages = Pdfpage.pages_of_pagetree pdf | ||||||
|  |   and labels = Pdfpagelabels.read pdf in | ||||||
|  |     (* Render an optional four-element box array; anything else gives "". *) | ||||||
|  |     let string_of_box = function | ||||||
|  |       | Some (Pdf.Array [a; b; c; d]) -> | ||||||
|  |           Printf.sprintf "%f %f %f %f" | ||||||
|  |             (Pdf.getnum a) (Pdf.getnum b) (Pdf.getnum c) (Pdf.getnum d) | ||||||
|  |       | _ -> "" | ||||||
|  |     in | ||||||
|  |     (* The media box is a field of the page record; every other box is looked | ||||||
|  |        up by name in [page.Pdfpage.rest]. *) | ||||||
|  |     let getbox page = function | ||||||
|  |       | "/MediaBox" -> string_of_box (Some page.Pdfpage.mediabox) | ||||||
|  |       | box -> string_of_box (Pdf.lookup_direct pdf box page.Pdfpage.rest) | ||||||
|  |     in | ||||||
|  |     let describe pnum = | ||||||
|  |       let page = select pnum pages in | ||||||
|  |         Printf.printf "Page %i:\n" pnum; | ||||||
|  |         Printf.printf "Label: %s\n" | ||||||
|  |           (try Pdfpagelabels.pagelabeltext_of_pagenumber pnum labels with Not_found -> ""); | ||||||
|  |         Printf.printf "MediaBox: %s\n" (getbox page "/MediaBox"); | ||||||
|  |         Printf.printf "CropBox: %s\n" (getbox page "/CropBox"); | ||||||
|  |         Printf.printf "BleedBox: %s\n" (getbox page "/BleedBox"); | ||||||
|  |         Printf.printf "TrimBox: %s\n" (getbox page "/TrimBox"); | ||||||
|  |         Printf.printf "ArtBox: %s\n" (getbox page "/ArtBox"); | ||||||
|  |         Printf.printf "Rotation: %i\n" (Pdfpage.int_of_rotation page.Pdfpage.rotate) | ||||||
|  |     in | ||||||
|  |       iter describe range | ||||||
|  |  | ||||||
|  | (* Rebuild the document's pages: [f] is applied to each page whose number is | ||||||
|  |    in [range]; every other page is kept and paired with the identity matrix. | ||||||
|  |    The new pages and the (page number, matrix) pairs go to | ||||||
|  |    [Pdfpage.change_pages]. *) | ||||||
|  | let process_pages f pdf range = | ||||||
|  |   let pages = Pdfpage.pages_of_pagetree pdf in | ||||||
|  |     let apply n p = | ||||||
|  |       if mem n range then f n p else (p, n, Pdftransform.i_matrix) | ||||||
|  |     in | ||||||
|  |       let pages', pagenumbers, matrices = | ||||||
|  |         split3 (map2 apply (ilist 1 (length pages)) pages) | ||||||
|  |       in | ||||||
|  |         Pdfpage.change_pages ~matrices:(combine pagenumbers matrices) true pdf pages' | ||||||
|  |  | ||||||
|  | (* Call [f] on the number and page of every page whose number is in [range]; | ||||||
|  |    other pages are skipped. *) | ||||||
|  | let iter_pages f pdf range = | ||||||
|  |   let pages = Pdfpage.pages_of_pagetree pdf in | ||||||
|  |     let visit n p = if mem n range then f n p in | ||||||
|  |       iter2 visit (ilist 1 (length pages)) pages | ||||||
|  |  | ||||||
|  | (* Apply [f] to the number and page of every page whose number is in [range] | ||||||
|  |    and gather the results; other pages yield [None]. *) | ||||||
|  | let map_pages f pdf range = | ||||||
|  |   let pages = Pdfpage.pages_of_pagetree pdf in | ||||||
|  |     let pick n p = if mem n range then Some (f n p) else None in | ||||||
|  |       option_map2 pick (ilist 1 (length pages)) pages | ||||||
|  |  | ||||||
							
								
								
									
										14
									
								
								cpdfpage.mli
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								cpdfpage.mli
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,14 @@ | |||||||
|  | (** [output_page_info pdf range] prints the label, MediaBox, CropBox, BleedBox, TrimBox, ArtBox and rotation of each page in [range] to standard output. *) | ||||||
|  | val output_page_info : Pdf.t -> int list -> unit | ||||||
|  |  | ||||||
|  | (** [process_pages f pdf range] calls [f] with the page number and page of each page in [range]; [f] returns the new page, a page number and a transformation matrix. | ||||||
|  | Pages not in [range] are kept as they are, paired with the identity matrix. Returns the result of [Pdfpage.change_pages] on the resulting pages. *) | ||||||
|  | val process_pages : (int -> Pdfpage.t -> Pdfpage.t * int * Pdftransform.transform_matrix) -> | ||||||
|  |                     Pdf.t -> int list -> Pdf.t | ||||||
|  |  | ||||||
|  | (** [iter_pages f pdf range] calls [f] with the page number and page of each page in [range], for its side effects only; pages not in [range] are skipped. *) | ||||||
|  | val iter_pages : (int -> Pdfpage.t -> unit) -> Pdf.t -> int list -> unit | ||||||
|  |  | ||||||
|  | (** [map_pages f pdf range] calls [f] with the page number and page of each page in [range] and returns the list of its results; the document itself is not modified by [map_pages]. *) | ||||||
|  | val map_pages : (int -> Pdfpage.t -> 'a) -> Pdf.t -> int list -> 'a list | ||||||
|  |  | ||||||
		Reference in New Issue
	
	Block a user