diff --git a/Makefile b/Makefile index 79472fa..45511c8 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # Build the cpdf command line tools -NONDOC = cpdfyojson cpdfxmlm cpdfutil +NONDOC = cpdfyojson cpdfxmlm DOC = cpdfunicodedata cpdferror cpdfdebug cpdfjson cpdfstrftime cpdfcoord \ cpdfattach cpdfpagespec cpdfposition cpdfpresent cpdfmetadata \ diff --git a/cpdfaddtext.ml b/cpdfaddtext.ml index 67dab6f..e1f62ca 100644 --- a/cpdfaddtext.ml +++ b/cpdfaddtext.ml @@ -396,7 +396,7 @@ let addtext then Pdfpage.prepend_operators pdf ops ~fast:fast page else Pdfpage.postpend_operators pdf ops ~fast:fast page in - Cpdfpage.process_pages (Cpdfutil.ppstub addtext_page) pdf pages + Cpdfpage.process_pages (Pdfpage.ppstub addtext_page) pdf pages (* Prev is a list of lists of characters *) let split_at_newline t = @@ -611,4 +611,4 @@ let addrectangle then Pdfpage.prepend_operators pdf ops ~fast:fast page else Pdfpage.postpend_operators pdf ops ~fast:fast page in - Cpdfpage.process_pages (Cpdfutil.ppstub addrectangle_page) pdf range + Cpdfpage.process_pages (Pdfpage.ppstub addrectangle_page) pdf range diff --git a/cpdfannot.ml b/cpdfannot.ml index 8f467fc..50e2701 100644 --- a/cpdfannot.ml +++ b/cpdfannot.ml @@ -326,4 +326,4 @@ let remove_annotations range pdf = else page in - Cpdfpage.process_pages (Cpdfutil.ppstub remove_annotations_page) pdf range + Cpdfpage.process_pages (Pdfpage.ppstub remove_annotations_page) pdf range diff --git a/cpdfimpose.ml b/cpdfimpose.ml index 9577dd2..f2491c1 100644 --- a/cpdfimpose.ml +++ b/cpdfimpose.ml @@ -132,7 +132,7 @@ let impose_pages fit x y columns rtl btt center margin output_mediabox fast fit_ (r, List.hd r)*) pages, List.hd pages in - let resources' = pair_reduce (Cpdfutil.combine_pdf_resources pdf) (map (fun p -> p.Pdfpage.resources) pages) in + let resources' = pair_reduce (Pdfpage.combine_pdf_resources pdf) (map (fun p -> p.Pdfpage.resources) pages) in let rest' = pair_reduce (combine_pdf_rests pdf) (map (fun p -> p.Pdfpage.rest) pages) in let content' = let transform_stream transform contents = diff --git a/cpdfpage.ml b/cpdfpage.ml index 6a2d136..9bc350d 100644 --- a/cpdfpage.ml +++ b/cpdfpage.ml @@ -66,7 +66,7 @@ let map_pages f pdf range = * *) let hard_box pdf range boxname mediabox_if_missing fast = process_pages - (Cpdfutil.ppstub (fun pagenum page -> + (Pdfpage.ppstub (fun pagenum page -> let minx, miny, maxx, maxy = if boxname = "/MediaBox" then Pdf.parse_rectangle pdf page.Pdfpage.mediabox @@ -179,12 +179,12 @@ let set_mediabox xywhlist pdf range = [Pdf.Real x; Pdf.Real y; Pdf.Real (x +. w); Pdf.Real (y +. h)])} in - process_pages (Cpdfutil.ppstub crop_page) pdf range + process_pages (Pdfpage.ppstub crop_page) pdf range (* If a cropbox exists, make it the mediabox. If not, change nothing. *) let copy_cropbox_to_mediabox pdf range = process_pages - (Cpdfutil.ppstub (fun _ page -> + (Pdfpage.ppstub (fun _ page -> match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with | Some pdfobject -> {page with Pdfpage.mediabox = Pdf.direct pdf pdfobject} | None -> page)) @@ -197,7 +197,7 @@ let remove_cropping_pdf pdf range = Pdfpage.rest = (Pdf.remove_dict_entry page.Pdfpage.rest "/CropBox")} in - process_pages (Cpdfutil.ppstub remove_cropping_page) pdf range + process_pages (Pdfpage.ppstub remove_cropping_page) pdf range let remove_trim_pdf pdf range = let remove_trim_page _ page = @@ -205,7 +205,7 @@ let remove_trim_pdf pdf range = Pdfpage.rest = (Pdf.remove_dict_entry page.Pdfpage.rest "/TrimBox")} in - process_pages (Cpdfutil.ppstub remove_trim_page) pdf range + process_pages (Pdfpage.ppstub remove_trim_page) pdf range let remove_art_pdf pdf range = let remove_art_page _ page = @@ -213,7 +213,7 @@ let remove_art_pdf pdf range = Pdfpage.rest = (Pdf.remove_dict_entry page.Pdfpage.rest "/ArtBox")} in - process_pages (Cpdfutil.ppstub remove_art_page) pdf range + process_pages (Pdfpage.ppstub remove_art_page) pdf range let remove_bleed_pdf pdf range = let remove_bleed_page _ page = @@ -221,7 +221,7 @@ let remove_bleed_pdf pdf range = Pdfpage.rest = (Pdf.remove_dict_entry page.Pdfpage.rest "/BleedBox")} in - process_pages (Cpdfutil.ppstub remove_bleed_page) pdf range + process_pages (Pdfpage.ppstub remove_bleed_page) pdf range (* Upright functionality *) @@ -295,14 +295,14 @@ let rotate_pdf r pdf range = {page with Pdfpage.rotate = Pdfpage.rotation_of_int r} in - process_pages (Cpdfutil.ppstub rotate_page) pdf range + process_pages (Pdfpage.ppstub rotate_page) pdf range let rotate_pdf_by r pdf range = let rotate_page_by _ page = {page with Pdfpage.rotate = Pdfpage.rotation_of_int ((Pdfpage.int_of_rotation page.Pdfpage.rotate + r) mod 360)} in - process_pages (Cpdfutil.ppstub rotate_page_by) pdf range + process_pages (Pdfpage.ppstub rotate_page_by) pdf range let rotate_page_contents ~fast rotpoint r pdf pnum page = let rotation_point = @@ -537,7 +537,7 @@ let do_stamp relative_to_cropbox fast position topline midline scale_to_fit isov Pdfpage.rest = combine_page_items pdf u.Pdfpage.rest o.Pdfpage.rest; Pdfpage.resources = - Cpdfutil.combine_pdf_resources pdf u.Pdfpage.resources o.Pdfpage.resources} + Pdfpage.combine_pdf_resources pdf u.Pdfpage.resources o.Pdfpage.resources} let stamp relative_to_cropbox position topline midline fast scale_to_fit isover range over pdf = let prefix = Pdfpage.shortest_unused_prefix pdf in @@ -661,7 +661,7 @@ let setBox box minx maxx miny maxy pdf range = page.Pdfpage.rest box (Pdf.Array [Pdf.Real minx; Pdf.Real miny; Pdf.Real maxx; Pdf.Real maxy])} in - process_pages (Cpdfutil.ppstub set_box_page) pdf range + process_pages (Pdfpage.ppstub set_box_page) pdf range (* Cropping *) let crop_pdf ?(box="/CropBox") xywhlist pdf range = @@ -676,7 +676,7 @@ let crop_pdf ?(box="/CropBox") xywhlist pdf range = [Pdf.Real x; Pdf.Real y; Pdf.Real (x +. w); Pdf.Real (y +. h)])))} in - process_pages (Cpdfutil.ppstub crop_page) pdf range + process_pages (Pdfpage.ppstub crop_page) pdf range (* Add rectangles on top of pages to show Media, Crop, Art, Trim, Bleed boxes. * @@ -722,7 +722,7 @@ let show_boxes_page fast pdf _ page = Pdfpage.postpend_operators pdf ops ~fast page let show_boxes ?(fast=false) pdf range = - process_pages (Cpdfutil.ppstub (show_boxes_page fast pdf)) pdf range + process_pages (Pdfpage.ppstub (show_boxes_page fast pdf)) pdf range let allowance = 9. @@ -754,14 +754,14 @@ let trim_marks_page fast pdf n page = page let trim_marks ?(fast=false) pdf range = - process_pages (Cpdfutil.ppstub (trim_marks_page fast pdf)) pdf range + process_pages (Pdfpage.ppstub (trim_marks_page fast pdf)) pdf range (* copy the contents of the box f to the box t. If mediabox_if_missing is set, the contents of the mediabox will be used if the from fox is not available. If mediabox_is_missing is false, the page is unaltered. *) let copy_box f t mediabox_if_missing pdf range = process_pages - (Cpdfutil.ppstub (fun _ page -> + (Pdfpage.ppstub (fun _ page -> if f = "/MediaBox" then {page with Pdfpage.rest = (Pdf.add_dict_entry page.Pdfpage.rest t (page.Pdfpage.mediabox))} diff --git a/cpdfremovetext.ml b/cpdfremovetext.ml index da029e6..9341c07 100644 --- a/cpdfremovetext.ml +++ b/cpdfremovetext.ml @@ -26,7 +26,7 @@ let removetext range pdf = let ops = Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content in [Pdfops.stream_of_ops (remove_stamps [] ops)]} in - Cpdfpage.process_pages (Cpdfutil.ppstub removetext_page) pdf range + Cpdfpage.process_pages (Pdfpage.ppstub removetext_page) pdf range let rec remove_all_text_ops pdf resources content = let is_textop = function @@ -43,7 +43,7 @@ let rec remove_all_text_ops pdf resources content = let remove_all_text_page pdf p = let resources = p.Pdfpage.resources in let content = p.Pdfpage.content in - Cpdfutil.process_xobjects pdf p remove_all_text_ops; + Pdfpage.process_xobjects pdf p remove_all_text_ops; {p with Pdfpage.content = remove_all_text_ops pdf resources content}, pdf let remove_all_text range pdf = diff --git a/cpdftweak.ml b/cpdftweak.ml index 8582a04..8f04104 100644 --- a/cpdftweak.ml +++ b/cpdftweak.ml @@ -87,10 +87,10 @@ let blacktext c range pdf = let content' = blacktext_ops c pdf page.Pdfpage.resources page.Pdfpage.content in - Cpdfutil.process_xobjects pdf page (blacktext_ops c); + Pdfpage.process_xobjects pdf page (blacktext_ops c); {page with Pdfpage.content = content'} in - Cpdfpage.process_pages (Cpdfutil.ppstub blacktext_page) pdf range + Cpdfpage.process_pages (Pdfpage.ppstub blacktext_page) pdf range (* Blacken lines *) let blacklines_ops c pdf resources content = @@ -113,10 +113,10 @@ let blacklines c range pdf = let content' = blacklines_ops c pdf page.Pdfpage.resources page.Pdfpage.content in - Cpdfutil.process_xobjects pdf page (blacklines_ops c); + Pdfpage.process_xobjects pdf page (blacklines_ops c); {page with Pdfpage.content = content'} in - Cpdfpage.process_pages (Cpdfutil.ppstub blacklines_page) pdf range + Cpdfpage.process_pages (Pdfpage.ppstub blacklines_page) pdf range (* Blacken Fills *) let blackfills_ops c pdf resources content = @@ -139,10 +139,10 @@ let blackfills c range pdf = let content' = blackfills_ops c pdf page.Pdfpage.resources page.Pdfpage.content in - Cpdfutil.process_xobjects pdf page (blackfills_ops c); + Pdfpage.process_xobjects pdf page (blackfills_ops c); {page with Pdfpage.content = content'} in - Cpdfpage.process_pages (Cpdfutil.ppstub blackfills_page) pdf range + Cpdfpage.process_pages (Pdfpage.ppstub blackfills_page) pdf range (* Set a minimum line width to avoid dropout *) let thinlines range width pdf = @@ -217,7 +217,7 @@ let thinlines range width pdf = let content' = [Pdfops.stream_of_ops operators] in {page with Pdfpage.content = content'} in - Cpdfpage.process_pages (Cpdfutil.ppstub thinpage) pdf range + Cpdfpage.process_pages (Pdfpage.ppstub thinpage) pdf range (* Parse the new content to make sure syntactically ok, append * as required. Rewrite the content *) @@ -229,7 +229,7 @@ let append_page_content_page fast s before pdf n page = pdf ops ~fast page let append_page_content s before fast range pdf = - Cpdfpage.process_pages (Cpdfutil.ppstub (append_page_content_page fast s before pdf)) pdf range + Cpdfpage.process_pages (Pdfpage.ppstub (append_page_content_page fast s before pdf)) pdf range let rec dict_entry_single_object f pdf = function | (Pdf.Dictionary d) -> f (Pdf.recurse_dict (dict_entry_single_object f pdf) d) @@ -303,10 +303,10 @@ let remove_clipping pdf range = let content' = remove_clipping_ops pdf page.Pdfpage.resources page.Pdfpage.content in - Cpdfutil.process_xobjects pdf page remove_clipping_ops; + Pdfpage.process_xobjects pdf page remove_clipping_ops; {page with Pdfpage.content = content'} in - Cpdfpage.process_pages (Cpdfutil.ppstub remove_clipping_page) pdf range + Cpdfpage.process_pages (Pdfpage.ppstub remove_clipping_page) pdf range let remove_unused_resources_page pdf n page = let xobjects, all_names = @@ -324,4 +324,4 @@ let remove_unused_resources_page pdf n page = {page with Pdfpage.resources = Pdf.add_dict_entry page.Pdfpage.resources "/XObject" xobjdict} let remove_unused_resources pdf = - Cpdfpage.process_pages (Cpdfutil.ppstub (remove_unused_resources_page pdf)) pdf (ilist 1 (Pdfpage.endpage pdf)) + Cpdfpage.process_pages (Pdfpage.ppstub (remove_unused_resources_page pdf)) pdf (ilist 1 (Pdfpage.endpage pdf)) diff --git a/cpdfutil.ml b/cpdfutil.ml deleted file mode 100644 index 8b9cd23..0000000 --- a/cpdfutil.ml +++ /dev/null @@ -1,136 +0,0 @@ -open Pdfutil - -(* FIXME: All of this should probably be pushed down into CamlPDF *) - -(* For uses of process_pages which don't need to deal with matrices, this - function transforms into one which returns the identity matrix *) -let ppstub f n p = (f n p, n, Pdftransform.i_matrix) - -(* These may move into CamlPDF at some point *) -let process_xobject f pdf resources i = - let xobj = Pdf.lookup_obj pdf i in - match Pdf.lookup_direct pdf "/Subtype" xobj with - | None -> raise (Pdf.PDFError "No /Subtype in Xobject") - | Some (Pdf.Name "/Form") -> - Pdf.getstream xobj; - begin match xobj with - | Pdf.Stream ({contents = Pdf.Dictionary dict, Pdf.Got bytes} as rf) -> - begin match f pdf resources [Pdf.Stream rf] with - | [Pdf.Stream {contents = (Pdf.Dictionary dict', data)}] -> - let dict' = - Pdf.remove_dict_entry - (Pdf.Dictionary (mergedict dict dict')) - "/Filter" - in - rf := (dict', data) - | _ -> assert false - end - | _ -> assert false (* getstream would have complained already *) - end - | Some _ -> () - -let process_xobjects pdf page f = - match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with - | Some (Pdf.Dictionary elts) -> - iter - (fun (k, v) -> - match v with - | Pdf.Indirect i -> process_xobject f pdf page.Pdfpage.resources i - | _ -> raise (Pdf.PDFError "process_xobject")) - elts - | _ -> () - -(*(* The content transformed by altering any use of [Op_cm]. But we must also -alter any /Matrix entries in pattern dictionaries *) -let change_pattern_matrices_resources pdf tr resources = - try - begin match Pdf.lookup_direct pdf "/Pattern" resources with - | Some (Pdf.Dictionary patterns) -> - let entries = - map - (fun (name, p) -> - (*Printf.printf "Changing matrices of pattern %s\n" name;*) - let old_pattern = Pdf.direct pdf p in - let new_pattern = - let existing_tr = Pdf.parse_matrix pdf "/Matrix" old_pattern in - let new_tr = Pdftransform.matrix_compose (Pdftransform.matrix_invert tr) existing_tr in - Pdf.add_dict_entry old_pattern "/Matrix" (Pdf.make_matrix new_tr) - in - name, Pdf.Indirect (Pdf.addobj pdf new_pattern)) - patterns - in - Pdf.add_dict_entry resources "/Pattern" (Pdf.Dictionary entries) - | _ -> resources - end - with - Pdftransform.NonInvertable -> - Printf.eprintf "Warning: noninvertible matrix\n%!"; - resources - -let change_pattern_matrices_page pdf tr page = - let page = - {page with Pdfpage.resources = change_pattern_matrices_resources pdf tr page.Pdfpage.resources} - in - match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with - | Some (Pdf.Dictionary elts) -> - iter - (fun (k, v) -> - match v with - | Pdf.Indirect i -> - (* Check if it's a form XObject. If so, rewrite its resources and add back as same number. *) - begin match Pdf.lookup_direct pdf "/Subtype" v with - | Some (Pdf.Name "/Form") -> - (*Printf.printf "Processing form xobject %s for patterns\n" k; *) - let form_xobject = Pdf.lookup_obj pdf i in - begin match Pdf.lookup_direct pdf "/Resources" form_xobject with - | Some resources -> - let form_xobject' = - Pdf.add_dict_entry form_xobject "/Resources" (change_pattern_matrices_resources pdf tr resources) - in - Pdf.addobj_given_num pdf (i, form_xobject') - | _ -> () - end - | _ -> () - end; - | _ -> raise (Pdf.PDFError "change_pattern_matrices_page")) - elts; - page - | _ -> page*) - -(* Union two resource dictionaries from the same PDF. *) -let combine_pdf_resources pdf a b = - let a_entries = - match a with - | Pdf.Dictionary entries -> entries - | _ -> [] - in let b_entries = - match b with - | Pdf.Dictionary entries -> entries - | _ -> [] - in - let resource_keys = - ["/Font"; "/ExtGState"; "/ColorSpace"; "/Pattern"; - "/Shading"; "/XObject"; "/Properties"] - in - let combine_entries key = - let a_entries = - match Pdf.lookup_direct pdf key a with - | Some (Pdf.Dictionary d) -> d - | _ -> [] - in let b_entries = - match Pdf.lookup_direct pdf key b with - | Some (Pdf.Dictionary d) -> d - | _ -> [] - in - if a_entries = [] && b_entries = [] then - None - else - Some (key, Pdf.Dictionary (a_entries @ b_entries)) - in - let unknown_keys_a = lose (fun (k, _) -> mem k resource_keys) a_entries in - let unknown_keys_b = lose (fun (k, _) -> mem k resource_keys) b_entries in - let combined_known_entries = option_map combine_entries resource_keys in - fold_left - (fun dict (k, v) -> Pdf.add_dict_entry dict k v) - (Pdf.Dictionary []) - (unknown_keys_a @ unknown_keys_b @ combined_known_entries) diff --git a/cpdfutil.mli b/cpdfutil.mli deleted file mode 100644 index 7732bf2..0000000 --- a/cpdfutil.mli +++ /dev/null @@ -1,11 +0,0 @@ -val process_xobjects : Pdf.t -> - Pdfpage.t -> - (Pdf.t -> - Pdf.pdfobject -> Pdf.pdfobject list -> Pdf.pdfobject list) -> - unit - -(*val change_pattern_matrices_page : Pdf.t -> Pdftransform.transform_matrix -> Pdfpage.t -> Pdfpage.t*) - -val combine_pdf_resources : Pdf.t -> Pdf.pdfobject -> Pdf.pdfobject -> Pdf.pdfobject - -val ppstub : ('a -> 'b -> 'c) -> 'a -> 'b -> 'c * 'a * Pdftransform.transform_matrix