This commit is contained in:
John Whitington 2023-03-23 19:24:50 +00:00
parent c2e89565a2
commit e957abab75
9 changed files with 33 additions and 180 deletions

View File

@ -1,5 +1,5 @@
# Build the cpdf command line tools # Build the cpdf command line tools
NONDOC = cpdfyojson cpdfxmlm cpdfutil NONDOC = cpdfyojson cpdfxmlm
DOC = cpdfunicodedata cpdferror cpdfdebug cpdfjson cpdfstrftime cpdfcoord \ DOC = cpdfunicodedata cpdferror cpdfdebug cpdfjson cpdfstrftime cpdfcoord \
cpdfattach cpdfpagespec cpdfposition cpdfpresent cpdfmetadata \ cpdfattach cpdfpagespec cpdfposition cpdfpresent cpdfmetadata \

View File

@ -396,7 +396,7 @@ let addtext
then Pdfpage.prepend_operators pdf ops ~fast:fast page then Pdfpage.prepend_operators pdf ops ~fast:fast page
else Pdfpage.postpend_operators pdf ops ~fast:fast page else Pdfpage.postpend_operators pdf ops ~fast:fast page
in in
Cpdfpage.process_pages (Cpdfutil.ppstub addtext_page) pdf pages Cpdfpage.process_pages (Pdfpage.ppstub addtext_page) pdf pages
(* Prev is a list of lists of characters *) (* Prev is a list of lists of characters *)
let split_at_newline t = let split_at_newline t =
@ -611,4 +611,4 @@ let addrectangle
then Pdfpage.prepend_operators pdf ops ~fast:fast page then Pdfpage.prepend_operators pdf ops ~fast:fast page
else Pdfpage.postpend_operators pdf ops ~fast:fast page else Pdfpage.postpend_operators pdf ops ~fast:fast page
in in
Cpdfpage.process_pages (Cpdfutil.ppstub addrectangle_page) pdf range Cpdfpage.process_pages (Pdfpage.ppstub addrectangle_page) pdf range

View File

@ -326,4 +326,4 @@ let remove_annotations range pdf =
else else
page page
in in
Cpdfpage.process_pages (Cpdfutil.ppstub remove_annotations_page) pdf range Cpdfpage.process_pages (Pdfpage.ppstub remove_annotations_page) pdf range

View File

@ -132,7 +132,7 @@ let impose_pages fit x y columns rtl btt center margin output_mediabox fast fit_
(r, List.hd r)*) (r, List.hd r)*)
pages, List.hd pages pages, List.hd pages
in in
let resources' = pair_reduce (Cpdfutil.combine_pdf_resources pdf) (map (fun p -> p.Pdfpage.resources) pages) in let resources' = pair_reduce (Pdfpage.combine_pdf_resources pdf) (map (fun p -> p.Pdfpage.resources) pages) in
let rest' = pair_reduce (combine_pdf_rests pdf) (map (fun p -> p.Pdfpage.rest) pages) in let rest' = pair_reduce (combine_pdf_rests pdf) (map (fun p -> p.Pdfpage.rest) pages) in
let content' = let content' =
let transform_stream transform contents = let transform_stream transform contents =

View File

@ -66,7 +66,7 @@ let map_pages f pdf range =
* *) * *)
let hard_box pdf range boxname mediabox_if_missing fast = let hard_box pdf range boxname mediabox_if_missing fast =
process_pages process_pages
(Cpdfutil.ppstub (fun pagenum page -> (Pdfpage.ppstub (fun pagenum page ->
let minx, miny, maxx, maxy = let minx, miny, maxx, maxy =
if boxname = "/MediaBox" then if boxname = "/MediaBox" then
Pdf.parse_rectangle pdf page.Pdfpage.mediabox Pdf.parse_rectangle pdf page.Pdfpage.mediabox
@ -179,12 +179,12 @@ let set_mediabox xywhlist pdf range =
[Pdf.Real x; Pdf.Real y; [Pdf.Real x; Pdf.Real y;
Pdf.Real (x +. w); Pdf.Real (y +. h)])} Pdf.Real (x +. w); Pdf.Real (y +. h)])}
in in
process_pages (Cpdfutil.ppstub crop_page) pdf range process_pages (Pdfpage.ppstub crop_page) pdf range
(* If a cropbox exists, make it the mediabox. If not, change nothing. *) (* If a cropbox exists, make it the mediabox. If not, change nothing. *)
let copy_cropbox_to_mediabox pdf range = let copy_cropbox_to_mediabox pdf range =
process_pages process_pages
(Cpdfutil.ppstub (fun _ page -> (Pdfpage.ppstub (fun _ page ->
match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with
| Some pdfobject -> {page with Pdfpage.mediabox = Pdf.direct pdf pdfobject} | Some pdfobject -> {page with Pdfpage.mediabox = Pdf.direct pdf pdfobject}
| None -> page)) | None -> page))
@ -197,7 +197,7 @@ let remove_cropping_pdf pdf range =
Pdfpage.rest = Pdfpage.rest =
(Pdf.remove_dict_entry page.Pdfpage.rest "/CropBox")} (Pdf.remove_dict_entry page.Pdfpage.rest "/CropBox")}
in in
process_pages (Cpdfutil.ppstub remove_cropping_page) pdf range process_pages (Pdfpage.ppstub remove_cropping_page) pdf range
let remove_trim_pdf pdf range = let remove_trim_pdf pdf range =
let remove_trim_page _ page = let remove_trim_page _ page =
@ -205,7 +205,7 @@ let remove_trim_pdf pdf range =
Pdfpage.rest = Pdfpage.rest =
(Pdf.remove_dict_entry page.Pdfpage.rest "/TrimBox")} (Pdf.remove_dict_entry page.Pdfpage.rest "/TrimBox")}
in in
process_pages (Cpdfutil.ppstub remove_trim_page) pdf range process_pages (Pdfpage.ppstub remove_trim_page) pdf range
let remove_art_pdf pdf range = let remove_art_pdf pdf range =
let remove_art_page _ page = let remove_art_page _ page =
@ -213,7 +213,7 @@ let remove_art_pdf pdf range =
Pdfpage.rest = Pdfpage.rest =
(Pdf.remove_dict_entry page.Pdfpage.rest "/ArtBox")} (Pdf.remove_dict_entry page.Pdfpage.rest "/ArtBox")}
in in
process_pages (Cpdfutil.ppstub remove_art_page) pdf range process_pages (Pdfpage.ppstub remove_art_page) pdf range
let remove_bleed_pdf pdf range = let remove_bleed_pdf pdf range =
let remove_bleed_page _ page = let remove_bleed_page _ page =
@ -221,7 +221,7 @@ let remove_bleed_pdf pdf range =
Pdfpage.rest = Pdfpage.rest =
(Pdf.remove_dict_entry page.Pdfpage.rest "/BleedBox")} (Pdf.remove_dict_entry page.Pdfpage.rest "/BleedBox")}
in in
process_pages (Cpdfutil.ppstub remove_bleed_page) pdf range process_pages (Pdfpage.ppstub remove_bleed_page) pdf range
(* Upright functionality *) (* Upright functionality *)
@ -295,14 +295,14 @@ let rotate_pdf r pdf range =
{page with Pdfpage.rotate = {page with Pdfpage.rotate =
Pdfpage.rotation_of_int r} Pdfpage.rotation_of_int r}
in in
process_pages (Cpdfutil.ppstub rotate_page) pdf range process_pages (Pdfpage.ppstub rotate_page) pdf range
let rotate_pdf_by r pdf range = let rotate_pdf_by r pdf range =
let rotate_page_by _ page = let rotate_page_by _ page =
{page with Pdfpage.rotate = {page with Pdfpage.rotate =
Pdfpage.rotation_of_int ((Pdfpage.int_of_rotation page.Pdfpage.rotate + r) mod 360)} Pdfpage.rotation_of_int ((Pdfpage.int_of_rotation page.Pdfpage.rotate + r) mod 360)}
in in
process_pages (Cpdfutil.ppstub rotate_page_by) pdf range process_pages (Pdfpage.ppstub rotate_page_by) pdf range
let rotate_page_contents ~fast rotpoint r pdf pnum page = let rotate_page_contents ~fast rotpoint r pdf pnum page =
let rotation_point = let rotation_point =
@ -537,7 +537,7 @@ let do_stamp relative_to_cropbox fast position topline midline scale_to_fit isov
Pdfpage.rest = Pdfpage.rest =
combine_page_items pdf u.Pdfpage.rest o.Pdfpage.rest; combine_page_items pdf u.Pdfpage.rest o.Pdfpage.rest;
Pdfpage.resources = Pdfpage.resources =
Cpdfutil.combine_pdf_resources pdf u.Pdfpage.resources o.Pdfpage.resources} Pdfpage.combine_pdf_resources pdf u.Pdfpage.resources o.Pdfpage.resources}
let stamp relative_to_cropbox position topline midline fast scale_to_fit isover range over pdf = let stamp relative_to_cropbox position topline midline fast scale_to_fit isover range over pdf =
let prefix = Pdfpage.shortest_unused_prefix pdf in let prefix = Pdfpage.shortest_unused_prefix pdf in
@ -661,7 +661,7 @@ let setBox box minx maxx miny maxy pdf range =
page.Pdfpage.rest box page.Pdfpage.rest box
(Pdf.Array [Pdf.Real minx; Pdf.Real miny; Pdf.Real maxx; Pdf.Real maxy])} (Pdf.Array [Pdf.Real minx; Pdf.Real miny; Pdf.Real maxx; Pdf.Real maxy])}
in in
process_pages (Cpdfutil.ppstub set_box_page) pdf range process_pages (Pdfpage.ppstub set_box_page) pdf range
(* Cropping *) (* Cropping *)
let crop_pdf ?(box="/CropBox") xywhlist pdf range = let crop_pdf ?(box="/CropBox") xywhlist pdf range =
@ -676,7 +676,7 @@ let crop_pdf ?(box="/CropBox") xywhlist pdf range =
[Pdf.Real x; Pdf.Real y; [Pdf.Real x; Pdf.Real y;
Pdf.Real (x +. w); Pdf.Real (y +. h)])))} Pdf.Real (x +. w); Pdf.Real (y +. h)])))}
in in
process_pages (Cpdfutil.ppstub crop_page) pdf range process_pages (Pdfpage.ppstub crop_page) pdf range
(* Add rectangles on top of pages to show Media, Crop, Art, Trim, Bleed boxes. (* Add rectangles on top of pages to show Media, Crop, Art, Trim, Bleed boxes.
* *
@ -722,7 +722,7 @@ let show_boxes_page fast pdf _ page =
Pdfpage.postpend_operators pdf ops ~fast page Pdfpage.postpend_operators pdf ops ~fast page
let show_boxes ?(fast=false) pdf range = let show_boxes ?(fast=false) pdf range =
process_pages (Cpdfutil.ppstub (show_boxes_page fast pdf)) pdf range process_pages (Pdfpage.ppstub (show_boxes_page fast pdf)) pdf range
let allowance = 9. let allowance = 9.
@ -754,14 +754,14 @@ let trim_marks_page fast pdf n page =
page page
let trim_marks ?(fast=false) pdf range = let trim_marks ?(fast=false) pdf range =
process_pages (Cpdfutil.ppstub (trim_marks_page fast pdf)) pdf range process_pages (Pdfpage.ppstub (trim_marks_page fast pdf)) pdf range
(* copy the contents of the box f to the box t. If mediabox_if_missing is set, (* copy the contents of the box f to the box t. If mediabox_if_missing is set,
the contents of the mediabox will be used if the from box is not available. If the contents of the mediabox will be used if the from box is not available. If
mediabox_if_missing is false, the page is unaltered. *) mediabox_if_missing is false, the page is unaltered. *)
let copy_box f t mediabox_if_missing pdf range = let copy_box f t mediabox_if_missing pdf range =
process_pages process_pages
(Cpdfutil.ppstub (fun _ page -> (Pdfpage.ppstub (fun _ page ->
if f = "/MediaBox" then if f = "/MediaBox" then
{page with Pdfpage.rest = {page with Pdfpage.rest =
(Pdf.add_dict_entry page.Pdfpage.rest t (page.Pdfpage.mediabox))} (Pdf.add_dict_entry page.Pdfpage.rest t (page.Pdfpage.mediabox))}

View File

@ -26,7 +26,7 @@ let removetext range pdf =
let ops = Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content in let ops = Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content in
[Pdfops.stream_of_ops (remove_stamps [] ops)]} [Pdfops.stream_of_ops (remove_stamps [] ops)]}
in in
Cpdfpage.process_pages (Cpdfutil.ppstub removetext_page) pdf range Cpdfpage.process_pages (Pdfpage.ppstub removetext_page) pdf range
let rec remove_all_text_ops pdf resources content = let rec remove_all_text_ops pdf resources content =
let is_textop = function let is_textop = function
@ -43,7 +43,7 @@ let rec remove_all_text_ops pdf resources content =
let remove_all_text_page pdf p = let remove_all_text_page pdf p =
let resources = p.Pdfpage.resources in let resources = p.Pdfpage.resources in
let content = p.Pdfpage.content in let content = p.Pdfpage.content in
Cpdfutil.process_xobjects pdf p remove_all_text_ops; Pdfpage.process_xobjects pdf p remove_all_text_ops;
{p with Pdfpage.content = remove_all_text_ops pdf resources content}, pdf {p with Pdfpage.content = remove_all_text_ops pdf resources content}, pdf
let remove_all_text range pdf = let remove_all_text range pdf =

View File

@ -87,10 +87,10 @@ let blacktext c range pdf =
let content' = let content' =
blacktext_ops c pdf page.Pdfpage.resources page.Pdfpage.content blacktext_ops c pdf page.Pdfpage.resources page.Pdfpage.content
in in
Cpdfutil.process_xobjects pdf page (blacktext_ops c); Pdfpage.process_xobjects pdf page (blacktext_ops c);
{page with Pdfpage.content = content'} {page with Pdfpage.content = content'}
in in
Cpdfpage.process_pages (Cpdfutil.ppstub blacktext_page) pdf range Cpdfpage.process_pages (Pdfpage.ppstub blacktext_page) pdf range
(* Blacken lines *) (* Blacken lines *)
let blacklines_ops c pdf resources content = let blacklines_ops c pdf resources content =
@ -113,10 +113,10 @@ let blacklines c range pdf =
let content' = let content' =
blacklines_ops c pdf page.Pdfpage.resources page.Pdfpage.content blacklines_ops c pdf page.Pdfpage.resources page.Pdfpage.content
in in
Cpdfutil.process_xobjects pdf page (blacklines_ops c); Pdfpage.process_xobjects pdf page (blacklines_ops c);
{page with Pdfpage.content = content'} {page with Pdfpage.content = content'}
in in
Cpdfpage.process_pages (Cpdfutil.ppstub blacklines_page) pdf range Cpdfpage.process_pages (Pdfpage.ppstub blacklines_page) pdf range
(* Blacken Fills *) (* Blacken Fills *)
let blackfills_ops c pdf resources content = let blackfills_ops c pdf resources content =
@ -139,10 +139,10 @@ let blackfills c range pdf =
let content' = let content' =
blackfills_ops c pdf page.Pdfpage.resources page.Pdfpage.content blackfills_ops c pdf page.Pdfpage.resources page.Pdfpage.content
in in
Cpdfutil.process_xobjects pdf page (blackfills_ops c); Pdfpage.process_xobjects pdf page (blackfills_ops c);
{page with Pdfpage.content = content'} {page with Pdfpage.content = content'}
in in
Cpdfpage.process_pages (Cpdfutil.ppstub blackfills_page) pdf range Cpdfpage.process_pages (Pdfpage.ppstub blackfills_page) pdf range
(* Set a minimum line width to avoid dropout *) (* Set a minimum line width to avoid dropout *)
let thinlines range width pdf = let thinlines range width pdf =
@ -217,7 +217,7 @@ let thinlines range width pdf =
let content' = [Pdfops.stream_of_ops operators] in let content' = [Pdfops.stream_of_ops operators] in
{page with Pdfpage.content = content'} {page with Pdfpage.content = content'}
in in
Cpdfpage.process_pages (Cpdfutil.ppstub thinpage) pdf range Cpdfpage.process_pages (Pdfpage.ppstub thinpage) pdf range
(* Parse the new content to make sure syntactically ok, append (* Parse the new content to make sure syntactically ok, append
* as required. Rewrite the content *) * as required. Rewrite the content *)
@ -229,7 +229,7 @@ let append_page_content_page fast s before pdf n page =
pdf ops ~fast page pdf ops ~fast page
let append_page_content s before fast range pdf = let append_page_content s before fast range pdf =
Cpdfpage.process_pages (Cpdfutil.ppstub (append_page_content_page fast s before pdf)) pdf range Cpdfpage.process_pages (Pdfpage.ppstub (append_page_content_page fast s before pdf)) pdf range
let rec dict_entry_single_object f pdf = function let rec dict_entry_single_object f pdf = function
| (Pdf.Dictionary d) -> f (Pdf.recurse_dict (dict_entry_single_object f pdf) d) | (Pdf.Dictionary d) -> f (Pdf.recurse_dict (dict_entry_single_object f pdf) d)
@ -303,10 +303,10 @@ let remove_clipping pdf range =
let content' = let content' =
remove_clipping_ops pdf page.Pdfpage.resources page.Pdfpage.content remove_clipping_ops pdf page.Pdfpage.resources page.Pdfpage.content
in in
Cpdfutil.process_xobjects pdf page remove_clipping_ops; Pdfpage.process_xobjects pdf page remove_clipping_ops;
{page with Pdfpage.content = content'} {page with Pdfpage.content = content'}
in in
Cpdfpage.process_pages (Cpdfutil.ppstub remove_clipping_page) pdf range Cpdfpage.process_pages (Pdfpage.ppstub remove_clipping_page) pdf range
let remove_unused_resources_page pdf n page = let remove_unused_resources_page pdf n page =
let xobjects, all_names = let xobjects, all_names =
@ -324,4 +324,4 @@ let remove_unused_resources_page pdf n page =
{page with Pdfpage.resources = Pdf.add_dict_entry page.Pdfpage.resources "/XObject" xobjdict} {page with Pdfpage.resources = Pdf.add_dict_entry page.Pdfpage.resources "/XObject" xobjdict}
let remove_unused_resources pdf = let remove_unused_resources pdf =
Cpdfpage.process_pages (Cpdfutil.ppstub (remove_unused_resources_page pdf)) pdf (ilist 1 (Pdfpage.endpage pdf)) Cpdfpage.process_pages (Pdfpage.ppstub (remove_unused_resources_page pdf)) pdf (ilist 1 (Pdfpage.endpage pdf))

View File

@ -1,136 +0,0 @@
open Pdfutil
(* FIXME: All of this should probably be pushed down into CamlPDF *)
(* Adapter for process_pages callbacks which have no matrix to report:
   [ppstub f] applies [f] to the page number and the page, and returns that
   result together with the unchanged page number and the identity matrix. *)
let ppstub f = fun n p ->
  let processed = f n p in
  (processed, n, Pdftransform.i_matrix)
(* These may move into CamlPDF at some point *)
(* [process_xobject f pdf resources i] rewrites object [i] of [pdf] in place
   when it is a form XObject. [f] is called as [f pdf resources [stream]] and
   must return a single stream; its dictionary is merged with the existing one
   (via [mergedict]), the /Filter entry is dropped from the result, and the
   stream reference is updated with the new dictionary and data.
   XObjects with any other /Subtype are left alone.
   Raises [Pdf.PDFError] when the object has no /Subtype entry.
   NOTE(review): /Filter is presumably removed because [f] returns
   unencoded data - confirm against the callers. *)
let process_xobject f pdf resources i =
  let xobj = Pdf.lookup_obj pdf i in
  match Pdf.lookup_direct pdf "/Subtype" xobj with
  | Some (Pdf.Name "/Form") ->
      Pdf.getstream xobj;
      begin match xobj with
      | Pdf.Stream ({contents = (Pdf.Dictionary old_entries, Pdf.Got _)} as stream) ->
          begin match f pdf resources [Pdf.Stream stream] with
          | [Pdf.Stream {contents = (Pdf.Dictionary new_entries, data)}] ->
              let merged = Pdf.Dictionary (mergedict old_entries new_entries) in
              let unfiltered = Pdf.remove_dict_entry merged "/Filter" in
              stream := (unfiltered, data)
          | _ -> assert false
          end
      | _ -> assert false (* getstream would have complained already *)
      end
  | Some _ -> ()
  | None -> raise (Pdf.PDFError "No /Subtype in Xobject")
(* [process_xobjects pdf page f] runs [process_xobject f] over every entry of
   the /XObject dictionary found in the resources of [page], passing the page
   resources along. Does nothing when there is no /XObject dictionary.
   Raises [Pdf.PDFError] if an entry is not an indirect reference. *)
let process_xobjects pdf page f =
  let resources = page.Pdfpage.resources in
  let process_entry = function
    | (_, Pdf.Indirect i) -> process_xobject f pdf resources i
    | _ -> raise (Pdf.PDFError "process_xobject")
  in
  match Pdf.lookup_direct pdf "/XObject" resources with
  | Some (Pdf.Dictionary entries) -> iter process_entry entries
  | _ -> ()
(*(* The content transformed by altering any use of [Op_cm]. But we must also
alter any /Matrix entries in pattern dictionaries *)
let change_pattern_matrices_resources pdf tr resources =
try
begin match Pdf.lookup_direct pdf "/Pattern" resources with
| Some (Pdf.Dictionary patterns) ->
let entries =
map
(fun (name, p) ->
(*Printf.printf "Changing matrices of pattern %s\n" name;*)
let old_pattern = Pdf.direct pdf p in
let new_pattern =
let existing_tr = Pdf.parse_matrix pdf "/Matrix" old_pattern in
let new_tr = Pdftransform.matrix_compose (Pdftransform.matrix_invert tr) existing_tr in
Pdf.add_dict_entry old_pattern "/Matrix" (Pdf.make_matrix new_tr)
in
name, Pdf.Indirect (Pdf.addobj pdf new_pattern))
patterns
in
Pdf.add_dict_entry resources "/Pattern" (Pdf.Dictionary entries)
| _ -> resources
end
with
Pdftransform.NonInvertable ->
Printf.eprintf "Warning: noninvertible matrix\n%!";
resources
let change_pattern_matrices_page pdf tr page =
let page =
{page with Pdfpage.resources = change_pattern_matrices_resources pdf tr page.Pdfpage.resources}
in
match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with
| Some (Pdf.Dictionary elts) ->
iter
(fun (k, v) ->
match v with
| Pdf.Indirect i ->
(* Check if it's a form XObject. If so, rewrite its resources and add back as same number. *)
begin match Pdf.lookup_direct pdf "/Subtype" v with
| Some (Pdf.Name "/Form") ->
(*Printf.printf "Processing form xobject %s for patterns\n" k; *)
let form_xobject = Pdf.lookup_obj pdf i in
begin match Pdf.lookup_direct pdf "/Resources" form_xobject with
| Some resources ->
let form_xobject' =
Pdf.add_dict_entry form_xobject "/Resources" (change_pattern_matrices_resources pdf tr resources)
in
Pdf.addobj_given_num pdf (i, form_xobject')
| _ -> ()
end
| _ -> ()
end;
| _ -> raise (Pdf.PDFError "change_pattern_matrices_page"))
elts;
page
| _ -> page*)
(* Union two resource dictionaries [a] and [b] from the same PDF.
   For each of the standard resource categories listed in [resource_keys],
   the sub-dictionaries of [a] and [b] are concatenated (entries of [a]
   first); a category absent or empty in both is omitted. Any other top-level
   entries are copied across with [Pdf.add_dict_entry], first those of [a],
   then those of [b]. An argument which is not a dictionary contributes no
   top-level entries. *)
let combine_pdf_resources pdf a b =
  let top_level_entries = function
    | Pdf.Dictionary entries -> entries
    | _ -> []
  in
  let category_entries key resources =
    match Pdf.lookup_direct pdf key resources with
    | Some (Pdf.Dictionary d) -> d
    | _ -> []
  in
  let resource_keys =
    ["/Font"; "/ExtGState"; "/ColorSpace"; "/Pattern";
     "/Shading"; "/XObject"; "/Properties"]
  in
  let combine_entries key =
    (* Look in [a] before [b], as the lookups go through the PDF. *)
    let from_a = category_entries key a in
    let from_b = category_entries key b in
    match from_a, from_b with
    | [], [] -> None
    | _ -> Some (key, Pdf.Dictionary (from_a @ from_b))
  in
  let is_known (k, _) = mem k resource_keys in
  let unknown_a = lose is_known (top_level_entries a) in
  let unknown_b = lose is_known (top_level_entries b) in
  let known = option_map combine_entries resource_keys in
  let add dict (k, v) = Pdf.add_dict_entry dict k v in
  fold_left add (Pdf.Dictionary []) (unknown_a @ unknown_b @ known)

View File

@ -1,11 +0,0 @@
(** [process_xobjects pdf page f] applies [f] to each form XObject referenced
    from the /XObject dictionary in the resources of [page]. [f] receives the
    PDF, the page resources and a one-element list holding the XObject stream,
    and must return a one-element list holding the replacement stream, which
    is written back in place. XObjects of other subtypes are skipped.
    @raise Pdf.PDFError if an /XObject entry is not an indirect reference, or
    if a referenced object has no /Subtype entry. *)
val process_xobjects : Pdf.t ->
Pdfpage.t ->
(Pdf.t ->
Pdf.pdfobject -> Pdf.pdfobject list -> Pdf.pdfobject list) ->
unit
(*val change_pattern_matrices_page : Pdf.t -> Pdftransform.transform_matrix -> Pdfpage.t -> Pdfpage.t*)
(** [combine_pdf_resources pdf a b] is the union of the resource dictionaries
    [a] and [b], both belonging to [pdf]. The /Font, /ExtGState, /ColorSpace,
    /Pattern, /Shading, /XObject and /Properties sub-dictionaries are
    concatenated entry by entry; other top-level entries are copied across. *)
val combine_pdf_resources : Pdf.t -> Pdf.pdfobject -> Pdf.pdfobject -> Pdf.pdfobject
(** [ppstub f n p] is [(f n p, n, Pdftransform.i_matrix)]. It adapts a
    function which does not deal with matrices into one which also returns
    its first argument and the identity matrix, for use with process_pages. *)
val ppstub : ('a -> 'b -> 'c) -> 'a -> 'b -> 'c * 'a * Pdftransform.transform_matrix