This commit is contained in:
John Whitington 2023-03-23 19:24:50 +00:00
parent c2e89565a2
commit e957abab75
9 changed files with 33 additions and 180 deletions

View File

@ -1,5 +1,5 @@
# Build the cpdf command line tools
NONDOC = cpdfyojson cpdfxmlm cpdfutil
NONDOC = cpdfyojson cpdfxmlm
DOC = cpdfunicodedata cpdferror cpdfdebug cpdfjson cpdfstrftime cpdfcoord \
cpdfattach cpdfpagespec cpdfposition cpdfpresent cpdfmetadata \

View File

@ -396,7 +396,7 @@ let addtext
then Pdfpage.prepend_operators pdf ops ~fast:fast page
else Pdfpage.postpend_operators pdf ops ~fast:fast page
in
Cpdfpage.process_pages (Cpdfutil.ppstub addtext_page) pdf pages
Cpdfpage.process_pages (Pdfpage.ppstub addtext_page) pdf pages
(* Prev is a list of lists of characters *)
let split_at_newline t =
@ -611,4 +611,4 @@ let addrectangle
then Pdfpage.prepend_operators pdf ops ~fast:fast page
else Pdfpage.postpend_operators pdf ops ~fast:fast page
in
Cpdfpage.process_pages (Cpdfutil.ppstub addrectangle_page) pdf range
Cpdfpage.process_pages (Pdfpage.ppstub addrectangle_page) pdf range

View File

@ -326,4 +326,4 @@ let remove_annotations range pdf =
else
page
in
Cpdfpage.process_pages (Cpdfutil.ppstub remove_annotations_page) pdf range
Cpdfpage.process_pages (Pdfpage.ppstub remove_annotations_page) pdf range

View File

@ -132,7 +132,7 @@ let impose_pages fit x y columns rtl btt center margin output_mediabox fast fit_
(r, List.hd r)*)
pages, List.hd pages
in
let resources' = pair_reduce (Cpdfutil.combine_pdf_resources pdf) (map (fun p -> p.Pdfpage.resources) pages) in
let resources' = pair_reduce (Pdfpage.combine_pdf_resources pdf) (map (fun p -> p.Pdfpage.resources) pages) in
let rest' = pair_reduce (combine_pdf_rests pdf) (map (fun p -> p.Pdfpage.rest) pages) in
let content' =
let transform_stream transform contents =

View File

@ -66,7 +66,7 @@ let map_pages f pdf range =
* *)
let hard_box pdf range boxname mediabox_if_missing fast =
process_pages
(Cpdfutil.ppstub (fun pagenum page ->
(Pdfpage.ppstub (fun pagenum page ->
let minx, miny, maxx, maxy =
if boxname = "/MediaBox" then
Pdf.parse_rectangle pdf page.Pdfpage.mediabox
@ -179,12 +179,12 @@ let set_mediabox xywhlist pdf range =
[Pdf.Real x; Pdf.Real y;
Pdf.Real (x +. w); Pdf.Real (y +. h)])}
in
process_pages (Cpdfutil.ppstub crop_page) pdf range
process_pages (Pdfpage.ppstub crop_page) pdf range
(* If a cropbox exists, make it the mediabox. If not, change nothing. *)
let copy_cropbox_to_mediabox pdf range =
process_pages
(Cpdfutil.ppstub (fun _ page ->
(Pdfpage.ppstub (fun _ page ->
match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with
| Some pdfobject -> {page with Pdfpage.mediabox = Pdf.direct pdf pdfobject}
| None -> page))
@ -197,7 +197,7 @@ let remove_cropping_pdf pdf range =
Pdfpage.rest =
(Pdf.remove_dict_entry page.Pdfpage.rest "/CropBox")}
in
process_pages (Cpdfutil.ppstub remove_cropping_page) pdf range
process_pages (Pdfpage.ppstub remove_cropping_page) pdf range
let remove_trim_pdf pdf range =
let remove_trim_page _ page =
@ -205,7 +205,7 @@ let remove_trim_pdf pdf range =
Pdfpage.rest =
(Pdf.remove_dict_entry page.Pdfpage.rest "/TrimBox")}
in
process_pages (Cpdfutil.ppstub remove_trim_page) pdf range
process_pages (Pdfpage.ppstub remove_trim_page) pdf range
let remove_art_pdf pdf range =
let remove_art_page _ page =
@ -213,7 +213,7 @@ let remove_art_pdf pdf range =
Pdfpage.rest =
(Pdf.remove_dict_entry page.Pdfpage.rest "/ArtBox")}
in
process_pages (Cpdfutil.ppstub remove_art_page) pdf range
process_pages (Pdfpage.ppstub remove_art_page) pdf range
let remove_bleed_pdf pdf range =
let remove_bleed_page _ page =
@ -221,7 +221,7 @@ let remove_bleed_pdf pdf range =
Pdfpage.rest =
(Pdf.remove_dict_entry page.Pdfpage.rest "/BleedBox")}
in
process_pages (Cpdfutil.ppstub remove_bleed_page) pdf range
process_pages (Pdfpage.ppstub remove_bleed_page) pdf range
(* Upright functionality *)
@ -295,14 +295,14 @@ let rotate_pdf r pdf range =
{page with Pdfpage.rotate =
Pdfpage.rotation_of_int r}
in
process_pages (Cpdfutil.ppstub rotate_page) pdf range
process_pages (Pdfpage.ppstub rotate_page) pdf range
let rotate_pdf_by r pdf range =
let rotate_page_by _ page =
{page with Pdfpage.rotate =
Pdfpage.rotation_of_int ((Pdfpage.int_of_rotation page.Pdfpage.rotate + r) mod 360)}
in
process_pages (Cpdfutil.ppstub rotate_page_by) pdf range
process_pages (Pdfpage.ppstub rotate_page_by) pdf range
let rotate_page_contents ~fast rotpoint r pdf pnum page =
let rotation_point =
@ -537,7 +537,7 @@ let do_stamp relative_to_cropbox fast position topline midline scale_to_fit isov
Pdfpage.rest =
combine_page_items pdf u.Pdfpage.rest o.Pdfpage.rest;
Pdfpage.resources =
Cpdfutil.combine_pdf_resources pdf u.Pdfpage.resources o.Pdfpage.resources}
Pdfpage.combine_pdf_resources pdf u.Pdfpage.resources o.Pdfpage.resources}
let stamp relative_to_cropbox position topline midline fast scale_to_fit isover range over pdf =
let prefix = Pdfpage.shortest_unused_prefix pdf in
@ -661,7 +661,7 @@ let setBox box minx maxx miny maxy pdf range =
page.Pdfpage.rest box
(Pdf.Array [Pdf.Real minx; Pdf.Real miny; Pdf.Real maxx; Pdf.Real maxy])}
in
process_pages (Cpdfutil.ppstub set_box_page) pdf range
process_pages (Pdfpage.ppstub set_box_page) pdf range
(* Cropping *)
let crop_pdf ?(box="/CropBox") xywhlist pdf range =
@ -676,7 +676,7 @@ let crop_pdf ?(box="/CropBox") xywhlist pdf range =
[Pdf.Real x; Pdf.Real y;
Pdf.Real (x +. w); Pdf.Real (y +. h)])))}
in
process_pages (Cpdfutil.ppstub crop_page) pdf range
process_pages (Pdfpage.ppstub crop_page) pdf range
(* Add rectangles on top of pages to show Media, Crop, Art, Trim, Bleed boxes.
*
@ -722,7 +722,7 @@ let show_boxes_page fast pdf _ page =
Pdfpage.postpend_operators pdf ops ~fast page
let show_boxes ?(fast=false) pdf range =
process_pages (Cpdfutil.ppstub (show_boxes_page fast pdf)) pdf range
process_pages (Pdfpage.ppstub (show_boxes_page fast pdf)) pdf range
let allowance = 9.
@ -754,14 +754,14 @@ let trim_marks_page fast pdf n page =
page
let trim_marks ?(fast=false) pdf range =
process_pages (Cpdfutil.ppstub (trim_marks_page fast pdf)) pdf range
process_pages (Pdfpage.ppstub (trim_marks_page fast pdf)) pdf range
(* Copy the contents of the box f to the box t. If mediabox_if_missing is set,
the contents of the mediabox will be used if the from box is not available. If
mediabox_if_missing is false, the page is unaltered. *)
let copy_box f t mediabox_if_missing pdf range =
process_pages
(Cpdfutil.ppstub (fun _ page ->
(Pdfpage.ppstub (fun _ page ->
if f = "/MediaBox" then
{page with Pdfpage.rest =
(Pdf.add_dict_entry page.Pdfpage.rest t (page.Pdfpage.mediabox))}

View File

@ -26,7 +26,7 @@ let removetext range pdf =
let ops = Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content in
[Pdfops.stream_of_ops (remove_stamps [] ops)]}
in
Cpdfpage.process_pages (Cpdfutil.ppstub removetext_page) pdf range
Cpdfpage.process_pages (Pdfpage.ppstub removetext_page) pdf range
let rec remove_all_text_ops pdf resources content =
let is_textop = function
@ -43,7 +43,7 @@ let rec remove_all_text_ops pdf resources content =
let remove_all_text_page pdf p =
let resources = p.Pdfpage.resources in
let content = p.Pdfpage.content in
Cpdfutil.process_xobjects pdf p remove_all_text_ops;
Pdfpage.process_xobjects pdf p remove_all_text_ops;
{p with Pdfpage.content = remove_all_text_ops pdf resources content}, pdf
let remove_all_text range pdf =

View File

@ -87,10 +87,10 @@ let blacktext c range pdf =
let content' =
blacktext_ops c pdf page.Pdfpage.resources page.Pdfpage.content
in
Cpdfutil.process_xobjects pdf page (blacktext_ops c);
Pdfpage.process_xobjects pdf page (blacktext_ops c);
{page with Pdfpage.content = content'}
in
Cpdfpage.process_pages (Cpdfutil.ppstub blacktext_page) pdf range
Cpdfpage.process_pages (Pdfpage.ppstub blacktext_page) pdf range
(* Blacken lines *)
let blacklines_ops c pdf resources content =
@ -113,10 +113,10 @@ let blacklines c range pdf =
let content' =
blacklines_ops c pdf page.Pdfpage.resources page.Pdfpage.content
in
Cpdfutil.process_xobjects pdf page (blacklines_ops c);
Pdfpage.process_xobjects pdf page (blacklines_ops c);
{page with Pdfpage.content = content'}
in
Cpdfpage.process_pages (Cpdfutil.ppstub blacklines_page) pdf range
Cpdfpage.process_pages (Pdfpage.ppstub blacklines_page) pdf range
(* Blacken Fills *)
let blackfills_ops c pdf resources content =
@ -139,10 +139,10 @@ let blackfills c range pdf =
let content' =
blackfills_ops c pdf page.Pdfpage.resources page.Pdfpage.content
in
Cpdfutil.process_xobjects pdf page (blackfills_ops c);
Pdfpage.process_xobjects pdf page (blackfills_ops c);
{page with Pdfpage.content = content'}
in
Cpdfpage.process_pages (Cpdfutil.ppstub blackfills_page) pdf range
Cpdfpage.process_pages (Pdfpage.ppstub blackfills_page) pdf range
(* Set a minimum line width to avoid dropout *)
let thinlines range width pdf =
@ -217,7 +217,7 @@ let thinlines range width pdf =
let content' = [Pdfops.stream_of_ops operators] in
{page with Pdfpage.content = content'}
in
Cpdfpage.process_pages (Cpdfutil.ppstub thinpage) pdf range
Cpdfpage.process_pages (Pdfpage.ppstub thinpage) pdf range
(* Parse the new content to make sure syntactically ok, append
* as required. Rewrite the content *)
@ -229,7 +229,7 @@ let append_page_content_page fast s before pdf n page =
pdf ops ~fast page
let append_page_content s before fast range pdf =
Cpdfpage.process_pages (Cpdfutil.ppstub (append_page_content_page fast s before pdf)) pdf range
Cpdfpage.process_pages (Pdfpage.ppstub (append_page_content_page fast s before pdf)) pdf range
let rec dict_entry_single_object f pdf = function
| (Pdf.Dictionary d) -> f (Pdf.recurse_dict (dict_entry_single_object f pdf) d)
@ -303,10 +303,10 @@ let remove_clipping pdf range =
let content' =
remove_clipping_ops pdf page.Pdfpage.resources page.Pdfpage.content
in
Cpdfutil.process_xobjects pdf page remove_clipping_ops;
Pdfpage.process_xobjects pdf page remove_clipping_ops;
{page with Pdfpage.content = content'}
in
Cpdfpage.process_pages (Cpdfutil.ppstub remove_clipping_page) pdf range
Cpdfpage.process_pages (Pdfpage.ppstub remove_clipping_page) pdf range
let remove_unused_resources_page pdf n page =
let xobjects, all_names =
@ -324,4 +324,4 @@ let remove_unused_resources_page pdf n page =
{page with Pdfpage.resources = Pdf.add_dict_entry page.Pdfpage.resources "/XObject" xobjdict}
let remove_unused_resources pdf =
Cpdfpage.process_pages (Cpdfutil.ppstub (remove_unused_resources_page pdf)) pdf (ilist 1 (Pdfpage.endpage pdf))
Cpdfpage.process_pages (Pdfpage.ppstub (remove_unused_resources_page pdf)) pdf (ilist 1 (Pdfpage.endpage pdf))

View File

@ -1,136 +0,0 @@
open Pdfutil
(* FIXME: All of this should probably be pushed down into CamlPDF *)
(* Adapter for callers of process_pages that have no use for a transformation
   matrix. [ppstub f n p] evaluates [f n p] and returns that result together
   with [n], unchanged, and the identity matrix [Pdftransform.i_matrix]. *)
let ppstub f n p =
  let result = f n p in
  (result, n, Pdftransform.i_matrix)
(* These may move into CamlPDF at some point *)
(* [process_xobject f pdf resources i] rewrites, in place, the xobject held as
   object number [i] of [pdf].

   Only xobjects whose /Subtype is /Form are touched; any other subtype is
   left alone. For a form, [f pdf resources [stream]] is called and must
   return exactly one stream. The dictionary it returns is merged with the
   original one using [mergedict] (from Pdfutil -- NOTE(review): precedence on
   duplicate keys is not visible here, confirm), the /Filter entry is dropped,
   and the result plus the new data is written back through the stream's
   reference.

   Raises [Pdf.PDFError] if the xobject has no /Subtype entry. The
   [assert false] branches fire if the object is not a stream with a
   dictionary and [Pdf.Got] data after [Pdf.getstream], or if [f] returns
   anything other than a single stream with a dictionary. *)
let process_xobject f pdf resources i =
  let xobj = Pdf.lookup_obj pdf i in
  match Pdf.lookup_direct pdf "/Subtype" xobj with
  | None -> raise (Pdf.PDFError "No /Subtype in Xobject")
  | Some (Pdf.Name "/Form") ->
      Pdf.getstream xobj;
      (match xobj with
       | Pdf.Stream ({contents = (Pdf.Dictionary old_dict, Pdf.Got _)} as stream_ref) ->
           (match f pdf resources [Pdf.Stream stream_ref] with
            | [Pdf.Stream {contents = (Pdf.Dictionary new_dict, data)}] ->
                let merged = Pdf.Dictionary (mergedict old_dict new_dict) in
                let unfiltered = Pdf.remove_dict_entry merged "/Filter" in
                stream_ref := (unfiltered, data)
            | _ -> assert false)
       | _ -> assert false (* getstream would have complained already *))
  | Some _ -> ()
(* [process_xobjects pdf page f] runs [process_xobject f] over every entry of
   the /XObject dictionary found in [page]'s resources, passing the page's
   resources along each time. Nothing happens when the page has no /XObject
   dictionary.

   Raises [Pdf.PDFError] if an entry of the /XObject dictionary is not an
   indirect reference. *)
let process_xobjects pdf page f =
  let resources = page.Pdfpage.resources in
  let process_entry = function
    | (_, Pdf.Indirect i) -> process_xobject f pdf resources i
    | _ -> raise (Pdf.PDFError "process_xobject")
  in
  match Pdf.lookup_direct pdf "/XObject" resources with
  | Some (Pdf.Dictionary entries) -> iter process_entry entries
  | _ -> ()
(*(* The content transformed by altering any use of [Op_cm]. But we must also
alter any /Matrix entries in pattern dictionaries *)
let change_pattern_matrices_resources pdf tr resources =
try
begin match Pdf.lookup_direct pdf "/Pattern" resources with
| Some (Pdf.Dictionary patterns) ->
let entries =
map
(fun (name, p) ->
(*Printf.printf "Changing matrices of pattern %s\n" name;*)
let old_pattern = Pdf.direct pdf p in
let new_pattern =
let existing_tr = Pdf.parse_matrix pdf "/Matrix" old_pattern in
let new_tr = Pdftransform.matrix_compose (Pdftransform.matrix_invert tr) existing_tr in
Pdf.add_dict_entry old_pattern "/Matrix" (Pdf.make_matrix new_tr)
in
name, Pdf.Indirect (Pdf.addobj pdf new_pattern))
patterns
in
Pdf.add_dict_entry resources "/Pattern" (Pdf.Dictionary entries)
| _ -> resources
end
with
Pdftransform.NonInvertable ->
Printf.eprintf "Warning: noninvertible matrix\n%!";
resources
let change_pattern_matrices_page pdf tr page =
let page =
{page with Pdfpage.resources = change_pattern_matrices_resources pdf tr page.Pdfpage.resources}
in
match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with
| Some (Pdf.Dictionary elts) ->
iter
(fun (k, v) ->
match v with
| Pdf.Indirect i ->
(* Check if it's a form XObject. If so, rewrite its resources and add back as same number. *)
begin match Pdf.lookup_direct pdf "/Subtype" v with
| Some (Pdf.Name "/Form") ->
(*Printf.printf "Processing form xobject %s for patterns\n" k; *)
let form_xobject = Pdf.lookup_obj pdf i in
begin match Pdf.lookup_direct pdf "/Resources" form_xobject with
| Some resources ->
let form_xobject' =
Pdf.add_dict_entry form_xobject "/Resources" (change_pattern_matrices_resources pdf tr resources)
in
Pdf.addobj_given_num pdf (i, form_xobject')
| _ -> ()
end
| _ -> ()
end;
| _ -> raise (Pdf.PDFError "change_pattern_matrices_page"))
elts;
page
| _ -> page*)
(* Union two resource dictionaries, [a] and [b], which belong to the same PDF.

   For each of the standard resource categories listed in [known_keys], the
   sub-dictionaries found in [a] and [b] are concatenated ([a]'s entries
   first); a category absent or empty in both is omitted. Top-level entries
   under any other key are carried over as they are, [a]'s before [b]'s. The
   result is built by adding every entry to an empty dictionary with
   [Pdf.add_dict_entry]. An argument that is not a [Pdf.Dictionary]
   contributes no top-level entries. *)
let combine_pdf_resources pdf a b =
  let top_level_entries = function
    | Pdf.Dictionary entries -> entries
    | _ -> []
  in
  let known_keys =
    ["/Font"; "/ExtGState"; "/ColorSpace"; "/Pattern";
     "/Shading"; "/XObject"; "/Properties"]
  in
  (* The entries of the sub-dictionary stored under [key], or [] if none. *)
  let subdict key resources =
    match Pdf.lookup_direct pdf key resources with
    | Some (Pdf.Dictionary d) -> d
    | _ -> []
  in
  let merge_category key =
    let from_a = subdict key a in
    let from_b = subdict key b in
    match from_a, from_b with
    | [], [] -> None
    | _ -> Some (key, Pdf.Dictionary (from_a @ from_b))
  in
  let is_known (k, _) = mem k known_keys in
  let other_a = lose is_known (top_level_entries a) in
  let other_b = lose is_known (top_level_entries b) in
  let merged_known = option_map merge_category known_keys in
  fold_left
    (fun dict (k, v) -> Pdf.add_dict_entry dict k v)
    (Pdf.Dictionary [])
    (other_a @ other_b @ merged_known)

View File

@ -1,11 +0,0 @@
(** [process_xobjects pdf page f] applies [f] to each form xobject referenced
    from the /XObject dictionary of [page]'s resources, rewriting the xobject
    stream in place. [f] is called with the document, the page's resources and
    a one-element list holding the xobject stream, and must return a
    one-element list holding the replacement stream. Xobjects whose /Subtype
    is not /Form are left untouched.
    @raise Pdf.PDFError if an /XObject entry is not an indirect reference, or
    if an xobject has no /Subtype. *)
val process_xobjects : Pdf.t ->
Pdfpage.t ->
(Pdf.t ->
Pdf.pdfobject -> Pdf.pdfobject list -> Pdf.pdfobject list) ->
unit
(*val change_pattern_matrices_page : Pdf.t -> Pdftransform.transform_matrix -> Pdfpage.t -> Pdfpage.t*)
(** [combine_pdf_resources pdf a b] is the union of the resource dictionaries
    [a] and [b], both taken from [pdf]. The /Font, /ExtGState, /ColorSpace,
    /Pattern, /Shading, /XObject and /Properties sub-dictionaries are
    concatenated, [a]'s entries first; entries under other keys are carried
    over. *)
val combine_pdf_resources : Pdf.t -> Pdf.pdfobject -> Pdf.pdfobject -> Pdf.pdfobject
(** [ppstub f n p] is [(f n p, n, Pdftransform.i_matrix)]: it adapts a
    function that does not deal with matrices into one that also returns its
    first argument and the identity matrix. *)
val ppstub : ('a -> 'b -> 'c) -> 'a -> 'b -> 'c * 'a * Pdftransform.transform_matrix