2013-08-20 16:32:57 +02:00
|
|
|
(* CPDF Core routines *)
|
|
|
|
open Pdfutil
|
|
|
|
open Pdfio
|
2021-10-02 13:22:59 +02:00
|
|
|
open Cpdferror
|
2013-08-20 16:32:57 +02:00
|
|
|
|
2021-05-27 18:31:12 +02:00
|
|
|
(* For uses of process_pages which don't need to deal with matrices, this
|
|
|
|
function transforms into one which returns the identity matrix *)
|
|
|
|
let ppstub f n p = (f n p, n, Pdftransform.i_matrix)
|
|
|
|
|
2016-04-03 20:46:54 +02:00
|
|
|
(* Add stack operators to a content stream to ensure it is composeable. On
|
|
|
|
-fast, we don't check for Q deficit, assuming PDF is ISO. *)
|
|
|
|
let protect fast pdf resources content =
|
|
|
|
let deficit =
|
|
|
|
if fast then 0 else
|
|
|
|
let ops = Pdfops.parse_operators pdf resources content in
|
|
|
|
let qs = length (keep (eq Pdfops.Op_q) ops) in
|
|
|
|
let bigqs = length (keep (eq Pdfops.Op_Q) ops) in
|
|
|
|
let deficit = if qs > bigqs then qs - bigqs else 0 in
|
2021-07-23 17:26:07 +02:00
|
|
|
if deficit <> 0 then Printf.eprintf "Q Deficit was nonzero. Fixing. %i\n%!" deficit;
|
2016-04-03 20:46:54 +02:00
|
|
|
deficit
|
|
|
|
in
|
|
|
|
let addstream ops = Pdf.addobj pdf (Pdfops.stream_of_ops ops) in
|
|
|
|
let q = addstream [Pdfops.Op_q] in
|
|
|
|
let qs = addstream (many Pdfops.Op_Q deficit @ [Pdfops.Op_Q]) in
|
|
|
|
[Pdf.Indirect q] @ content @ [Pdf.Indirect qs]
|
2013-08-20 16:32:57 +02:00
|
|
|
|
2021-10-26 17:18:09 +02:00
|
|
|
(* If a cropbox exists, make it the mediabox. If not, change nothing. *)
|
|
|
|
let copy_cropbox_to_mediabox pdf range =
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages
|
2021-10-26 17:18:09 +02:00
|
|
|
(ppstub (fun _ page ->
|
|
|
|
match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with
|
|
|
|
| Some pdfobject -> {page with Pdfpage.mediabox = Pdf.direct pdf pdfobject}
|
|
|
|
| None -> page))
|
|
|
|
pdf
|
|
|
|
range
|
|
|
|
|
2013-08-20 16:32:57 +02:00
|
|
|
(* Union two resource dictionaries from the same PDF. *)
|
|
|
|
let combine_pdf_resources pdf a b =
|
|
|
|
let a_entries =
|
|
|
|
match a with
|
|
|
|
| Pdf.Dictionary entries -> entries
|
|
|
|
| _ -> []
|
|
|
|
in let b_entries =
|
|
|
|
match b with
|
|
|
|
| Pdf.Dictionary entries -> entries
|
|
|
|
| _ -> []
|
|
|
|
in
|
|
|
|
let resource_keys =
|
|
|
|
["/Font"; "/ExtGState"; "/ColorSpace"; "/Pattern";
|
|
|
|
"/Shading"; "/XObject"; "/Properties"]
|
|
|
|
in
|
|
|
|
let combine_entries key =
|
|
|
|
let a_entries =
|
|
|
|
match Pdf.lookup_direct pdf key a with
|
|
|
|
| Some (Pdf.Dictionary d) -> d
|
|
|
|
| _ -> []
|
|
|
|
in let b_entries =
|
|
|
|
match Pdf.lookup_direct pdf key b with
|
|
|
|
| Some (Pdf.Dictionary d) -> d
|
|
|
|
| _ -> []
|
|
|
|
in
|
2016-04-03 20:51:47 +02:00
|
|
|
if a_entries = [] && b_entries = [] then
|
|
|
|
None
|
|
|
|
else
|
|
|
|
Some (key, Pdf.Dictionary (a_entries @ b_entries))
|
2013-08-20 16:32:57 +02:00
|
|
|
in
|
2016-04-03 20:51:47 +02:00
|
|
|
let unknown_keys_a = lose (fun (k, _) -> mem k resource_keys) a_entries in
|
|
|
|
let unknown_keys_b = lose (fun (k, _) -> mem k resource_keys) b_entries in
|
|
|
|
let combined_known_entries = option_map combine_entries resource_keys in
|
2021-10-26 16:36:56 +02:00
|
|
|
fold_left
|
|
|
|
(fun dict (k, v) -> Pdf.add_dict_entry dict k v)
|
|
|
|
(Pdf.Dictionary [])
|
|
|
|
(unknown_keys_a @ unknown_keys_b @ combined_known_entries)
|
2013-08-20 16:32:57 +02:00
|
|
|
|
|
|
|
|
|
|
|
(* Does the page have a defined box e.g "/CropBox" *)
|
|
|
|
let hasbox pdf page boxname =
|
|
|
|
let pages = Pdfpage.pages_of_pagetree pdf in
|
|
|
|
if page > length pages || page < 1 then raise (Failure "hasbox: bad page") else
|
|
|
|
let p = select page pages in
|
|
|
|
match Pdf.lookup_direct pdf boxname p.Pdfpage.rest with
|
|
|
|
| Some _ -> true
|
|
|
|
| _ -> false
|
|
|
|
|
|
|
|
|
|
|
|
(* \section{Shift page data} *)
|
|
|
|
let make_mediabox (xmin, ymin, xmax, ymax) =
|
|
|
|
Pdf.Array
|
|
|
|
[Pdf.Real xmin; Pdf.Real ymin; Pdf.Real xmax; Pdf.Real ymax]
|
|
|
|
|
|
|
|
(* Change the media box and other known boxes by the function [f] which takes
|
|
|
|
xmin, xmax, ymin, ymax as input. *)
|
|
|
|
let change_boxes f pdf page =
|
|
|
|
let names = ["/TrimBox"; "/ArtBox"; "/CropBox"; "/BleedBox"]
|
|
|
|
in let getbox n =
|
|
|
|
Pdf.lookup_direct pdf n page.Pdfpage.rest
|
|
|
|
in
|
|
|
|
let boxes = combine names (map getbox names) in
|
|
|
|
let toreplace = lose (function (_, None) -> true | _ -> false) boxes in
|
|
|
|
let toreplace =
|
|
|
|
map
|
|
|
|
(function (name, Some value) -> (name, value) | _ -> assert false)
|
|
|
|
toreplace
|
|
|
|
in
|
|
|
|
let rest' =
|
|
|
|
fold_left
|
|
|
|
(fun e (k, v) ->
|
|
|
|
let v =
|
|
|
|
make_mediabox (f (Pdf.parse_rectangle v))
|
|
|
|
in
|
|
|
|
Pdf.replace_dict_entry e k v)
|
|
|
|
page.Pdfpage.rest
|
|
|
|
toreplace
|
|
|
|
in
|
|
|
|
{page with
|
|
|
|
Pdfpage.mediabox =
|
|
|
|
make_mediabox (f (Pdf.parse_rectangle page.Pdfpage.mediabox));
|
|
|
|
Pdfpage.rest = rest'}
|
|
|
|
|
2019-07-16 14:05:06 +02:00
|
|
|
let process_xobject f pdf resources i =
|
|
|
|
let xobj = Pdf.lookup_obj pdf i in
|
|
|
|
match Pdf.lookup_direct pdf "/Subtype" xobj with
|
|
|
|
| None -> raise (Pdf.PDFError "No /Subtype in Xobject")
|
|
|
|
| Some (Pdf.Name "/Form") ->
|
|
|
|
Pdf.getstream xobj;
|
|
|
|
begin match xobj with
|
|
|
|
| Pdf.Stream ({contents = Pdf.Dictionary dict, Pdf.Got bytes} as rf) ->
|
|
|
|
begin match f pdf resources [Pdf.Stream rf] with
|
|
|
|
| [Pdf.Stream {contents = (Pdf.Dictionary dict', data)}] ->
|
|
|
|
let dict' =
|
|
|
|
Pdf.remove_dict_entry
|
|
|
|
(Pdf.Dictionary (mergedict dict dict'))
|
|
|
|
"/Filter"
|
|
|
|
in
|
|
|
|
rf := (dict', data)
|
|
|
|
| _ -> assert false
|
|
|
|
end
|
|
|
|
| _ -> assert false (* getstream would have complained already *)
|
|
|
|
end
|
|
|
|
| Some _ -> ()
|
2019-07-15 19:46:17 +02:00
|
|
|
|
2019-07-16 14:05:06 +02:00
|
|
|
let process_xobjects pdf page f =
|
|
|
|
match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with
|
|
|
|
| Some (Pdf.Dictionary elts) ->
|
|
|
|
iter
|
|
|
|
(fun (k, v) ->
|
|
|
|
match v with
|
|
|
|
| Pdf.Indirect i -> process_xobject f pdf page.Pdfpage.resources i
|
|
|
|
| _ -> raise (Pdf.PDFError "process_xobject"))
|
|
|
|
elts
|
|
|
|
| _ -> ()
|
2013-08-20 16:32:57 +02:00
|
|
|
|
2019-07-16 14:05:06 +02:00
|
|
|
(* The content transformed by altering any use of [Op_cm]. But we must also
|
|
|
|
alter any /Matrix entries in pattern dictionaries *)
|
2019-07-16 15:55:17 +02:00
|
|
|
let change_pattern_matrices_resources pdf tr resources =
|
2013-08-20 16:32:57 +02:00
|
|
|
try
|
2019-07-16 15:55:17 +02:00
|
|
|
begin match Pdf.lookup_direct pdf "/Pattern" resources with
|
2019-07-16 14:05:06 +02:00
|
|
|
| Some (Pdf.Dictionary patterns) ->
|
|
|
|
let entries =
|
|
|
|
map
|
|
|
|
(fun (name, p) ->
|
2019-07-18 18:26:18 +02:00
|
|
|
(*Printf.printf "Changing matrices of pattern %s\n" name;*)
|
2019-07-16 14:05:06 +02:00
|
|
|
let old_pattern = Pdf.direct pdf p in
|
|
|
|
let new_pattern =
|
|
|
|
let existing_tr = Pdf.parse_matrix pdf "/Matrix" old_pattern in
|
|
|
|
let new_tr = Pdftransform.matrix_compose (Pdftransform.matrix_invert tr) existing_tr in
|
|
|
|
Pdf.add_dict_entry old_pattern "/Matrix" (Pdf.make_matrix new_tr)
|
|
|
|
in
|
|
|
|
name, Pdf.Indirect (Pdf.addobj pdf new_pattern))
|
|
|
|
patterns
|
|
|
|
in
|
2019-07-16 15:55:17 +02:00
|
|
|
Pdf.add_dict_entry resources "/Pattern" (Pdf.Dictionary entries)
|
|
|
|
| _ -> resources
|
2019-07-16 14:05:06 +02:00
|
|
|
end
|
|
|
|
with
|
|
|
|
Pdftransform.NonInvertable ->
|
2021-07-23 17:26:07 +02:00
|
|
|
Printf.eprintf "Warning: noninvertible matrix\n%!";
|
2019-07-16 15:55:17 +02:00
|
|
|
resources
|
|
|
|
|
|
|
|
let change_pattern_matrices_page pdf tr page =
|
|
|
|
let page =
|
|
|
|
{page with Pdfpage.resources = change_pattern_matrices_resources pdf tr page.Pdfpage.resources}
|
|
|
|
in
|
|
|
|
match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with
|
|
|
|
| Some (Pdf.Dictionary elts) ->
|
|
|
|
iter
|
|
|
|
(fun (k, v) ->
|
|
|
|
match v with
|
|
|
|
| Pdf.Indirect i ->
|
|
|
|
(* Check if it's a form XObject. If so, rewrite its resources and add back as same number. *)
|
|
|
|
begin match Pdf.lookup_direct pdf "/Subtype" v with
|
|
|
|
| Some (Pdf.Name "/Form") ->
|
2019-07-18 18:26:18 +02:00
|
|
|
(*Printf.printf "Processing form xobject %s for patterns\n" k; *)
|
2019-07-16 15:55:17 +02:00
|
|
|
let form_xobject = Pdf.lookup_obj pdf i in
|
|
|
|
begin match Pdf.lookup_direct pdf "/Resources" form_xobject with
|
|
|
|
| Some resources ->
|
|
|
|
let form_xobject' =
|
|
|
|
Pdf.add_dict_entry form_xobject "/Resources" (change_pattern_matrices_resources pdf tr resources)
|
|
|
|
in
|
|
|
|
Pdf.addobj_given_num pdf (i, form_xobject')
|
|
|
|
| _ -> ()
|
|
|
|
end
|
|
|
|
| _ -> ()
|
|
|
|
end;
|
|
|
|
| _ -> raise (Pdf.PDFError "change_pattern_matrices_page"))
|
|
|
|
elts;
|
2019-07-16 14:05:06 +02:00
|
|
|
page
|
2019-07-16 15:55:17 +02:00
|
|
|
| _ -> page
|
2013-08-20 16:32:57 +02:00
|
|
|
|
2020-07-23 16:17:27 +02:00
|
|
|
let transform_rect transform rect =
|
|
|
|
let minx, miny, maxx, maxy = Pdf.parse_rectangle rect in
|
|
|
|
let (x0, y0) = Pdftransform.transform_matrix transform (minx, miny) in
|
|
|
|
let (x1, y1) = Pdftransform.transform_matrix transform (maxx, maxy) in
|
|
|
|
let (x2, y2) = Pdftransform.transform_matrix transform (minx, maxy) in
|
|
|
|
let (x3, y3) = Pdftransform.transform_matrix transform (maxx, miny) in
|
|
|
|
let minx = fmin (fmin x0 x1) (fmin x2 x3) in
|
|
|
|
let miny = fmin (fmin y0 y1) (fmin y2 y3) in
|
|
|
|
let maxx = fmax (fmax x0 x1) (fmax x2 x3) in
|
|
|
|
let maxy = fmax (fmax y0 y1) (fmax y2 y3) in
|
|
|
|
Pdf.Array [Pdf.Real minx; Pdf.Real miny; Pdf.Real maxx; Pdf.Real maxy]
|
|
|
|
|
2021-12-15 09:53:34 +01:00
|
|
|
let transform_quadpoint_single transform = function
|
|
|
|
| [x1; y1; x2; y2; x3; y3; x4; y4] ->
|
|
|
|
let x1, y1, x2, y2, x3, y3, x4, y4 =
|
|
|
|
Pdf.getnum x1, Pdf.getnum y1,
|
|
|
|
Pdf.getnum x2, Pdf.getnum y2,
|
|
|
|
Pdf.getnum x3, Pdf.getnum y3,
|
|
|
|
Pdf.getnum x4, Pdf.getnum y4
|
|
|
|
in
|
|
|
|
let (x1, y1) = Pdftransform.transform_matrix transform (x1, y1) in
|
|
|
|
let (x2, y2) = Pdftransform.transform_matrix transform (x2, y2) in
|
|
|
|
let (x3, y3) = Pdftransform.transform_matrix transform (x3, y3) in
|
|
|
|
let (x4, y4) = Pdftransform.transform_matrix transform (x4, y4) in
|
|
|
|
map (fun x -> Pdf.Real x) [x1; y1; x2; y2; x3; y3; x4; y4]
|
|
|
|
| qp ->
|
|
|
|
Printf.eprintf "Malformed /QuadPoints format: must be a multiple of 8 entries\n";
|
|
|
|
qp
|
|
|
|
|
|
|
|
let transform_quadpoints transform = function
|
|
|
|
| Pdf.Array qps ->
|
|
|
|
Pdf.Array (flatten (map (transform_quadpoint_single transform) (splitinto 8 qps)))
|
|
|
|
| qp ->
|
|
|
|
Printf.eprintf "Unknown or malformed /QuadPoints format %s\n" (Pdfwrite.string_of_pdf qp);
|
|
|
|
qp
|
|
|
|
|
|
|
|
(* Apply transformations to any annotations in /Annots (i.e their /Rect and /QuadPoints entries) *)
|
2019-07-10 14:16:32 +02:00
|
|
|
let transform_annotations pdf transform rest =
|
|
|
|
match Pdf.lookup_direct pdf "/Annots" rest with
|
|
|
|
| Some (Pdf.Array annots) ->
|
|
|
|
(* Always indirect references, so alter in place *)
|
2020-03-04 19:50:32 +01:00
|
|
|
iter
|
2019-07-10 14:16:32 +02:00
|
|
|
(function
|
|
|
|
| Pdf.Indirect i ->
|
|
|
|
let annot = Pdf.lookup_obj pdf i in
|
2021-12-15 09:53:34 +01:00
|
|
|
let rect' =
|
|
|
|
match Pdf.lookup_direct pdf "/Rect" annot with
|
|
|
|
| Some rect -> transform_rect transform rect
|
|
|
|
| None -> raise (Pdf.PDFError "transform_annotations: no rect")
|
|
|
|
in
|
|
|
|
let quadpoints' =
|
|
|
|
match Pdf.lookup_direct pdf "/QuadPoints" annot with
|
|
|
|
| Some qp -> Some (transform_quadpoints transform qp)
|
|
|
|
| None -> None
|
2019-07-10 14:16:32 +02:00
|
|
|
in
|
2021-12-15 09:53:34 +01:00
|
|
|
let annot = Pdf.add_dict_entry annot "/Rect" rect' in
|
|
|
|
let annot =
|
|
|
|
match quadpoints' with
|
|
|
|
| Some qp -> Pdf.add_dict_entry annot "/QuadPoints" qp
|
|
|
|
| None -> annot
|
|
|
|
in
|
|
|
|
Pdf.addobj_given_num pdf (i, annot)
|
2021-07-23 17:26:07 +02:00
|
|
|
| _ -> Printf.eprintf "transform_annotations: not indirect\n%!")
|
2019-07-10 14:16:32 +02:00
|
|
|
annots
|
|
|
|
| _ -> ()
|
|
|
|
|
2016-07-21 18:02:11 +02:00
|
|
|
let shift_page ?(fast=false) dxdylist pdf pnum page =
|
|
|
|
let dx, dy = List.nth dxdylist (pnum - 1) in
|
|
|
|
let transform_op =
|
|
|
|
Pdfops.Op_cm (Pdftransform.matrix_of_op (Pdftransform.Translate (dx, dy)))
|
2013-08-20 16:32:57 +02:00
|
|
|
in
|
2019-07-16 14:05:06 +02:00
|
|
|
let page =
|
|
|
|
change_pattern_matrices_page pdf (Pdftransform.mktranslate ~-.dx ~-.dy) page
|
2016-07-21 18:02:11 +02:00
|
|
|
in
|
2019-08-19 12:49:53 +02:00
|
|
|
transform_annotations pdf (Pdftransform.mktranslate dx dy) page.Pdfpage.rest;
|
2021-05-31 19:10:39 +02:00
|
|
|
(Pdfpage.prepend_operators pdf [transform_op] ~fast page, pnum, Pdftransform.mktranslate dx dy)
|
2013-08-20 16:32:57 +02:00
|
|
|
|
2016-07-21 18:02:11 +02:00
|
|
|
let shift_pdf ?(fast=false) dxdylist pdf range =
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages (shift_page ~fast dxdylist pdf) pdf range
|
2013-08-20 16:32:57 +02:00
|
|
|
|
|
|
|
(* Change a page's media box so its minimum x and y are 0, making other
|
|
|
|
operations simpler to think about. Any shift that is done is reflected in
|
|
|
|
other boxes (clip etc.) *)
|
|
|
|
let rectify_boxes ?(fast=false) pdf page =
|
|
|
|
let minx, miny, _, _ =
|
|
|
|
Pdf.parse_rectangle page.Pdfpage.mediabox
|
|
|
|
in
|
|
|
|
let f (iminx, iminy, imaxx, imaxy) =
|
|
|
|
iminx -. minx, iminy -. miny, imaxx -. minx, imaxy -. miny
|
|
|
|
in
|
|
|
|
let page = change_boxes f pdf page in
|
|
|
|
if minx <> 0. || miny <> 0.
|
2021-05-31 19:10:39 +02:00
|
|
|
then
|
|
|
|
begin let p, _, _ = shift_page ~fast [(-.minx),(-.miny)] pdf 1 page in p end
|
2013-08-20 16:32:57 +02:00
|
|
|
else page
|
|
|
|
|
|
|
|
(* \section{Flip pages} *)
|
2021-05-31 19:10:39 +02:00
|
|
|
let flip_page ?(fast=false) transform_op pdf pnum page =
|
2013-08-20 16:32:57 +02:00
|
|
|
let minx, miny, maxx, maxy =
|
|
|
|
Pdf.parse_rectangle page.Pdfpage.mediabox
|
|
|
|
in
|
|
|
|
let tr = transform_op minx miny maxx maxy in
|
2019-07-16 14:05:06 +02:00
|
|
|
let page = change_pattern_matrices_page pdf tr page in
|
2019-07-10 14:16:32 +02:00
|
|
|
transform_annotations pdf tr page.Pdfpage.rest;
|
2021-05-31 19:10:39 +02:00
|
|
|
(Pdfpage.prepend_operators pdf [Pdfops.Op_cm tr] ~fast page, pnum, tr)
|
2013-08-20 16:32:57 +02:00
|
|
|
|
|
|
|
let vflip_pdf ?(fast=false) pdf range =
|
|
|
|
let transform_op _ miny _ maxy =
|
|
|
|
Pdftransform.matrix_of_op
|
|
|
|
(Pdftransform.Scale ((0., ((miny +. maxy) /. 2.)), 1., -.1.))
|
|
|
|
in
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages (flip_page ~fast transform_op pdf) pdf range
|
2013-08-20 16:32:57 +02:00
|
|
|
|
|
|
|
let hflip_pdf ?(fast=false) pdf range =
|
|
|
|
let transform_op minx _ maxx _ =
|
|
|
|
Pdftransform.matrix_of_op
|
|
|
|
(Pdftransform.Scale (((minx +. maxx) /. 2., 0.), -.1., 1.))
|
|
|
|
in
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages (flip_page ~fast transform_op pdf) pdf range
|
2013-08-20 16:32:57 +02:00
|
|
|
|
2015-02-17 16:48:00 +01:00
|
|
|
let stamp_shift_of_position topline midline sw sh w h p =
|
|
|
|
let half x = x /. 2.
|
|
|
|
and dy =
|
|
|
|
if midline then sh /. 2.
|
|
|
|
else if topline then sh
|
|
|
|
else 0.
|
|
|
|
in
|
2021-08-12 21:38:55 +02:00
|
|
|
let open Cpdfposition in
|
|
|
|
match p with
|
|
|
|
| PosCentre (ox, oy) -> ox -. half sw, oy -. dy
|
|
|
|
| PosLeft (ox, oy) -> ox, oy -. dy
|
|
|
|
| PosRight (ox, oy) -> ox -. sw, oy -. dy
|
|
|
|
| Top o -> half w -. half sw, h -. o -. sh -. dy
|
|
|
|
| TopLeft o -> o, h -. sh -. o -. dy
|
|
|
|
| TopRight o -> w -. sw -. o, h -. sh -. o -. dy
|
|
|
|
| Left o -> o, half h -. half sh -. dy
|
|
|
|
| BottomLeft o -> o, o -. dy
|
|
|
|
| Bottom o -> half w -. half sw, o -. dy
|
|
|
|
| BottomRight o -> w -. sw -. o, o -. dy
|
|
|
|
| Right o -> w -. sw -. o, half h -. half sh -. dy
|
|
|
|
| Diagonal | ReverseDiagonal | Centre ->
|
|
|
|
half w -. half sw, half h -. half sh -. dy
|
2015-02-17 16:48:00 +01:00
|
|
|
|
2016-11-15 20:25:20 +01:00
|
|
|
(* Combine Pdfpage.rest items for two PDFs. For now, we combine /Annots, and
|
|
|
|
* copy everything else from adict. What else should we combine? *)
|
|
|
|
let combine_page_items pdf adict bdict =
|
|
|
|
let getannots dict =
|
|
|
|
begin match dict with
|
|
|
|
Pdf.Dictionary d ->
|
|
|
|
begin match lookup "/Annots" d with
|
|
|
|
Some (Pdf.Array items) -> items
|
|
|
|
| _ -> []
|
|
|
|
end
|
|
|
|
| _ -> []
|
|
|
|
end
|
|
|
|
in
|
|
|
|
let a_annots = getannots adict in
|
|
|
|
let b_annots = getannots bdict in
|
|
|
|
match a_annots @ b_annots with
|
|
|
|
[] -> adict
|
|
|
|
| annots -> Pdf.add_dict_entry adict "/Annots" (Pdf.Array annots)
|
|
|
|
|
2016-01-08 13:28:07 +01:00
|
|
|
let do_stamp relative_to_cropbox fast position topline midline scale_to_fit isover pdf o u opdf =
|
2014-10-06 15:32:53 +02:00
|
|
|
(* Scale page stamp o to fit page u *)
|
|
|
|
let sxmin, symin, sxmax, symax =
|
|
|
|
Pdf.parse_rectangle
|
|
|
|
(match Pdf.lookup_direct pdf "/CropBox" o.Pdfpage.rest with | Some r -> r | None -> o.Pdfpage.mediabox)
|
|
|
|
in let txmin, tymin, txmax, tymax =
|
|
|
|
Pdf.parse_rectangle
|
|
|
|
(match Pdf.lookup_direct pdf "/CropBox" u.Pdfpage.rest with | Some r -> r | None -> u.Pdfpage.mediabox)
|
|
|
|
in
|
|
|
|
let o =
|
|
|
|
if scale_to_fit then
|
|
|
|
let xmag = (txmax -. txmin) /. (sxmax -. sxmin) in
|
|
|
|
let ymag = (tymax -. tymin) /. (symax -. symin) in
|
|
|
|
let scale =
|
|
|
|
if xmag < 0.999 && ymag < 0.999 then
|
|
|
|
if xmag > ymag then xmag else ymag
|
|
|
|
else if xmag >= 1.001 && ymag >= 1.001 then
|
|
|
|
if xmag > ymag then ymag else xmag
|
|
|
|
else if xmag >= 1.001 then ymag
|
|
|
|
else xmag
|
|
|
|
in
|
|
|
|
let dx = txmin +. ((txmax -. txmin) -. (sxmax -. sxmin) *. scale) /. 2. in
|
|
|
|
let dy = tymin +. ((tymax -. tymin) -. (symax -. symin) *. scale) /. 2. in
|
2019-07-10 16:26:01 +02:00
|
|
|
let matrix =
|
|
|
|
(Pdftransform.matrix_of_transform
|
2016-01-08 13:28:07 +01:00
|
|
|
([Pdftransform.Translate (dx, dy)] @
|
|
|
|
(if relative_to_cropbox then [Pdftransform.Translate (txmin, tymin)] else []) @
|
|
|
|
[Pdftransform.Scale ((sxmin, symin), scale, scale)]))
|
2014-10-06 15:32:53 +02:00
|
|
|
in
|
2019-07-10 16:26:01 +02:00
|
|
|
transform_annotations pdf matrix o.Pdfpage.rest;
|
|
|
|
let r = Pdfpage.prepend_operators pdf [Pdfops.Op_cm matrix] ~fast o in
|
2019-07-16 14:05:06 +02:00
|
|
|
change_pattern_matrices_page pdf matrix r
|
2014-10-06 15:32:53 +02:00
|
|
|
else
|
2016-04-03 20:46:54 +02:00
|
|
|
let sw = sxmax -. sxmin and sh = symax -. symin
|
|
|
|
and w = txmax -. txmin and h = tymax -. tymin in
|
2015-02-17 16:48:00 +01:00
|
|
|
let dx, dy = stamp_shift_of_position topline midline sw sh w h position in
|
2019-07-10 16:26:01 +02:00
|
|
|
let matrix =
|
|
|
|
(Pdftransform.matrix_of_transform
|
|
|
|
((if relative_to_cropbox then [Pdftransform.Translate (txmin, tymin)] else []) @
|
|
|
|
[Pdftransform.Translate (dx, dy)]))
|
2014-10-06 15:32:53 +02:00
|
|
|
in
|
2019-07-10 16:26:01 +02:00
|
|
|
transform_annotations pdf matrix o.Pdfpage.rest;
|
|
|
|
let r = Pdfpage.prepend_operators pdf [Pdfops.Op_cm matrix] ~fast o in
|
2019-07-16 14:05:06 +02:00
|
|
|
change_pattern_matrices_page pdf matrix r
|
2014-10-06 15:32:53 +02:00
|
|
|
in
|
|
|
|
{u with
|
|
|
|
Pdfpage.content =
|
|
|
|
(if isover then ( @ ) else ( @@ ))
|
2016-04-03 20:46:54 +02:00
|
|
|
(protect fast pdf u.Pdfpage.resources u.Pdfpage.content)
|
|
|
|
(protect fast pdf o.Pdfpage.resources o.Pdfpage.content);
|
2016-11-15 20:25:20 +01:00
|
|
|
Pdfpage.rest =
|
|
|
|
combine_page_items pdf u.Pdfpage.rest o.Pdfpage.rest;
|
2014-10-06 15:32:53 +02:00
|
|
|
Pdfpage.resources =
|
|
|
|
combine_pdf_resources pdf u.Pdfpage.resources o.Pdfpage.resources}
|
|
|
|
|
|
|
|
(* Alter bookmark destinations given a hash table of (old page reference
|
|
|
|
* number, new page reference number) pairings *)
|
|
|
|
let change_destination t = function
|
|
|
|
Pdfdest.XYZ (Pdfdest.PageObject p, a, b, c) ->
|
|
|
|
Pdfdest.XYZ (Pdfdest.PageObject (Hashtbl.find t p), a, b, c)
|
|
|
|
| Pdfdest.Fit (Pdfdest.PageObject p) ->
|
|
|
|
Pdfdest.Fit (Pdfdest.PageObject (Hashtbl.find t p))
|
|
|
|
| Pdfdest.FitH (Pdfdest.PageObject p, x) ->
|
|
|
|
Pdfdest.FitH (Pdfdest.PageObject (Hashtbl.find t p), x)
|
|
|
|
| Pdfdest.FitV (Pdfdest.PageObject p, x) ->
|
|
|
|
Pdfdest.FitV (Pdfdest.PageObject (Hashtbl.find t p), x)
|
|
|
|
| Pdfdest.FitR (Pdfdest.PageObject p, a, b, c, d) ->
|
|
|
|
Pdfdest.FitR (Pdfdest.PageObject (Hashtbl.find t p), a, b, c, d)
|
|
|
|
| Pdfdest.FitB (Pdfdest.PageObject p) ->
|
|
|
|
Pdfdest.Fit (Pdfdest.PageObject (Hashtbl.find t p))
|
|
|
|
| Pdfdest.FitBH (Pdfdest.PageObject p, x) ->
|
|
|
|
Pdfdest.FitBH (Pdfdest.PageObject (Hashtbl.find t p), x)
|
|
|
|
| Pdfdest.FitBV (Pdfdest.PageObject p, x) ->
|
|
|
|
Pdfdest.FitBV (Pdfdest.PageObject (Hashtbl.find t p), x)
|
|
|
|
| x -> x
|
|
|
|
|
|
|
|
let change_bookmark t m =
|
|
|
|
{m with Pdfmarks.target =
|
|
|
|
try change_destination t m.Pdfmarks.target with Not_found -> m.Pdfmarks.target}
|
|
|
|
|
2016-01-08 13:28:07 +01:00
|
|
|
let stamp relative_to_cropbox position topline midline fast scale_to_fit isover range over pdf =
|
2016-03-30 18:04:28 +02:00
|
|
|
let prefix = Pdfpage.shortest_unused_prefix pdf in
|
|
|
|
Pdfpage.add_prefix over prefix;
|
2014-10-06 15:32:53 +02:00
|
|
|
let marks = Pdfmarks.read_bookmarks pdf in
|
|
|
|
let marks_refnumbers = Pdf.page_reference_numbers pdf in
|
|
|
|
let pdf = Pdfmarks.remove_bookmarks pdf in
|
|
|
|
let over = Pdfmarks.remove_bookmarks over in
|
|
|
|
let pageseqs = ilist 1 (Pdfpage.endpage pdf) in
|
|
|
|
let over_firstpage_pdf =
|
|
|
|
match Pdfpage.pages_of_pagetree over with
|
|
|
|
| [] -> error "empty PDF"
|
2016-11-27 16:39:19 +01:00
|
|
|
| h::_ -> Pdfpage.change_pages ~changes:[(1, 1)] true over [h]
|
2014-10-06 15:32:53 +02:00
|
|
|
in
|
|
|
|
let merged =
|
|
|
|
Pdfmerge.merge_pdfs
|
|
|
|
false false ["a"; "b"] [pdf; over_firstpage_pdf] [pageseqs; [1]]
|
|
|
|
in
|
2014-11-16 17:01:12 +01:00
|
|
|
let merged =
|
|
|
|
{merged with Pdf.saved_encryption = pdf.Pdf.saved_encryption}
|
2014-10-06 15:32:53 +02:00
|
|
|
in
|
2021-12-19 13:55:06 +01:00
|
|
|
let merged = Cpdfmetadata.copy_id true pdf merged in
|
2016-03-30 18:04:28 +02:00
|
|
|
let merged_pages = Pdfpage.pages_of_pagetree merged in
|
2014-11-16 17:01:12 +01:00
|
|
|
let under_pages, over_page =
|
2016-03-30 18:04:28 +02:00
|
|
|
all_but_last merged_pages, last merged_pages
|
2014-10-06 15:32:53 +02:00
|
|
|
in
|
2014-11-16 17:01:12 +01:00
|
|
|
let new_pages =
|
|
|
|
map2
|
|
|
|
(fun pageseq under_page ->
|
2016-03-30 18:04:28 +02:00
|
|
|
do_stamp relative_to_cropbox fast position topline midline scale_to_fit isover merged
|
2014-11-16 17:01:12 +01:00
|
|
|
(if mem pageseq range then over_page else
|
|
|
|
Pdfpage.blankpage Pdfpaper.a4)
|
|
|
|
under_page over)
|
|
|
|
pageseqs
|
|
|
|
under_pages
|
|
|
|
in
|
2016-11-27 16:39:19 +01:00
|
|
|
let changed =
|
|
|
|
let changes =
|
2020-03-04 19:50:32 +01:00
|
|
|
map (fun x -> (x, x)) (ilist 1 (length new_pages))
|
2016-11-27 16:39:19 +01:00
|
|
|
in
|
|
|
|
Pdfpage.change_pages ~changes true merged new_pages
|
|
|
|
in
|
|
|
|
let new_refnumbers = Pdf.page_reference_numbers changed in
|
2020-03-04 19:50:32 +01:00
|
|
|
let changetable = hashtable_of_dictionary (combine marks_refnumbers new_refnumbers) in
|
2016-11-27 16:39:19 +01:00
|
|
|
let new_marks = map (change_bookmark changetable) marks in
|
|
|
|
Pdfmarks.add_bookmarks new_marks changed
|
2014-10-06 15:32:53 +02:00
|
|
|
|
2020-03-20 17:29:54 +01:00
|
|
|
let add_xobject_to_page xobjname xobjnum page pdf =
|
|
|
|
let resources' =
|
|
|
|
let xobjects =
|
|
|
|
match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with
|
|
|
|
| Some xobjects -> xobjects
|
|
|
|
| _ -> Pdf.Dictionary []
|
|
|
|
in
|
|
|
|
let new_xobjects =
|
|
|
|
Pdf.add_dict_entry xobjects xobjname (Pdf.Indirect xobjnum)
|
|
|
|
in
|
|
|
|
Pdf.add_dict_entry page.Pdfpage.resources "/XObject" new_xobjects
|
|
|
|
in
|
|
|
|
{page with Pdfpage.resources = resources'}
|
|
|
|
|
2020-03-19 17:23:16 +01:00
|
|
|
let add_page_as_xobject pdf range page name =
|
2020-03-20 17:29:54 +01:00
|
|
|
let xobject_data =
|
|
|
|
match Pdfops.stream_of_ops (Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content) with
|
|
|
|
Pdf.Stream {contents = (_, Got b)} -> b
|
|
|
|
| _ -> assert false
|
|
|
|
in
|
2020-03-19 17:23:16 +01:00
|
|
|
let xobject_dict =
|
|
|
|
["/Type", Pdf.Name "/XObject";
|
2020-03-25 14:46:54 +01:00
|
|
|
"/Subtype", Pdf.Name "/Form";
|
|
|
|
"/BBox", page.Pdfpage.mediabox;
|
|
|
|
"/Resources", page.Pdfpage.resources;
|
|
|
|
"/Length", Pdf.Integer (bytes_size xobject_data)]
|
2020-03-19 17:23:16 +01:00
|
|
|
in
|
|
|
|
let xobject =
|
|
|
|
Pdf.Stream {contents = (Pdf.Dictionary xobject_dict, Pdf.Got xobject_data)}
|
|
|
|
in
|
|
|
|
let xobject_objnum = Pdf.addobj pdf xobject in
|
2020-03-20 17:29:54 +01:00
|
|
|
let pages = Pdfpage.pages_of_pagetree pdf in
|
|
|
|
let new_pages =
|
|
|
|
List.map2
|
|
|
|
(fun page pnum ->
|
|
|
|
if mem pnum range
|
|
|
|
then add_xobject_to_page name xobject_objnum page pdf
|
|
|
|
else page)
|
|
|
|
pages
|
|
|
|
(indx pages)
|
|
|
|
in
|
|
|
|
Pdfpage.change_pages true pdf new_pages
|
2020-03-18 18:08:30 +01:00
|
|
|
|
2020-03-19 18:08:15 +01:00
|
|
|
(* n.b the use of change_pages here ensures no inheritable resources in the
|
|
|
|
* stamp, therefore creation of xobject from page is as simple as expected. *)
|
2020-03-19 17:23:16 +01:00
|
|
|
let stamp_as_xobject pdf range over =
|
2020-03-18 18:08:30 +01:00
|
|
|
let prefix = Pdfpage.shortest_unused_prefix pdf in
|
|
|
|
Pdfpage.add_prefix over prefix;
|
|
|
|
let marks = Pdfmarks.read_bookmarks pdf in
|
|
|
|
let marks_refnumbers = Pdf.page_reference_numbers pdf in
|
|
|
|
let pdf = Pdfmarks.remove_bookmarks pdf in
|
|
|
|
let over = Pdfmarks.remove_bookmarks over in
|
|
|
|
let pageseqs = ilist 1 (Pdfpage.endpage pdf) in
|
|
|
|
let over_firstpage_pdf =
|
|
|
|
match Pdfpage.pages_of_pagetree over with
|
|
|
|
| [] -> error "empty PDF"
|
|
|
|
| h::_ -> Pdfpage.change_pages ~changes:[(1, 1)] true over [h]
|
|
|
|
in
|
|
|
|
let merged =
|
|
|
|
Pdfmerge.merge_pdfs
|
|
|
|
false false ["a"; "b"] [pdf; over_firstpage_pdf] [pageseqs; [1]]
|
|
|
|
in
|
|
|
|
let merged =
|
|
|
|
{merged with Pdf.saved_encryption = pdf.Pdf.saved_encryption}
|
|
|
|
in
|
2021-12-19 13:55:06 +01:00
|
|
|
let merged = Cpdfmetadata.copy_id true pdf merged in
|
2020-03-18 18:08:30 +01:00
|
|
|
let merged_pages = Pdfpage.pages_of_pagetree merged in
|
|
|
|
let under_pages, over_page =
|
|
|
|
all_but_last merged_pages, last merged_pages
|
|
|
|
in
|
|
|
|
let new_pages = under_pages in
|
|
|
|
let changed =
|
|
|
|
let changes =
|
|
|
|
map (fun x -> (x, x)) (ilist 1 (length new_pages))
|
|
|
|
in
|
|
|
|
Pdfpage.change_pages ~changes true merged new_pages
|
|
|
|
in
|
|
|
|
let new_refnumbers = Pdf.page_reference_numbers changed in
|
|
|
|
let changetable = hashtable_of_dictionary (combine marks_refnumbers new_refnumbers) in
|
|
|
|
let new_marks = map (change_bookmark changetable) marks in
|
|
|
|
let pdf = Pdfmarks.add_bookmarks new_marks changed in
|
2020-03-20 17:29:54 +01:00
|
|
|
let name = "/" ^ Pdfpage.shortest_unused_prefix pdf ^ "CPDFXObj" in
|
2020-03-19 17:23:16 +01:00
|
|
|
(add_page_as_xobject pdf range over_page name, name)
|
2020-03-18 18:08:30 +01:00
|
|
|
|
2014-10-06 15:32:53 +02:00
|
|
|
(* Combine pages from two PDFs. For now, assume equal length. *)
|
|
|
|
|
|
|
|
(* If [over] has more pages than [under], chop the excess. If the converse, pad
|
|
|
|
[over] to the same length *)
|
|
|
|
let equalize_pages under over =
|
2016-04-03 13:48:44 +02:00
|
|
|
let length_under = Pdfpage.endpage under in
|
|
|
|
let length_over = Pdfpage.endpage over in
|
2014-10-06 15:32:53 +02:00
|
|
|
if length_over > length_under then
|
2016-11-27 16:39:19 +01:00
|
|
|
let changes =
|
2020-03-04 19:50:32 +01:00
|
|
|
map (fun x -> (x, x)) (ilist 1 length_under)
|
2016-11-27 16:39:19 +01:00
|
|
|
in
|
|
|
|
(under,
|
|
|
|
(Pdfpage.change_pages
|
|
|
|
~changes true over (take (Pdfpage.pages_of_pagetree over) length_under)))
|
2014-10-06 15:32:53 +02:00
|
|
|
else if length_under > length_over then
|
2016-11-27 16:39:19 +01:00
|
|
|
let changes =
|
2020-03-04 19:50:32 +01:00
|
|
|
map (fun x -> (x, x)) (ilist 1 length_over)
|
2016-11-27 16:39:19 +01:00
|
|
|
in
|
|
|
|
(under,
|
|
|
|
Pdfpage.change_pages
|
|
|
|
~changes true over
|
|
|
|
(Pdfpage.pages_of_pagetree over @
|
|
|
|
(many (Pdfpage.blankpage Pdfpaper.a4) (length_under - length_over))))
|
2014-10-06 15:32:53 +02:00
|
|
|
else
|
|
|
|
under, over
|
|
|
|
|
2016-11-25 18:38:19 +01:00
|
|
|
let combine_pages fast under over scaletofit swap equalize =
|
|
|
|
let debug_combine_pages = false in
|
2016-11-17 19:42:40 +01:00
|
|
|
let debug_pdf pdf n =
|
|
|
|
if debug_combine_pages then
|
2016-11-25 18:38:19 +01:00
|
|
|
begin Pdf.remove_unreferenced pdf; Pdfwrite.pdf_to_file pdf n end
|
2016-11-17 19:42:40 +01:00
|
|
|
in
|
2016-04-03 13:48:44 +02:00
|
|
|
Pdfpage.add_prefix over (Pdfpage.shortest_unused_prefix under);
|
2016-11-25 18:38:19 +01:00
|
|
|
let marks_under, marks_over = Pdfmarks.read_bookmarks under, Pdfmarks.read_bookmarks over in
|
2016-04-03 13:48:44 +02:00
|
|
|
let under, over = if equalize then equalize_pages under over else under, over in
|
2016-11-25 18:38:19 +01:00
|
|
|
let under_length, over_length = Pdfpage.endpage under, Pdfpage.endpage over in
|
2016-04-03 13:48:44 +02:00
|
|
|
if under_length <> over_length then
|
|
|
|
raise (Pdf.PDFError "combine_pages: not of equal length")
|
|
|
|
else
|
|
|
|
let pageseqs_under = ilist 1 (Pdfpage.endpage under) in
|
|
|
|
let pageseqs_over = ilist 1 (Pdfpage.endpage over) in
|
|
|
|
let merged =
|
|
|
|
Pdfmerge.merge_pdfs
|
|
|
|
false false ["a"; "b"] [under; over] [pageseqs_under; pageseqs_over]
|
|
|
|
in
|
2016-11-17 19:42:40 +01:00
|
|
|
debug_pdf merged "merged.pdf";
|
2016-04-03 13:48:44 +02:00
|
|
|
let under_pages, over_pages =
|
|
|
|
cleave (Pdfpage.pages_of_pagetree merged) under_length
|
|
|
|
in
|
|
|
|
let new_pages =
|
|
|
|
map2
|
|
|
|
(fun o u ->
|
|
|
|
do_stamp
|
2016-11-25 18:38:19 +01:00
|
|
|
false fast (BottomLeft 0.) false false scaletofit (not swap) merged o u over)
|
2016-04-03 13:48:44 +02:00
|
|
|
over_pages under_pages
|
|
|
|
in
|
2016-11-25 18:38:19 +01:00
|
|
|
(* Build the changes. 123456 -> 123123 *)
|
|
|
|
let changes =
|
2020-03-04 19:50:32 +01:00
|
|
|
let len = length new_pages in
|
2016-11-25 18:38:19 +01:00
|
|
|
combine (ilist 1 (len * 2)) (let x = ilist 1 len in x @ x)
|
|
|
|
in
|
|
|
|
let changed = Pdfpage.change_pages ~changes true merged new_pages in
|
|
|
|
let r = Pdfmarks.add_bookmarks (marks_under @ marks_over) changed in
|
|
|
|
debug_pdf r "final.pdf";
|
|
|
|
r
|
2014-10-06 15:32:53 +02:00
|
|
|
|
2013-08-20 16:32:57 +02:00
|
|
|
(* \section{Set media box} *)
|
2016-07-21 18:02:11 +02:00
|
|
|
let set_mediabox xywhlist pdf range =
|
|
|
|
let crop_page pnum page =
|
|
|
|
let x, y, w, h = List.nth xywhlist (pnum - 1) in
|
2013-08-20 16:32:57 +02:00
|
|
|
{page with
|
|
|
|
Pdfpage.mediabox =
|
|
|
|
(Pdf.Array
|
|
|
|
[Pdf.Real x; Pdf.Real y;
|
|
|
|
Pdf.Real (x +. w); Pdf.Real (y +. h)])}
|
|
|
|
in
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages (ppstub crop_page) pdf range
|
2013-08-20 16:32:57 +02:00
|
|
|
|
2019-08-08 17:57:25 +02:00
|
|
|
(* Just used by cpdflib for historical reasons *)
|
|
|
|
let setBox box minx maxx miny maxy pdf range =
|
2013-08-20 16:32:57 +02:00
|
|
|
let set_box_page _ page =
|
|
|
|
{page with
|
|
|
|
Pdfpage.rest =
|
|
|
|
Pdf.add_dict_entry
|
|
|
|
page.Pdfpage.rest box
|
|
|
|
(Pdf.Array [Pdf.Real minx; Pdf.Real miny; Pdf.Real maxx; Pdf.Real maxy])}
|
|
|
|
in
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages (ppstub set_box_page) pdf range
|
2013-08-20 16:32:57 +02:00
|
|
|
|
|
|
|
(* \section{Cropping} *)
|
2019-06-26 15:43:24 +02:00
|
|
|
let crop_pdf ?(box="/CropBox") xywhlist pdf range =
|
2016-07-21 18:02:11 +02:00
|
|
|
let crop_page pagenum page =
|
2013-08-20 16:32:57 +02:00
|
|
|
{page with
|
|
|
|
Pdfpage.rest =
|
|
|
|
(Pdf.add_dict_entry
|
|
|
|
page.Pdfpage.rest
|
2019-06-26 15:43:24 +02:00
|
|
|
box
|
2016-07-21 18:02:11 +02:00
|
|
|
(let x, y, w, h = List.nth xywhlist (pagenum - 1) in
|
|
|
|
(Pdf.Array
|
|
|
|
[Pdf.Real x; Pdf.Real y;
|
|
|
|
Pdf.Real (x +. w); Pdf.Real (y +. h)])))}
|
2013-08-20 16:32:57 +02:00
|
|
|
in
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages (ppstub crop_page) pdf range
|
2013-08-20 16:32:57 +02:00
|
|
|
|
2017-05-19 20:10:49 +02:00
|
|
|
(* Clip a page to one of its boxes, or the media box if that box is not
|
|
|
|
* present. This is a hard clip, done by using a clipping rectangle, so that
|
|
|
|
* the page may then be used as a stamp without extraneous material reapearing.
|
|
|
|
* *)
|
|
|
|
let hard_box pdf range boxname mediabox_if_missing fast =
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages
|
2021-05-27 18:31:12 +02:00
|
|
|
(ppstub (fun pagenum page ->
|
2017-05-19 20:10:49 +02:00
|
|
|
let minx, miny, maxx, maxy =
|
|
|
|
if boxname = "/MediaBox" then
|
|
|
|
Pdf.parse_rectangle page.Pdfpage.mediabox
|
|
|
|
else
|
|
|
|
match Pdf.lookup_direct pdf boxname page.Pdfpage.rest with
|
|
|
|
| Some a -> Pdf.parse_rectangle a
|
|
|
|
| _ ->
|
|
|
|
if mediabox_if_missing
|
|
|
|
then Pdf.parse_rectangle page.Pdfpage.mediabox
|
2019-08-01 15:10:02 +02:00
|
|
|
else error (Printf.sprintf "hard_box: box %s not found" boxname)
|
2017-05-19 20:10:49 +02:00
|
|
|
in
|
2017-05-20 14:23:32 +02:00
|
|
|
let ops = [Pdfops.Op_re (minx, miny, maxx -. minx, maxy -. miny); Pdfops.Op_W; Pdfops.Op_n] in
|
2021-10-26 17:18:09 +02:00
|
|
|
Pdfpage.prepend_operators pdf ops ~fast page))
|
2017-05-19 20:10:49 +02:00
|
|
|
pdf
|
|
|
|
range
|
|
|
|
|
2013-08-20 16:32:57 +02:00
|
|
|
let remove_cropping_pdf pdf range =
|
|
|
|
let remove_cropping_page _ page =
|
|
|
|
{page with
|
|
|
|
Pdfpage.rest =
|
|
|
|
(Pdf.remove_dict_entry page.Pdfpage.rest "/CropBox")}
|
|
|
|
in
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages (ppstub remove_cropping_page) pdf range
|
2013-08-20 16:32:57 +02:00
|
|
|
|
|
|
|
let remove_trim_pdf pdf range =
|
|
|
|
let remove_trim_page _ page =
|
|
|
|
{page with
|
|
|
|
Pdfpage.rest =
|
|
|
|
(Pdf.remove_dict_entry page.Pdfpage.rest "/TrimBox")}
|
|
|
|
in
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages (ppstub remove_trim_page) pdf range
|
2013-08-20 16:32:57 +02:00
|
|
|
|
|
|
|
let remove_art_pdf pdf range =
|
|
|
|
let remove_art_page _ page =
|
|
|
|
{page with
|
|
|
|
Pdfpage.rest =
|
|
|
|
(Pdf.remove_dict_entry page.Pdfpage.rest "/ArtBox")}
|
|
|
|
in
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages (ppstub remove_art_page) pdf range
|
2013-08-20 16:32:57 +02:00
|
|
|
|
|
|
|
let remove_bleed_pdf pdf range =
|
|
|
|
let remove_bleed_page _ page =
|
|
|
|
{page with
|
|
|
|
Pdfpage.rest =
|
|
|
|
(Pdf.remove_dict_entry page.Pdfpage.rest "/BleedBox")}
|
|
|
|
in
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages (ppstub remove_bleed_page) pdf range
|
2013-08-20 16:32:57 +02:00
|
|
|
|
|
|
|
(* \section{Rotating pages} *)
|
|
|
|
let rotate_pdf r pdf range =
|
|
|
|
let rotate_page _ page =
|
|
|
|
{page with Pdfpage.rotate =
|
|
|
|
Pdfpage.rotation_of_int r}
|
|
|
|
in
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages (ppstub rotate_page) pdf range
|
2013-08-20 16:32:57 +02:00
|
|
|
|
|
|
|
let rotate_pdf_by r pdf range =
|
|
|
|
let rotate_page_by _ page =
|
|
|
|
{page with Pdfpage.rotate =
|
|
|
|
Pdfpage.rotation_of_int ((Pdfpage.int_of_rotation page.Pdfpage.rotate + r) mod 360)}
|
|
|
|
in
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages (ppstub rotate_page_by) pdf range
|
2013-08-20 16:32:57 +02:00
|
|
|
|
2021-05-31 19:10:39 +02:00
|
|
|
let rotate_page_contents ~fast rotpoint r pdf pnum page =
|
2013-08-20 16:32:57 +02:00
|
|
|
let rotation_point =
|
|
|
|
match rotpoint with
|
|
|
|
| None ->
|
|
|
|
let minx, miny, maxx, maxy = Pdf.parse_rectangle page.Pdfpage.mediabox in
|
|
|
|
(minx +. maxx) /. 2., (miny +. maxy) /. 2.
|
|
|
|
| Some point -> point
|
|
|
|
in
|
|
|
|
let tr =
|
|
|
|
Pdftransform.matrix_of_op
|
|
|
|
(Pdftransform.Rotate (rotation_point, -.(rad_of_deg r)))
|
|
|
|
in let tr2 =
|
|
|
|
Pdftransform.matrix_of_op
|
|
|
|
(Pdftransform.Rotate (rotation_point, rad_of_deg r))
|
|
|
|
in
|
|
|
|
let transform_op = Pdfops.Op_cm tr in
|
2019-07-16 14:05:06 +02:00
|
|
|
let page = change_pattern_matrices_page pdf tr2 page in
|
|
|
|
transform_annotations pdf tr page.Pdfpage.rest;
|
2021-05-31 19:10:39 +02:00
|
|
|
(Pdfpage.prepend_operators pdf [transform_op] ~fast page, pnum, tr)
|
2013-08-20 16:32:57 +02:00
|
|
|
|
|
|
|
let rotate_contents ?(fast=false) r pdf range =
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages (rotate_page_contents ~fast None r pdf) pdf range
|
2013-08-20 16:32:57 +02:00
|
|
|
|
|
|
|
(* Return the pages from the pdf in the range, unordered. *)
|
|
|
|
let select_pages range pdf =
|
|
|
|
let pages = Pdfpage.pages_of_pagetree pdf in
|
|
|
|
option_map (function n -> try Some (select n pages) with _ -> None) range
|
|
|
|
|
|
|
|
|
|
|
|
(* Upright functionality *)
|
|
|
|
|
2015-06-04 14:35:56 +02:00
|
|
|
(* If all pages are already upright, and the mediabox is (0,0)-based, do nothing
|
|
|
|
to save time. *)
|
2013-08-20 16:32:57 +02:00
|
|
|
let allupright range pdf =
|
|
|
|
let page_is_upright page =
|
2015-06-04 14:35:56 +02:00
|
|
|
page.Pdfpage.rotate = Pdfpage.Rotate0 &&
|
|
|
|
(let (minx, miny, _, _) = Pdf.parse_rectangle page.Pdfpage.mediabox in
|
|
|
|
minx < 0.001 && miny < 0.001 && minx > ~-.0.001 && miny > ~-.0.001)
|
2013-08-20 16:32:57 +02:00
|
|
|
in
|
|
|
|
not (mem false (map page_is_upright (select_pages range pdf)))
|
|
|
|
|
|
|
|
let upright_transform page =
|
|
|
|
let rotate =
|
|
|
|
Pdfpage.int_of_rotation page.Pdfpage.rotate
|
|
|
|
and cx, cy =
|
|
|
|
let minx, miny, maxx, maxy = Pdf.parse_rectangle page.Pdfpage.mediabox in
|
|
|
|
(minx +. maxx) /. 2., (miny +. maxy) /. 2.
|
|
|
|
in
|
|
|
|
Pdftransform.mkrotate (cx, cy) (rad_of_deg (~-.(float rotate)))
|
|
|
|
|
|
|
|
let transform_boxes tr pdf page =
|
|
|
|
let f (minx, miny, maxx, maxy) =
|
|
|
|
let minx, miny = Pdftransform.transform_matrix tr (minx, miny)
|
|
|
|
and maxx, maxy = Pdftransform.transform_matrix tr (maxx, maxy) in
|
|
|
|
(minx, miny, maxx, maxy)
|
|
|
|
in
|
|
|
|
change_boxes f pdf page
|
|
|
|
|
|
|
|
let transform_contents ?(fast=false) tr pdf page =
|
|
|
|
let transform_op = Pdfops.Op_cm tr in
|
2019-07-16 14:05:06 +02:00
|
|
|
let page = change_pattern_matrices_page pdf (Pdftransform.matrix_invert tr) page in
|
2019-07-10 14:16:32 +02:00
|
|
|
transform_annotations pdf tr page.Pdfpage.rest;
|
2019-07-16 14:05:06 +02:00
|
|
|
Pdfpage.prepend_operators pdf [transform_op] ~fast page
|
2013-08-20 16:32:57 +02:00
|
|
|
|
|
|
|
let upright ?(fast=false) range pdf =
|
|
|
|
if allupright range pdf then pdf else
|
2021-05-31 19:10:39 +02:00
|
|
|
let upright_page _ pnum page =
|
2013-08-20 16:32:57 +02:00
|
|
|
let tr = upright_transform page in
|
|
|
|
let page = transform_boxes tr pdf page in
|
|
|
|
let page = transform_contents ~fast tr pdf page in
|
2021-05-31 19:10:39 +02:00
|
|
|
(rectify_boxes ~fast pdf {page with Pdfpage.rotate = Pdfpage.Rotate0}, pnum, tr)
|
2013-08-20 16:32:57 +02:00
|
|
|
in
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages (upright_page pdf) pdf range
|
2013-08-20 16:32:57 +02:00
|
|
|
|
|
|
|
(* \section{Scale page data} *)
|
2016-07-21 18:02:11 +02:00
|
|
|
let scale_pdf ?(fast=false) sxsylist pdf range =
|
|
|
|
let scale_page pnum page =
|
|
|
|
let sx, sy = List.nth sxsylist (pnum - 1) in
|
|
|
|
let f (xmin, ymin, xmax, ymax) =
|
|
|
|
xmin *. sx, ymin *. sy, xmax *. sx, ymax *. sy
|
|
|
|
in
|
|
|
|
let page = change_boxes f pdf page
|
|
|
|
and matrix = Pdftransform.matrix_of_op (Pdftransform.Scale ((0., 0.), sx, sy)) in
|
|
|
|
let transform_op =
|
|
|
|
Pdfops.Op_cm matrix
|
2019-07-16 14:05:06 +02:00
|
|
|
and page =
|
|
|
|
change_pattern_matrices_page pdf (Pdftransform.matrix_invert matrix) page
|
2016-07-21 18:02:11 +02:00
|
|
|
in
|
2019-07-10 14:16:32 +02:00
|
|
|
transform_annotations pdf matrix page.Pdfpage.rest;
|
2021-05-28 19:00:27 +02:00
|
|
|
(Pdfpage.prepend_operators pdf ~fast [transform_op] page, pnum, matrix)
|
2016-07-21 18:02:11 +02:00
|
|
|
in
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages scale_page pdf range
|
2013-08-20 16:32:57 +02:00
|
|
|
|
|
|
|
(* Scale to fit page of size x * y *)
|
2019-03-19 15:03:28 +01:00
|
|
|
let scale_to_fit_pdf ?(fast=false) position input_scale xylist op pdf range =
|
2016-07-21 18:02:11 +02:00
|
|
|
let scale_page_to_fit pnum page =
|
|
|
|
let x, y = List.nth xylist (pnum - 1) in
|
2013-08-20 16:32:57 +02:00
|
|
|
let matrix =
|
|
|
|
let (minx, miny, maxx, maxy) =
|
|
|
|
(* Use cropbox if available *)
|
|
|
|
Pdf.parse_rectangle
|
|
|
|
(match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with
|
|
|
|
| Some r -> r
|
|
|
|
| None -> page.Pdfpage.mediabox)
|
|
|
|
in
|
|
|
|
if maxx <= 0. || maxy <= 0. then failwith "Zero-sized pages are invalid" else
|
|
|
|
let fx = x /. maxx in let fy = y /. maxy in
|
|
|
|
let scale = fmin fx fy *. input_scale in
|
2019-03-19 15:03:28 +01:00
|
|
|
let trans_x =
|
|
|
|
match position with
|
2021-08-12 21:38:55 +02:00
|
|
|
Cpdfposition.Left _ -> 0.
|
|
|
|
| Cpdfposition.Right _ -> (x -. (maxx *. scale))
|
2019-03-19 15:03:28 +01:00
|
|
|
| _ -> (x -. (maxx *. scale)) /. 2.
|
|
|
|
and trans_y =
|
|
|
|
match position with
|
2021-08-12 21:38:55 +02:00
|
|
|
| Cpdfposition.Top _ -> (y -. (maxy *. scale))
|
|
|
|
| Cpdfposition.Bottom _ -> 0.
|
2019-03-19 15:03:28 +01:00
|
|
|
| _ -> (y -. (maxy *. scale)) /. 2.
|
|
|
|
in
|
2013-08-20 16:32:57 +02:00
|
|
|
(Pdftransform.matrix_of_transform
|
|
|
|
[Pdftransform.Translate (trans_x, trans_y);
|
|
|
|
Pdftransform.Scale ((0., 0.), scale, scale)])
|
|
|
|
in
|
|
|
|
let page =
|
|
|
|
change_boxes
|
2016-11-13 19:01:01 +01:00
|
|
|
(function (minx, miny, maxx, maxy) -> 0., 0., x, y)
|
2013-08-20 16:32:57 +02:00
|
|
|
pdf page
|
|
|
|
in
|
2019-07-10 14:16:32 +02:00
|
|
|
transform_annotations pdf matrix page.Pdfpage.rest;
|
2021-05-31 19:10:39 +02:00
|
|
|
(Pdfpage.prepend_operators pdf [Pdfops.Op_cm matrix] ~fast
|
|
|
|
(change_pattern_matrices_page pdf (Pdftransform.matrix_invert matrix) page), pnum, matrix)
|
2013-08-20 16:32:57 +02:00
|
|
|
in
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages scale_page_to_fit pdf range
|
2013-08-20 16:32:57 +02:00
|
|
|
|
|
|
|
(* Scale contents *)
|
2021-05-31 19:10:39 +02:00
|
|
|
let scale_page_contents ?(fast=false) scale position pdf pnum page =
|
2013-08-20 16:32:57 +02:00
|
|
|
let (minx, miny, maxx, maxy) as box =
|
|
|
|
(* Use cropbox if available *)
|
|
|
|
Pdf.parse_rectangle
|
|
|
|
(match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with
|
|
|
|
| Some r -> r
|
|
|
|
| None -> page.Pdfpage.mediabox)
|
|
|
|
in
|
2021-08-12 21:38:55 +02:00
|
|
|
let sx, sy, _ = Cpdfposition.calculate_position true 0. box Horizontal position in
|
2013-08-20 16:32:57 +02:00
|
|
|
let tx, ty =
|
2021-08-12 21:38:55 +02:00
|
|
|
let open Cpdfposition in
|
2013-08-20 16:32:57 +02:00
|
|
|
match position with
|
|
|
|
| Top t -> 0., -.t
|
|
|
|
| TopLeft t -> t, -.t
|
2021-11-22 21:20:48 +01:00
|
|
|
| TopRight t -> -.t, -.t
|
2013-08-20 16:32:57 +02:00
|
|
|
| Left t -> t, 0.
|
|
|
|
| BottomLeft t -> t, t
|
|
|
|
| Bottom t -> 0., t
|
2021-11-22 21:20:48 +01:00
|
|
|
| BottomRight t -> -.t, t
|
2013-08-20 16:32:57 +02:00
|
|
|
| Right t -> -.t, 0.
|
|
|
|
| _ -> 0., 0. (* centre it... FIXME: We will add a center position, eventually, for text and this... *)
|
|
|
|
in
|
|
|
|
let transform =
|
|
|
|
Pdftransform.matrix_of_transform
|
|
|
|
[Pdftransform.Translate (tx, ty);
|
|
|
|
Pdftransform.Scale ((sx, sy), scale, scale)]
|
|
|
|
in
|
|
|
|
let transform_op = Pdfops.Op_cm transform in
|
2019-07-16 14:05:06 +02:00
|
|
|
let page = change_pattern_matrices_page pdf transform page in
|
2019-07-10 12:18:01 +02:00
|
|
|
transform_annotations pdf transform page.Pdfpage.rest;
|
2021-05-31 19:10:39 +02:00
|
|
|
(Pdfpage.prepend_operators pdf [transform_op] ~fast page, pnum, transform)
|
2013-08-20 16:32:57 +02:00
|
|
|
|
|
|
|
let scale_contents ?(fast=false) position scale pdf range =
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages (scale_page_contents ~fast scale position pdf) pdf range
|
2013-08-20 16:32:57 +02:00
|
|
|
|
2021-10-19 17:18:15 +02:00
|
|
|
(* Imposition *)
|
2013-08-20 16:32:57 +02:00
|
|
|
|
2019-07-11 16:35:35 +02:00
|
|
|
(* Union two rest dictionaries from the same PDF. *)
|
|
|
|
let combine_pdf_rests pdf a b =
|
|
|
|
let a_entries =
|
|
|
|
match a with
|
|
|
|
| Pdf.Dictionary entries -> entries
|
|
|
|
| _ -> []
|
|
|
|
in let b_entries =
|
|
|
|
match b with
|
|
|
|
| Pdf.Dictionary entries -> entries
|
|
|
|
| _ -> []
|
|
|
|
in
|
2021-10-26 16:36:56 +02:00
|
|
|
let keys_to_combine = ["/Annots"] in
|
2019-07-11 16:35:35 +02:00
|
|
|
let combine_entries key =
|
|
|
|
let a_entries =
|
|
|
|
match Pdf.lookup_direct pdf key a with
|
|
|
|
| Some (Pdf.Array d) -> d
|
|
|
|
| _ -> []
|
|
|
|
in let b_entries =
|
|
|
|
match Pdf.lookup_direct pdf key b with
|
|
|
|
| Some (Pdf.Array d) -> d
|
|
|
|
| _ -> []
|
|
|
|
in
|
|
|
|
if a_entries = [] && b_entries = [] then
|
|
|
|
None
|
|
|
|
else
|
|
|
|
Some (key, Pdf.Array (a_entries @ b_entries))
|
|
|
|
in
|
|
|
|
let unknown_keys_a = lose (fun (k, _) -> mem k keys_to_combine) a_entries in
|
|
|
|
let unknown_keys_b = lose (fun (k, _) -> mem k keys_to_combine) b_entries in
|
|
|
|
let combined_known_entries = option_map combine_entries keys_to_combine in
|
2021-10-25 19:27:40 +02:00
|
|
|
fold_left
|
|
|
|
(fun dict (k, v) -> Pdf.add_dict_entry dict k v)
|
|
|
|
(Pdf.Dictionary [])
|
|
|
|
(unknown_keys_a @ unknown_keys_b @ combined_known_entries)
|
2021-10-19 17:18:15 +02:00
|
|
|
|
|
|
|
(* Calculate the transformation matrices for a single imposed output page. *)
|
2021-10-25 16:35:35 +02:00
|
|
|
|
|
|
|
(* make margins by scaling for a fitted impose. *)
|
2021-10-21 14:51:14 +02:00
|
|
|
let make_margin output_mediabox margin tr =
|
|
|
|
if margin = 0. then tr else
|
|
|
|
let width, height =
|
|
|
|
match Pdf.parse_rectangle output_mediabox with
|
|
|
|
xmin, ymin, xmax, ymax -> xmax -. xmin, ymax -. ymin
|
|
|
|
in
|
|
|
|
if margin > width /. 2. || margin > height /. 2. then error "margin would fill whole page!" else
|
2021-10-21 19:10:47 +02:00
|
|
|
let hfactor = (width -. margin -. margin) /. width in
|
|
|
|
let vfactor = (height -. margin -. margin) /. height in
|
|
|
|
let factor = fmin hfactor vfactor in
|
2021-10-21 14:51:14 +02:00
|
|
|
let scale = Pdftransform.matrix_of_op (Pdftransform.Scale ((0., 0.), factor, factor)) in
|
2021-10-21 19:10:47 +02:00
|
|
|
let shift =
|
|
|
|
Pdftransform.matrix_of_op (Pdftransform.Translate ((width -. width *. factor) /. 2.,
|
|
|
|
(height -. height *. factor) /. 2.))
|
|
|
|
in
|
2021-10-21 14:51:14 +02:00
|
|
|
(Pdftransform.matrix_compose shift (Pdftransform.matrix_compose scale tr))
|
|
|
|
|
2021-11-12 22:59:24 +01:00
|
|
|
(* FIXME fixup -center for next release. For now it has been disabled. *)
|
2021-10-26 16:09:07 +02:00
|
|
|
let impose_transforms fit fx fy columns rtl btt center margin mediabox output_mediabox fit_extra_hspace fit_extra_vspace len =
|
2021-10-19 17:18:15 +02:00
|
|
|
let width, height =
|
|
|
|
match Pdf.parse_rectangle mediabox with
|
|
|
|
xmin, ymin, xmax, ymax -> xmax -. xmin, ymax -. ymin
|
|
|
|
in
|
|
|
|
let trs = ref [] in
|
2021-10-19 18:55:03 +02:00
|
|
|
let len = ref len in
|
2021-10-21 19:10:47 +02:00
|
|
|
let cent_extra_x = ref 0. in
|
|
|
|
let cent_extra_y = ref 0. in
|
2021-10-25 15:07:05 +02:00
|
|
|
let addtr x y row col px py =
|
|
|
|
let cex, cey =
|
2021-10-21 19:10:47 +02:00
|
|
|
(if rtl then ~-.(!cent_extra_x) else !cent_extra_x), (if btt then ~-.(!cent_extra_y) else !cent_extra_y)
|
2021-10-19 19:43:12 +02:00
|
|
|
in
|
2021-10-25 15:07:05 +02:00
|
|
|
let spacecol = if rtl then x - col - 1 else col in
|
|
|
|
let total_fit_extra_hspace = fit_extra_hspace *. (float_of_int spacecol +. 1.) in
|
2021-10-25 15:23:21 +02:00
|
|
|
let total_fit_extra_vspace = fit_extra_vspace *. (float_of_int row +. 1.) in
|
|
|
|
(*Printf.printf "row = %i, py = %f, ey = %f, fit_extra_vspace = %f, total_fit_extra_vspace = %f\n" row py cey fit_extra_vspace total_fit_extra_vspace;*)
|
2021-10-21 19:10:47 +02:00
|
|
|
trs :=
|
|
|
|
Pdftransform.matrix_of_transform
|
2021-10-25 15:07:05 +02:00
|
|
|
[Pdftransform.Translate (px +. cex +. total_fit_extra_hspace, py +. cey +. total_fit_extra_vspace)]
|
2021-10-21 19:10:47 +02:00
|
|
|
::!trs
|
2021-10-19 17:56:39 +02:00
|
|
|
in
|
2021-10-19 18:55:03 +02:00
|
|
|
let x = int_of_float fx in
|
|
|
|
let y = int_of_float fy in
|
2021-10-21 19:36:23 +02:00
|
|
|
let final_full_cols = !len mod x in
|
2021-10-25 15:45:55 +02:00
|
|
|
let final_full_rows = !len mod y in
|
2021-10-19 17:56:39 +02:00
|
|
|
let order row col =
|
2021-10-19 18:55:03 +02:00
|
|
|
((if btt then y - row - 1 else row), (if rtl then x - col - 1 else col))
|
2021-10-19 17:56:39 +02:00
|
|
|
in
|
|
|
|
if columns then
|
2021-10-19 17:18:15 +02:00
|
|
|
for col = 0 to x - 1 do
|
2021-10-21 19:10:47 +02:00
|
|
|
if center && !len < y then if !cent_extra_y = 0. then cent_extra_y := ~-.(height *. float_of_int (y - !len)) /. 2.;
|
2021-10-19 17:56:39 +02:00
|
|
|
for row = y - 1 downto 0 do
|
2021-10-25 15:45:55 +02:00
|
|
|
let original_row = row in
|
2021-10-19 17:56:39 +02:00
|
|
|
let row, col = order row col in
|
2021-10-25 15:45:55 +02:00
|
|
|
let adjusted_row =
|
|
|
|
let final_empty_rows = y - final_full_rows in
|
|
|
|
if center && !len <= final_full_rows then original_row + (y - 1 - 1 - (final_empty_rows / 2)) else original_row
|
|
|
|
in
|
|
|
|
if !len > 0 then addtr x y adjusted_row col (width *. float_of_int col) (height *. float_of_int row);
|
|
|
|
len := !len - 1
|
2021-10-19 17:56:39 +02:00
|
|
|
done
|
2021-10-19 17:18:15 +02:00
|
|
|
done
|
2021-10-19 17:56:39 +02:00
|
|
|
else
|
|
|
|
for row = y - 1 downto 0 do
|
2021-10-21 19:10:47 +02:00
|
|
|
if center && !len < x then if !cent_extra_x = 0. then cent_extra_x := (width *. float_of_int (x - !len)) /. 2.;
|
2021-10-19 17:56:39 +02:00
|
|
|
for col = 0 to x - 1 do
|
2021-10-21 19:36:23 +02:00
|
|
|
let original_col = col in
|
2021-10-19 17:56:39 +02:00
|
|
|
let row, col = order row col in
|
2021-10-21 19:10:47 +02:00
|
|
|
let adjusted_col =
|
2021-10-21 19:36:23 +02:00
|
|
|
let final_empty_cols = x - final_full_cols in
|
|
|
|
if center && !len <= final_full_cols then original_col + (x - 1 - 1 - (final_empty_cols / 2)) else original_col
|
2021-10-21 19:10:47 +02:00
|
|
|
in
|
2021-10-25 15:45:55 +02:00
|
|
|
if !len > 0 then addtr x y row adjusted_col (width *. float_of_int col) (height *. float_of_int row);
|
|
|
|
len := !len - 1
|
2021-10-19 17:56:39 +02:00
|
|
|
done
|
|
|
|
done;
|
2021-10-25 16:35:35 +02:00
|
|
|
map (if fit then make_margin output_mediabox margin else Fun.id) (rev !trs)
|
2021-10-19 17:18:15 +02:00
|
|
|
|
2019-07-08 16:42:19 +02:00
|
|
|
(* Combine two pages into one throughout the document. The pages have already
|
2021-10-19 17:18:15 +02:00
|
|
|
had their objects renumbered so as not to clash. *)
|
2021-10-26 16:09:07 +02:00
|
|
|
let impose_pages fit x y columns rtl btt center margin output_mediabox fast fit_extra_hspace fit_extra_vspace pdf = function
|
2013-08-20 16:32:57 +02:00
|
|
|
| [] -> assert false
|
|
|
|
| (h::_) as pages ->
|
2019-07-11 16:35:35 +02:00
|
|
|
let transforms =
|
2021-10-26 17:39:26 +02:00
|
|
|
impose_transforms
|
|
|
|
fit x y columns rtl btt center margin h.Pdfpage.mediabox
|
|
|
|
output_mediabox fit_extra_hspace fit_extra_vspace (length pages)
|
2013-08-20 16:32:57 +02:00
|
|
|
in
|
2019-07-11 16:35:35 +02:00
|
|
|
(* Change the pattern matrices before combining resources *)
|
|
|
|
let pages, h =
|
2021-10-19 17:18:15 +02:00
|
|
|
let r = map2 (fun p t -> change_pattern_matrices_page pdf t p) pages transforms in
|
|
|
|
(r, List.hd r)
|
2019-07-11 16:35:35 +02:00
|
|
|
in
|
|
|
|
let resources' = pair_reduce (combine_pdf_resources pdf) (map (fun p -> p.Pdfpage.resources) pages) in
|
|
|
|
let rest' = pair_reduce (combine_pdf_rests pdf) (map (fun p -> p.Pdfpage.rest) pages) in
|
2013-08-20 16:32:57 +02:00
|
|
|
let content' =
|
2021-10-26 17:18:09 +02:00
|
|
|
let transform_stream transform contents =
|
|
|
|
(* If fast, no mismatched q/Q protection and no parsing of operators. *)
|
|
|
|
if fast then
|
2021-10-26 17:39:26 +02:00
|
|
|
[Pdfops.stream_of_ops [Pdfops.Op_q; Pdfops.Op_cm transform]] @ contents @ [Pdfops.stream_of_ops [Pdfops.Op_Q]]
|
2021-10-26 17:18:09 +02:00
|
|
|
else
|
|
|
|
(* If slow, use protect from Pdfpage. *)
|
|
|
|
let ops = Pdfpage.protect pdf resources' contents @ Pdfops.parse_operators pdf resources' contents in
|
2021-10-26 17:39:26 +02:00
|
|
|
[Pdfops.stream_of_ops ([Pdfops.Op_q] @ [Pdfops.Op_cm transform] @ ops @ [Pdfops.Op_Q])]
|
2013-08-20 16:32:57 +02:00
|
|
|
in
|
2020-03-04 19:50:32 +01:00
|
|
|
flatten
|
2019-07-09 16:03:53 +02:00
|
|
|
(map2
|
2021-10-26 17:39:26 +02:00
|
|
|
(fun p t -> transform_annotations pdf t p.Pdfpage.rest; transform_stream t p.Pdfpage.content)
|
|
|
|
pages
|
|
|
|
transforms)
|
2013-08-20 16:32:57 +02:00
|
|
|
in
|
2021-10-21 14:51:14 +02:00
|
|
|
{Pdfpage.mediabox = output_mediabox;
|
2013-08-20 16:32:57 +02:00
|
|
|
Pdfpage.rotate = h.Pdfpage.rotate;
|
|
|
|
Pdfpage.content = content';
|
2019-07-08 16:42:19 +02:00
|
|
|
Pdfpage.resources = resources';
|
2021-10-19 17:18:15 +02:00
|
|
|
Pdfpage.rest = rest'}
|
|
|
|
|
2021-10-25 19:48:28 +02:00
|
|
|
(* For fit, we scale contents, move to middle and retain page size. For xy, we
|
2021-10-26 17:18:09 +02:00
|
|
|
expand mediabox and move contents to middle. This function also does the hard boxing. *)
|
2021-10-25 19:48:28 +02:00
|
|
|
let make_space fit ~fast spacing pdf =
|
2021-10-25 19:27:40 +02:00
|
|
|
let endpage = Pdfpage.endpage pdf in
|
|
|
|
let all = ilist 1 endpage in
|
2021-10-26 17:18:09 +02:00
|
|
|
let pdf = hard_box pdf all "/MediaBox" false fast in
|
|
|
|
if spacing = 0. then pdf else
|
|
|
|
let margin = spacing /. 2. in
|
2021-10-25 19:27:40 +02:00
|
|
|
let firstpage = hd (Pdfpage.pages_of_pagetree pdf) in
|
2021-10-25 19:48:28 +02:00
|
|
|
let width, height =
|
2021-10-25 19:27:40 +02:00
|
|
|
match Pdf.parse_rectangle firstpage.Pdfpage.mediabox with
|
2021-10-25 19:48:28 +02:00
|
|
|
xmin, ymin, xmax, ymax -> (xmax -. xmin, ymax -. ymin)
|
2021-10-25 19:27:40 +02:00
|
|
|
in
|
2021-10-25 19:48:28 +02:00
|
|
|
if fit then
|
2021-10-26 17:18:09 +02:00
|
|
|
(shift_pdf
|
2021-10-25 19:27:40 +02:00
|
|
|
~fast
|
|
|
|
(many (margin, margin) endpage)
|
2021-10-25 19:48:28 +02:00
|
|
|
(scale_contents ~fast (Cpdfposition.BottomLeft 0.) ((width -. spacing) /. width) pdf all)
|
2021-10-26 17:18:09 +02:00
|
|
|
all)
|
2021-10-25 19:48:28 +02:00
|
|
|
else
|
2021-10-26 17:18:09 +02:00
|
|
|
(set_mediabox
|
2021-10-25 19:48:28 +02:00
|
|
|
(many (0., 0., width +. spacing, height +. spacing) endpage)
|
2021-10-26 17:18:09 +02:00
|
|
|
(shift_pdf ~fast (many (margin, margin) endpage) pdf all) all)
|
2021-10-25 19:27:40 +02:00
|
|
|
|
2021-10-26 16:09:07 +02:00
|
|
|
(* We add the border as a thick unfilled rectangle just inside the page edge,
|
|
|
|
only if its linewidth is > 0 since, for us, 0 means none, not single-pixel
|
|
|
|
like in PDF. *)
|
|
|
|
let add_border linewidth ~fast pdf =
|
|
|
|
if linewidth = 0. then pdf else
|
|
|
|
let firstpage = hd (Pdfpage.pages_of_pagetree pdf) in
|
|
|
|
let _, _, w, h = Pdf.parse_rectangle firstpage.Pdfpage.mediabox in
|
2021-12-21 15:00:58 +01:00
|
|
|
Cpdfaddtext.addrectangle
|
2021-11-15 20:17:15 +01:00
|
|
|
fast (w -. linewidth, h -. linewidth) (RGB (0., 0., 0.)) true linewidth 1. (Cpdfposition.BottomLeft (linewidth /. 2.))
|
2021-10-26 16:09:07 +02:00
|
|
|
false false (ilist 1 (Pdfpage.endpage pdf)) pdf
|
|
|
|
|
2021-10-19 17:18:15 +02:00
|
|
|
let impose ~x ~y ~fit ~columns ~rtl ~btt ~center ~margin ~spacing ~linewidth ~fast pdf =
|
2021-10-26 17:18:09 +02:00
|
|
|
let endpage = Pdfpage.endpage pdf in
|
|
|
|
let pagenums = ilist 1 endpage in
|
|
|
|
let pdf = copy_cropbox_to_mediabox pdf pagenums in
|
|
|
|
let pdf = remove_cropping_pdf pdf pagenums in
|
|
|
|
let pdf = upright pagenums pdf in
|
2021-10-26 16:09:07 +02:00
|
|
|
let pdf = add_border linewidth ~fast pdf in
|
2021-10-25 19:48:28 +02:00
|
|
|
let pdf = make_space fit ~fast spacing pdf in
|
2021-10-20 19:51:53 +02:00
|
|
|
let firstpage = hd (Pdfpage.pages_of_pagetree pdf) in
|
|
|
|
let _, _, w, h = Pdf.parse_rectangle firstpage.Pdfpage.mediabox in
|
2021-10-19 20:26:02 +02:00
|
|
|
let ix = int_of_float x in
|
|
|
|
let iy = int_of_float y in
|
2021-10-21 19:10:47 +02:00
|
|
|
let n, ix, iy, fit_extra_hspace, fit_extra_vspace =
|
2021-10-20 19:51:53 +02:00
|
|
|
if fit then
|
|
|
|
(* +. 0.001 ensures a page always fits on itself, or on another page of same height or width. *)
|
|
|
|
let across = int_of_float (floor (x /. w +. 0.001)) in
|
|
|
|
let down = int_of_float (floor (y /. h +. 0.001)) in
|
|
|
|
if across < 1 || down < 1 then error "Not even a single page would fit." else
|
2021-10-21 19:10:47 +02:00
|
|
|
let excess_hspace = x -. float_of_int across *. w in
|
|
|
|
let excess_vspace = y -. float_of_int down *. h in
|
2021-10-25 15:45:55 +02:00
|
|
|
(*Printf.printf "across = %i, down =%i, excess_hspace = %f, excess_hspace = %f\n" across down excess_hspace excess_vspace;*)
|
2021-10-21 19:10:47 +02:00
|
|
|
(across * down,
|
|
|
|
across,
|
|
|
|
down,
|
|
|
|
excess_hspace /. (float_of_int across +. 1.),
|
|
|
|
excess_vspace /. (float_of_int down +. 1.))
|
2021-10-20 19:51:53 +02:00
|
|
|
else
|
|
|
|
if ix = 0 && iy = 0 then error "impose-xy: both dimensions cannot be zero" else
|
2021-10-21 19:10:47 +02:00
|
|
|
if ix = 0 then (endpage, endpage, 1, 0., 0.)
|
|
|
|
else if iy = 0 then (endpage, 1, endpage, 0., 0.)
|
|
|
|
else (ix * iy, ix, iy, 0., 0.)
|
2021-10-19 20:26:02 +02:00
|
|
|
in
|
2021-10-19 17:18:15 +02:00
|
|
|
let mediabox' =
|
|
|
|
if fit then Pdf.Array [Pdf.Real 0.; Pdf.Real 0.; Pdf.Real x; Pdf.Real y] else
|
2021-10-25 16:35:35 +02:00
|
|
|
let m2 = margin *. 2. in
|
|
|
|
if x = 0.0 then Pdf.Array [Pdf.Real 0.; Pdf.Real 0.; Pdf.Real (w *. float_of_int endpage +. m2); Pdf.Real (h +. m2)]
|
|
|
|
else if y = 0.0 then Pdf.Array [Pdf.Real 0.; Pdf.Real 0.; Pdf.Real (w +. m2); Pdf.Real (h *. float_of_int endpage +. m2)]
|
|
|
|
else Pdf.Array [Pdf.Real 0.; Pdf.Real 0.; Pdf.Real (w *. x +. m2); Pdf.Real (h *. y +. m2)]
|
2021-10-19 17:18:15 +02:00
|
|
|
in
|
|
|
|
let pages = Pdfpage.pages_of_pagetree pdf in
|
|
|
|
let pagesets = splitinto n pages in
|
|
|
|
let renumbered = map (Pdfpage.renumber_pages pdf) pagesets in
|
2021-10-25 16:35:35 +02:00
|
|
|
let pages =
|
2021-10-20 19:51:53 +02:00
|
|
|
map
|
2021-10-25 16:35:35 +02:00
|
|
|
(impose_pages fit (float_of_int ix) (float_of_int iy) columns rtl btt
|
2021-10-26 16:09:07 +02:00
|
|
|
center margin mediabox' fast fit_extra_hspace fit_extra_vspace pdf)
|
2021-10-20 19:51:53 +02:00
|
|
|
renumbered
|
|
|
|
in
|
2021-10-19 17:18:15 +02:00
|
|
|
let changes = map (fun x -> (x, (x + (n - 1)) / n)) pagenums in
|
2021-10-25 16:35:35 +02:00
|
|
|
let pdf = Pdfpage.change_pages ~changes true pdf pages in
|
|
|
|
if fit then pdf else shift_pdf ~fast (many (margin, margin) (length pages)) pdf (ilist 1 (Pdfpage.endpage pdf))
|
2013-08-20 16:32:57 +02:00
|
|
|
|
2021-10-20 19:51:53 +02:00
|
|
|
(* Legacy -twoup-stack. Impose 2x1 on a page twice the size then rotate. *)
|
2021-10-20 15:00:33 +02:00
|
|
|
let twoup_stack fast pdf =
|
|
|
|
let pdf =
|
|
|
|
impose
|
|
|
|
~x:2. ~y:1. ~fit:false ~columns:false ~rtl:false ~btt:false ~center:false
|
|
|
|
~margin:0. ~spacing:0. ~linewidth:0. ~fast pdf
|
|
|
|
in
|
|
|
|
let all = ilist 1 (Pdfpage.endpage pdf) in
|
|
|
|
upright ~fast all (rotate_pdf ~-90 pdf all)
|
2015-05-31 15:01:38 +02:00
|
|
|
|
2021-10-25 15:45:55 +02:00
|
|
|
(* Legacy -two-up. Rotate the pages and shrink them so as to fit 2x1 on a page the same size. *)
|
2021-10-20 15:00:33 +02:00
|
|
|
let twoup fast pdf =
|
|
|
|
let firstpage = hd (Pdfpage.pages_of_pagetree pdf) in
|
|
|
|
let width, height =
|
|
|
|
match Pdf.parse_rectangle firstpage.Pdfpage.mediabox with
|
|
|
|
xmin, ymin, xmax, ymax -> xmax -. xmin, ymax -. ymin
|
|
|
|
in
|
|
|
|
let width_exceeds_height = width > height in
|
|
|
|
let sc =
|
|
|
|
if width_exceeds_height
|
|
|
|
then fmin (height /. width) ((width /. 2.) /. height)
|
|
|
|
else fmin (width /. height) ((height /. 2.) /. width)
|
|
|
|
in
|
|
|
|
let endpage = Pdfpage.endpage pdf in
|
|
|
|
let all = ilist 1 endpage in
|
|
|
|
let pdf = scale_pdf ~fast (many (sc, sc) endpage) pdf all in
|
|
|
|
let pdf =
|
|
|
|
impose
|
|
|
|
~x:2. ~y:1. ~fit:false ~columns:false ~rtl:false ~btt:false ~center:true
|
|
|
|
~margin:0. ~spacing:0. ~linewidth:0. ~fast pdf
|
|
|
|
in
|
|
|
|
let endpage = Pdfpage.endpage pdf in
|
|
|
|
let all = ilist 1 endpage in
|
|
|
|
let pdf = upright all (rotate_pdf ~-90 pdf all) in
|
|
|
|
scale_to_fit_pdf ~fast Cpdfposition.Diagonal 1. (many (width, height) endpage) () pdf all
|
2015-05-31 15:01:38 +02:00
|
|
|
|
2013-08-20 16:32:57 +02:00
|
|
|
(* \section{Blacken text} *)
|
|
|
|
|
|
|
|
(*
|
|
|
|
\begin{verbatim}
|
|
|
|
Algorithm: Change
|
|
|
|
BT
|
|
|
|
<ops>
|
|
|
|
ET
|
|
|
|
|
|
|
|
...to...
|
|
|
|
|
|
|
|
BT
|
|
|
|
Op_g 0.
|
|
|
|
<ops minus any color, shading or gs operators>
|
|
|
|
ET
|
|
|
|
<ops minus any text positioning or text rendering ones>
|
|
|
|
\end{verbatim}
|
|
|
|
*)
|
2021-11-15 20:17:15 +01:00
|
|
|
let blacktext_ops colour pdf resources content =
|
2013-08-20 16:32:57 +02:00
|
|
|
let not_text = function
|
|
|
|
| Pdfops.Op_Tj _ | Pdfops.Op_TJ _
|
|
|
|
| Pdfops.Op_' _ | Pdfops.Op_'' (_, _, _)
|
|
|
|
| Pdfops.Op_Td (_, _) | Pdfops.Op_TD (_, _)
|
|
|
|
| Pdfops.Op_Tm _ | Pdfops.Op_T'
|
|
|
|
| Pdfops.Op_Tc _
|
|
|
|
| Pdfops.Op_Tw _
|
|
|
|
| Pdfops.Op_Tz _
|
|
|
|
| Pdfops.Op_TL _
|
|
|
|
| Pdfops.Op_Tf (_, _)
|
|
|
|
| Pdfops.Op_Tr _
|
|
|
|
| Pdfops.Op_Ts _ -> false
|
|
|
|
| _ -> true
|
|
|
|
in let textlevel = ref 0
|
|
|
|
in let removed = ref []
|
|
|
|
in let operators =
|
|
|
|
Pdfops.parse_operators pdf resources content
|
|
|
|
in
|
|
|
|
let rec remove_colourops prev = function
|
|
|
|
| [] -> rev prev
|
|
|
|
| Pdfops.Op_BT::more ->
|
|
|
|
incr textlevel;
|
|
|
|
remove_colourops
|
2021-12-21 15:00:58 +01:00
|
|
|
(Cpdfaddtext.colour_op colour::Pdfops.Op_BT::prev)
|
2013-08-20 16:32:57 +02:00
|
|
|
more
|
|
|
|
| Pdfops.Op_ET::more ->
|
|
|
|
decr textlevel;
|
|
|
|
let prev' = !removed @ Pdfops.Op_ET::prev in
|
|
|
|
removed := [];
|
|
|
|
remove_colourops prev' more
|
|
|
|
| (Pdfops.Op_G _
|
|
|
|
| Pdfops.Op_g _
|
|
|
|
| Pdfops.Op_RG (_, _, _)
|
|
|
|
| Pdfops.Op_rg (_, _, _)
|
|
|
|
| Pdfops.Op_k (_, _, _, _)
|
|
|
|
| Pdfops.Op_K (_, _, _, _)
|
|
|
|
| Pdfops.Op_SCN _
|
|
|
|
| Pdfops.Op_SC _
|
|
|
|
| Pdfops.Op_scn _
|
|
|
|
| Pdfops.Op_sc _
|
|
|
|
| Pdfops.Op_SCNName (_, _)
|
|
|
|
| Pdfops.Op_scnName (_, _)
|
|
|
|
| Pdfops.Op_CS _
|
|
|
|
| Pdfops.Op_cs _
|
|
|
|
| Pdfops.Op_sh _
|
|
|
|
| Pdfops.Op_gs _)
|
|
|
|
as op::more ->
|
|
|
|
if !textlevel > 0
|
|
|
|
then
|
|
|
|
begin
|
|
|
|
removed =| op;
|
|
|
|
remove_colourops prev more
|
|
|
|
end
|
|
|
|
else remove_colourops (op::prev) more
|
|
|
|
| op::more ->
|
|
|
|
if !textlevel > 0 && not_text op then removed =| op;
|
|
|
|
remove_colourops (op::prev) more
|
|
|
|
in
|
|
|
|
let operators' = remove_colourops [] operators in
|
|
|
|
[Pdfops.stream_of_ops operators']
|
|
|
|
|
|
|
|
(* Blacken a form xobject, writing it to the same object. *)
|
|
|
|
|
2021-11-15 20:17:15 +01:00
|
|
|
let blacktext c range pdf =
|
2013-08-20 16:32:57 +02:00
|
|
|
let blacktext_page _ page =
|
|
|
|
let content' =
|
2021-11-15 20:17:15 +01:00
|
|
|
blacktext_ops c pdf page.Pdfpage.resources page.Pdfpage.content
|
2013-08-20 16:32:57 +02:00
|
|
|
in
|
2021-11-15 20:17:15 +01:00
|
|
|
process_xobjects pdf page (blacktext_ops c);
|
2013-08-20 16:32:57 +02:00
|
|
|
{page with Pdfpage.content = content'}
|
|
|
|
in
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages (ppstub blacktext_page) pdf range
|
2013-08-20 16:32:57 +02:00
|
|
|
|
|
|
|
(* \section{Blacken lines} *)
|
2021-11-15 20:17:15 +01:00
|
|
|
let blacklines_ops c pdf resources content =
|
2013-08-20 16:32:57 +02:00
|
|
|
let rec blacken_strokeops prev = function
|
|
|
|
| [] -> rev prev
|
|
|
|
| Pdfops.Op_CS _::t ->
|
2016-11-09 19:15:23 +01:00
|
|
|
blacken_strokeops (Pdfops.Op_CS "/DeviceRGB"::prev) t
|
2013-08-20 16:32:57 +02:00
|
|
|
| (Pdfops.Op_SC _ | Pdfops.Op_SCN _ | Pdfops.Op_SCNName _ | Pdfops.Op_G _
|
|
|
|
| Pdfops.Op_RG _ | Pdfops.Op_K _)::t ->
|
2021-12-21 15:00:58 +01:00
|
|
|
blacken_strokeops (Cpdfaddtext.colour_op_stroke c::prev) t
|
2013-08-20 16:32:57 +02:00
|
|
|
| h::t -> blacken_strokeops (h::prev) t
|
|
|
|
and operators =
|
|
|
|
Pdfops.parse_operators pdf resources content
|
|
|
|
in
|
|
|
|
let operators' = blacken_strokeops [] operators in
|
|
|
|
[Pdfops.stream_of_ops operators']
|
|
|
|
|
2021-11-15 20:17:15 +01:00
|
|
|
let blacklines c range pdf =
|
2013-08-20 16:32:57 +02:00
|
|
|
let blacklines_page _ page =
|
|
|
|
let content' =
|
2021-11-15 20:17:15 +01:00
|
|
|
blacklines_ops c pdf page.Pdfpage.resources page.Pdfpage.content
|
2013-08-20 16:32:57 +02:00
|
|
|
in
|
2021-11-15 20:17:15 +01:00
|
|
|
process_xobjects pdf page (blacklines_ops c);
|
2013-08-20 16:32:57 +02:00
|
|
|
{page with Pdfpage.content = content'}
|
|
|
|
in
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages (ppstub blacklines_page) pdf range
|
2013-08-20 16:32:57 +02:00
|
|
|
|
|
|
|
(* \section{Blacken Fills} *)
|
2021-11-15 20:17:15 +01:00
|
|
|
let blackfills_ops c pdf resources content =
|
2013-08-20 16:32:57 +02:00
|
|
|
let rec blacken_fillops prev = function
|
|
|
|
| [] -> rev prev
|
|
|
|
| Pdfops.Op_cs _::t ->
|
2016-11-09 19:15:23 +01:00
|
|
|
blacken_fillops (Pdfops.Op_cs "/DeviceRGB"::prev) t
|
2013-08-20 16:32:57 +02:00
|
|
|
| (Pdfops.Op_sc _ | Pdfops.Op_scn _ | Pdfops.Op_scnName _ | Pdfops.Op_g _
|
|
|
|
| Pdfops.Op_rg _ | Pdfops.Op_k _)::t ->
|
2021-12-21 15:00:58 +01:00
|
|
|
blacken_fillops (Cpdfaddtext.colour_op c::prev) t
|
2013-08-20 16:32:57 +02:00
|
|
|
| h::t -> blacken_fillops (h::prev) t
|
|
|
|
and operators =
|
|
|
|
Pdfops.parse_operators pdf resources content
|
|
|
|
in
|
|
|
|
let operators' = blacken_fillops [] operators in
|
|
|
|
[Pdfops.stream_of_ops operators']
|
|
|
|
|
2021-11-15 20:17:15 +01:00
|
|
|
let blackfills c range pdf =
|
2013-08-20 16:32:57 +02:00
|
|
|
let blackfills_page _ page =
|
|
|
|
let content' =
|
2021-11-15 20:17:15 +01:00
|
|
|
blackfills_ops c pdf page.Pdfpage.resources page.Pdfpage.content
|
2013-08-20 16:32:57 +02:00
|
|
|
in
|
2021-11-15 20:17:15 +01:00
|
|
|
process_xobjects pdf page (blackfills_ops c);
|
2013-08-20 16:32:57 +02:00
|
|
|
{page with Pdfpage.content = content'}
|
|
|
|
in
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages (ppstub blackfills_page) pdf range
|
2013-08-20 16:32:57 +02:00
|
|
|
|
|
|
|
(* \section{Set a minimum line width to avoid dropout} *)
|
|
|
|
let thinlines range width pdf =
|
|
|
|
let thinpage _ page =
|
|
|
|
let operators =
|
|
|
|
Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content
|
|
|
|
in
|
|
|
|
let ctmstack = ref [ref Pdftransform.i_matrix] in
|
|
|
|
let scaleof_ctm () =
|
|
|
|
try
|
|
|
|
match Pdftransform.decompose (!(hd !ctmstack)) with
|
|
|
|
(scale, _, _, _, _, _) ->
|
|
|
|
scale
|
|
|
|
with
|
2015-11-25 12:57:48 +01:00
|
|
|
Failure _ (*"hd"*) -> 1.
|
2013-08-20 16:32:57 +02:00
|
|
|
in
|
|
|
|
let rec replace_operators prev = function
|
|
|
|
| [] -> rev prev
|
|
|
|
| (Pdfops.Op_w w)::more ->
|
|
|
|
(* Alter width. *)
|
|
|
|
let width' = width /. scaleof_ctm () in
|
|
|
|
let w' =
|
|
|
|
if w >= width' then Pdfops.Op_w w else Pdfops.Op_w width'
|
|
|
|
in
|
|
|
|
replace_operators (w'::prev) more
|
|
|
|
| (Pdfops.Op_cm m)::more ->
|
|
|
|
(* Update CTM *)
|
|
|
|
begin try
|
|
|
|
let top = hd !ctmstack in
|
|
|
|
top := Pdftransform.matrix_compose !top m
|
|
|
|
with
|
2015-11-25 12:57:48 +01:00
|
|
|
Failure _ (*"hd"*) -> error "Malformed file."
|
2013-08-20 16:32:57 +02:00
|
|
|
end;
|
|
|
|
replace_operators ((Pdfops.Op_cm m)::prev) more
|
|
|
|
| Pdfops.Op_q::more ->
|
|
|
|
(* Push stack *)
|
|
|
|
begin try
|
|
|
|
ctmstack =| ref (!(hd !ctmstack))
|
|
|
|
with
|
2015-11-25 12:57:48 +01:00
|
|
|
Failure _ (*"hd"*) -> error "Malformed file"
|
2013-08-20 16:32:57 +02:00
|
|
|
end;
|
|
|
|
replace_operators (Pdfops.Op_q::prev) more
|
|
|
|
| Pdfops.Op_Q::more ->
|
|
|
|
(* Pop stack *)
|
|
|
|
begin try
|
|
|
|
ctmstack := tl !ctmstack
|
|
|
|
with
|
2015-11-25 12:57:48 +01:00
|
|
|
Failure _ (*"tl"*) -> error "Malformed file"
|
2013-08-20 16:32:57 +02:00
|
|
|
end;
|
|
|
|
replace_operators (Pdfops.Op_Q::prev) more
|
|
|
|
| (Pdfops.Op_gs gsname)::more ->
|
|
|
|
(* Perhaps insert [Op_w]. *)
|
|
|
|
let opw =
|
|
|
|
match Pdf.lookup_direct pdf "/ExtGState" page.Pdfpage.resources with
|
|
|
|
| None -> []
|
|
|
|
| Some ext_state_dict ->
|
|
|
|
match Pdf.lookup_direct pdf gsname ext_state_dict with
|
|
|
|
| None -> []
|
|
|
|
| Some gdict ->
|
|
|
|
match Pdf.lookup_direct pdf "/LW" gdict with
|
|
|
|
| Some s -> (try [Pdfops.Op_w (Pdf.getnum s)] with _ -> [])
|
|
|
|
| None -> []
|
|
|
|
in
|
|
|
|
replace_operators (opw @ ((Pdfops.Op_gs gsname)::prev)) more
|
|
|
|
| x::more -> replace_operators (x::prev) more
|
|
|
|
in
|
|
|
|
let operators = replace_operators [] operators in
|
|
|
|
(* 2. Add an initial 'w' if width more than default width *)
|
|
|
|
let operators =
|
|
|
|
if width > 1. then (Pdfops.Op_w width)::operators else operators
|
|
|
|
in
|
|
|
|
let content' = [Pdfops.stream_of_ops operators] in
|
|
|
|
{page with Pdfpage.content = content'}
|
|
|
|
in
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages (ppstub thinpage) pdf range
|
2013-08-20 16:32:57 +02:00
|
|
|
|
2019-10-02 14:41:56 +02:00
|
|
|
(* Parse the new content to make sure syntactically ok, append
|
|
|
|
* as required. Rewrite the content *)
|
|
|
|
let append_page_content_page fast s before pdf n page =
|
|
|
|
let ops =
|
|
|
|
Pdfops.parse_stream pdf page.Pdfpage.resources [bytes_of_string s]
|
|
|
|
in
|
|
|
|
(if before then Pdfpage.prepend_operators else Pdfpage.postpend_operators)
|
|
|
|
pdf ops ~fast page
|
|
|
|
|
|
|
|
let append_page_content s before fast range pdf =
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages (ppstub (append_page_content_page fast s before pdf)) pdf range
|
2019-10-02 14:41:56 +02:00
|
|
|
|
2020-11-12 16:16:58 +01:00
|
|
|
(* Add rectangles on top of pages to show Media, Crop, Art, Trim, Bleed boxes.
|
|
|
|
*
|
|
|
|
* We use different dash lengths and colours to help distinguish coincident
|
|
|
|
* boxes The sequence of operators is postpended to the page content,
|
|
|
|
* appropriately protected to prevent pollution of matrices.
|
|
|
|
*
|
|
|
|
* /MediaBox: Solid red line
|
|
|
|
* /CropBox: Dashed 7 on 7 off green line
|
|
|
|
* /ArtBox: Dashed 5 on 5 off blue line
|
|
|
|
* /TrimBox: Dashed 3 on 3 off orange line
|
|
|
|
* /BleedBox: Dashed 2 on 2 off pink line *)
|
|
|
|
let get_rectangle pdf page box =
|
|
|
|
if box = "/MediaBox" then
|
|
|
|
match page.Pdfpage.mediabox with
|
|
|
|
Pdf.Array [a; b; c; d] as r -> Some (Pdf.parse_rectangle r)
|
|
|
|
| _ -> None
|
|
|
|
else
|
|
|
|
match Pdf.lookup_direct pdf box page.Pdfpage.rest with
|
|
|
|
Some (Pdf.Array [a; b; c; d] as r) -> Some (Pdf.parse_rectangle r)
|
|
|
|
| _ -> None
|
|
|
|
|
|
|
|
let show_boxes_page fast pdf _ page =
|
|
|
|
let make_ops (r, g, b) on off boxname =
|
|
|
|
match get_rectangle pdf page boxname with
|
|
|
|
Some (r1, r2, r3, r4) ->
|
|
|
|
[Pdfops.Op_q;
|
|
|
|
Pdfops.Op_RG (r /. 255., g /. 255., b /. 255.);
|
|
|
|
Pdfops.Op_w 1.;
|
|
|
|
Pdfops.Op_d ((if on = 0. && off = 0. then [] else [on; off]), 0.);
|
|
|
|
Pdfops.Op_re (r1, r2, r3 -. r1, r4 -. r2);
|
|
|
|
Pdfops.Op_S;
|
|
|
|
Pdfops.Op_Q]
|
|
|
|
| None -> []
|
|
|
|
in
|
|
|
|
let ops =
|
|
|
|
make_ops (255., 0., 0.) 0. 0. "/MediaBox"
|
|
|
|
@ make_ops (0., 255., 0.) 7. 7. "/CropBox"
|
|
|
|
@ make_ops (0., 0., 255.) 5. 5. "/ArtBox"
|
|
|
|
@ make_ops (255.,150.,0.) 3. 3. "/TrimBox"
|
|
|
|
@ make_ops (255.,9.,147.) 2. 2. "/BleedBox"
|
|
|
|
in
|
|
|
|
Pdfpage.postpend_operators pdf ops ~fast page
|
|
|
|
|
|
|
|
let show_boxes ?(fast=false) pdf range =
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages (ppstub (show_boxes_page fast pdf)) pdf range
|
|
|
|
|
|
|
|
|
2020-11-12 16:16:58 +01:00
|
|
|
|
|
|
|
let allowance = 9.
|
|
|
|
|
|
|
|
let line (x0, y0, x1, y1) =
|
|
|
|
[Pdfops.Op_m (x0, y0);
|
|
|
|
Pdfops.Op_l (x1, y1);
|
|
|
|
Pdfops.Op_s]
|
|
|
|
|
|
|
|
let trim_marks_page fast pdf n page =
|
|
|
|
match get_rectangle pdf page "/TrimBox", get_rectangle pdf page "/MediaBox" with
|
|
|
|
| Some (tminx, tminy, tmaxx, tmaxy), Some (minx, miny, maxx, maxy) ->
|
|
|
|
let ops =
|
|
|
|
[Pdfops.Op_q;
|
|
|
|
Pdfops.Op_K (1., 1., 1., 1.);
|
|
|
|
Pdfops.Op_w 1.]
|
|
|
|
@ line (minx, tmaxy, tminy -. allowance, tmaxy) (* top left *)
|
|
|
|
@ line (tminx, tmaxy +. allowance, tminx, maxy)
|
|
|
|
@ line (tmaxx +. allowance, tmaxy, maxx, tmaxy) (* top right *)
|
|
|
|
@ line (tmaxx, tmaxy +. allowance, tmaxx, maxy)
|
|
|
|
@ line (tmaxx +. allowance, tminy, maxx, tminy) (* bottom right *)
|
|
|
|
@ line (tmaxx, tminy -. allowance, tmaxx, miny)
|
|
|
|
@ line (tminx -. allowance, tminy, minx, tminy) (* bottom left *)
|
|
|
|
@ line (tminx, tminy -. allowance, tminx, miny)
|
|
|
|
@ [Pdfops.Op_Q]
|
|
|
|
in
|
|
|
|
Pdfpage.postpend_operators pdf ops ~fast page
|
2021-07-23 17:26:07 +02:00
|
|
|
| _, _ ->
|
|
|
|
(*Printf.eprintf "warning: no /TrimBox found on page %i\n%!" n;*)
|
|
|
|
page
|
2020-11-12 16:16:58 +01:00
|
|
|
|
|
|
|
let trim_marks ?(fast=false) pdf range =
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages (ppstub (trim_marks_page fast pdf)) pdf range
|
2020-11-14 17:40:01 +01:00
|
|
|
|
|
|
|
let rec remove_all_text_ops pdf resources content =
|
|
|
|
let is_textop = function
|
|
|
|
Pdfops.Op_Tj _ | Pdfops.Op_' _ | Pdfops.Op_'' _ | Pdfops.Op_TJ _ -> true
|
|
|
|
| _ -> false
|
|
|
|
in
|
|
|
|
let content' =
|
|
|
|
let ops = Pdfops.parse_operators pdf resources content in
|
|
|
|
Pdfops.stream_of_ops
|
|
|
|
(option_map (function x -> if is_textop x then None else Some x) ops)
|
|
|
|
in
|
|
|
|
[content']
|
|
|
|
|
|
|
|
let remove_all_text_page pdf p =
|
|
|
|
let resources = p.Pdfpage.resources in
|
|
|
|
let content = p.Pdfpage.content in
|
|
|
|
process_xobjects pdf p remove_all_text_ops;
|
|
|
|
{p with Pdfpage.content = remove_all_text_ops pdf resources content}, pdf
|
|
|
|
|
|
|
|
let remove_all_text range pdf =
|
|
|
|
let pages = Pdfpage.pages_of_pagetree pdf in
|
|
|
|
let pagenums = indx pages in
|
|
|
|
let pdf = ref pdf in
|
|
|
|
let pages' = ref [] in
|
|
|
|
iter2
|
|
|
|
(fun p pagenum ->
|
|
|
|
let p', pdf' =
|
|
|
|
if mem pagenum range
|
|
|
|
then remove_all_text_page !pdf p
|
|
|
|
else p, !pdf
|
|
|
|
in
|
|
|
|
pdf := pdf';
|
|
|
|
pages' =| p')
|
|
|
|
pages
|
|
|
|
pagenums;
|
|
|
|
Pdfpage.change_pages true !pdf (rev !pages')
|
|
|
|
|
2021-10-28 19:44:01 +02:00
|
|
|
(* 1. Extend remove_dict_entry with search term
|
|
|
|
2. Implement replace_dict_entry by analogy to remove_dict_entry *)
|
2021-10-29 16:09:21 +02:00
|
|
|
let rec dict_entry_single_object f pdf = function
|
|
|
|
| (Pdf.Dictionary d) -> f (Pdf.recurse_dict (dict_entry_single_object f pdf) d)
|
2021-10-28 19:44:01 +02:00
|
|
|
| (Pdf.Stream {contents = (Pdf.Dictionary dict, data)}) ->
|
2021-10-29 16:09:21 +02:00
|
|
|
f (Pdf.Stream {contents = (Pdf.recurse_dict (dict_entry_single_object f pdf) dict, data)})
|
|
|
|
| Pdf.Array a -> Pdf.recurse_array (dict_entry_single_object f pdf) a
|
2021-10-28 19:44:01 +02:00
|
|
|
| x -> x
|
|
|
|
|
2021-10-29 16:09:21 +02:00
|
|
|
(* FIXME are we sure that functional values can never appear in the equality here? *)
|
|
|
|
let remove_dict_entry pdf key search =
|
|
|
|
let f d =
|
|
|
|
match search with
|
|
|
|
| None -> Pdf.remove_dict_entry d key
|
|
|
|
| Some s ->
|
|
|
|
match Pdf.lookup_direct pdf key d with
|
|
|
|
| Some v when v = s -> Pdf.remove_dict_entry d key
|
|
|
|
| _ -> d
|
|
|
|
in
|
|
|
|
Pdf.objselfmap (dict_entry_single_object f pdf) pdf;
|
|
|
|
pdf.Pdf.trailerdict <- dict_entry_single_object f pdf pdf.Pdf.trailerdict
|
2020-11-14 17:40:01 +01:00
|
|
|
|
2021-10-29 16:09:21 +02:00
|
|
|
let replace_dict_entry pdf key value search =
|
|
|
|
let f d =
|
|
|
|
match search with
|
|
|
|
| None -> Pdf.replace_dict_entry d key value
|
|
|
|
| Some s ->
|
|
|
|
match Pdf.lookup_direct pdf key d with
|
|
|
|
| Some v when v = s -> Pdf.replace_dict_entry d key value
|
|
|
|
| _ -> d
|
|
|
|
in
|
|
|
|
Pdf.objselfmap (dict_entry_single_object f pdf) pdf;
|
|
|
|
pdf.Pdf.trailerdict <- dict_entry_single_object f pdf pdf.Pdf.trailerdict
|
|
|
|
|
|
|
|
(* FIXME no need to self map here, since nothing changes *)
|
|
|
|
let print_dict_entry pdf key =
|
|
|
|
let f d =
|
|
|
|
match Pdf.lookup_direct pdf key d with
|
|
|
|
| Some v ->
|
|
|
|
(* We use a double newline as a separator. *)
|
|
|
|
Printf.printf "%s\n\n" (Cpdfyojson.Safe.to_string (Cpdfjson.json_of_object pdf (fun _ -> ()) false false v));
|
|
|
|
d
|
|
|
|
| None -> d
|
|
|
|
in
|
|
|
|
Pdf.objselfmap (dict_entry_single_object f pdf) pdf;
|
|
|
|
pdf.Pdf.trailerdict <- dict_entry_single_object f pdf pdf.Pdf.trailerdict
|
2021-10-28 18:06:46 +02:00
|
|
|
|
2020-11-14 17:40:01 +01:00
|
|
|
let remove_clipping_ops pdf resources content =
|
|
|
|
let ops = Pdfops.parse_operators pdf resources content in
|
|
|
|
let rec process a = function
|
|
|
|
Pdfops.Op_W::Pdfops.Op_n::t -> process (Pdfops.Op_n::a) t
|
|
|
|
| h::t -> process (h::a) t
|
|
|
|
| [] -> rev a
|
|
|
|
in
|
|
|
|
[Pdfops.stream_of_ops (process [] ops)]
|
|
|
|
|
|
|
|
let remove_clipping pdf range =
|
|
|
|
let remove_clipping_page _ page =
|
|
|
|
let content' =
|
|
|
|
remove_clipping_ops pdf page.Pdfpage.resources page.Pdfpage.content
|
|
|
|
in
|
|
|
|
process_xobjects pdf page remove_clipping_ops;
|
|
|
|
{page with Pdfpage.content = content'}
|
|
|
|
in
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages (ppstub remove_clipping_page) pdf range
|
2021-12-21 16:25:59 +01:00
|
|
|
|
2021-11-12 23:50:31 +01:00
|
|
|
(* copy the contents of the box f to the box t. If mediabox_if_missing is set,
|
|
|
|
the contents of the mediabox will be used if the from fox is not available. If
|
|
|
|
mediabox_is_missing is false, the page is unaltered. *)
|
|
|
|
let copy_box f t mediabox_if_missing pdf range =
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages
|
2021-11-12 23:50:31 +01:00
|
|
|
(ppstub (fun _ page ->
|
|
|
|
if f = "/MediaBox" then
|
|
|
|
{page with Pdfpage.rest =
|
|
|
|
(Pdf.add_dict_entry page.Pdfpage.rest t (page.Pdfpage.mediabox))}
|
|
|
|
else
|
|
|
|
match Pdf.lookup_direct pdf f page.Pdfpage.rest with
|
|
|
|
| Some pdfobject ->
|
|
|
|
if t = "/MediaBox"
|
|
|
|
then {page with
|
|
|
|
Pdfpage.mediabox = Pdf.direct pdf pdfobject}
|
|
|
|
else {page with Pdfpage.rest =
|
|
|
|
(Pdf.add_dict_entry page.Pdfpage.rest t (Pdf.direct pdf pdfobject))}
|
|
|
|
| None ->
|
|
|
|
if mediabox_if_missing
|
|
|
|
then {page with Pdfpage.rest = Pdf.add_dict_entry page.Pdfpage.rest t page.Pdfpage.mediabox}
|
|
|
|
else page))
|
|
|
|
pdf
|
|
|
|
range
|
|
|
|
|
|
|
|
let remove_unused_resources_page pdf n page =
|
|
|
|
let xobjects, all_names =
|
|
|
|
match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with
|
|
|
|
| Some (Pdf.Dictionary d) -> Pdf.Dictionary d, map fst d
|
|
|
|
| _ -> Pdf.Dictionary [], []
|
|
|
|
in
|
|
|
|
let names_to_keep =
|
|
|
|
option_map
|
|
|
|
(function Pdfops.Op_Do n -> Some n | _ -> None)
|
|
|
|
(Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content)
|
|
|
|
in
|
|
|
|
let names_to_remove = lose (mem' names_to_keep) all_names in
|
|
|
|
let xobjdict = fold_left (Pdf.remove_dict_entry) xobjects names_to_remove in
|
|
|
|
{page with Pdfpage.resources = Pdf.add_dict_entry page.Pdfpage.resources "/XObject" xobjdict}
|
|
|
|
|
|
|
|
let remove_unused_resources pdf =
|
2021-12-21 14:44:46 +01:00
|
|
|
Cpdfpage.process_pages (ppstub (remove_unused_resources_page pdf)) pdf (ilist 1 (Pdfpage.endpage pdf))
|