diff --git a/Makefile b/Makefile index f25b714..c791c5c 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ MODS = cpdfyojson cpdfxmlm cpdfutil \ cpdfattach cpdfpagespec cpdfposition cpdfpresent cpdfmetadata \ cpdfbookmarks cpdfpage cpdfaddtext cpdf cpdfimage cpdffont cpdftype \ cpdftexttopdf cpdftoc cpdfpad cpdfocg cpdfsqueeze cpdfdraft cpdfspot \ - cpdfpagelabels cpdfcreate cpdfannot cpdfxobject cpdfcommand + cpdfpagelabels cpdfcreate cpdfannot cpdfxobject cpdfimpose cpdfcommand SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml diff --git a/cpdf.ml b/cpdf.ml index eb7168b..8c8ea13 100644 --- a/cpdf.ml +++ b/cpdf.ml @@ -3,10 +3,6 @@ open Pdfutil open Pdfio open Cpdferror -(* For uses of process_pages which don't need to deal with matrices, this - function transforms into one which returns the identity matrix *) -let ppstub f n p = (f n p, n, Pdftransform.i_matrix) - (* Add stack operators to a content stream to ensure it is composeable. On -fast, we don't check for Q deficit, assuming PDF is ISO. *) let protect fast pdf resources content = @@ -24,55 +20,6 @@ let protect fast pdf resources content = let qs = addstream (many Pdfops.Op_Q deficit @ [Pdfops.Op_Q]) in [Pdf.Indirect q] @ content @ [Pdf.Indirect qs] -(* If a cropbox exists, make it the mediabox. If not, change nothing. *) -let copy_cropbox_to_mediabox pdf range = - Cpdfpage.process_pages - (ppstub (fun _ page -> - match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with - | Some pdfobject -> {page with Pdfpage.mediabox = Pdf.direct pdf pdfobject} - | None -> page)) - pdf - range - -(* Union two resource dictionaries from the same PDF. 
*) -let combine_pdf_resources pdf a b = - let a_entries = - match a with - | Pdf.Dictionary entries -> entries - | _ -> [] - in let b_entries = - match b with - | Pdf.Dictionary entries -> entries - | _ -> [] - in - let resource_keys = - ["/Font"; "/ExtGState"; "/ColorSpace"; "/Pattern"; - "/Shading"; "/XObject"; "/Properties"] - in - let combine_entries key = - let a_entries = - match Pdf.lookup_direct pdf key a with - | Some (Pdf.Dictionary d) -> d - | _ -> [] - in let b_entries = - match Pdf.lookup_direct pdf key b with - | Some (Pdf.Dictionary d) -> d - | _ -> [] - in - if a_entries = [] && b_entries = [] then - None - else - Some (key, Pdf.Dictionary (a_entries @ b_entries)) - in - let unknown_keys_a = lose (fun (k, _) -> mem k resource_keys) a_entries in - let unknown_keys_b = lose (fun (k, _) -> mem k resource_keys) b_entries in - let combined_known_entries = option_map combine_entries resource_keys in - fold_left - (fun dict (k, v) -> Pdf.add_dict_entry dict k v) - (Pdf.Dictionary []) - (unknown_keys_a @ unknown_keys_b @ combined_known_entries) - - (* Does the page have a defined box e.g "/CropBox" *) let hasbox pdf page boxname = let pages = Pdfpage.pages_of_pagetree pdf in @@ -83,192 +30,6 @@ let hasbox pdf page boxname = | _ -> false -(* \section{Shift page data} *) -let make_mediabox (xmin, ymin, xmax, ymax) = - Pdf.Array - [Pdf.Real xmin; Pdf.Real ymin; Pdf.Real xmax; Pdf.Real ymax] - -(* Change the media box and other known boxes by the function [f] which takes -xmin, xmax, ymin, ymax as input. 
*) -let change_boxes f pdf page = - let names = ["/TrimBox"; "/ArtBox"; "/CropBox"; "/BleedBox"] - in let getbox n = - Pdf.lookup_direct pdf n page.Pdfpage.rest - in - let boxes = combine names (map getbox names) in - let toreplace = lose (function (_, None) -> true | _ -> false) boxes in - let toreplace = - map - (function (name, Some value) -> (name, value) | _ -> assert false) - toreplace - in - let rest' = - fold_left - (fun e (k, v) -> - let v = - make_mediabox (f (Pdf.parse_rectangle v)) - in - Pdf.replace_dict_entry e k v) - page.Pdfpage.rest - toreplace - in - {page with - Pdfpage.mediabox = - make_mediabox (f (Pdf.parse_rectangle page.Pdfpage.mediabox)); - Pdfpage.rest = rest'} - -(* The content transformed by altering any use of [Op_cm]. But we must also -alter any /Matrix entries in pattern dictionaries *) -let change_pattern_matrices_resources pdf tr resources = - try - begin match Pdf.lookup_direct pdf "/Pattern" resources with - | Some (Pdf.Dictionary patterns) -> - let entries = - map - (fun (name, p) -> - (*Printf.printf "Changing matrices of pattern %s\n" name;*) - let old_pattern = Pdf.direct pdf p in - let new_pattern = - let existing_tr = Pdf.parse_matrix pdf "/Matrix" old_pattern in - let new_tr = Pdftransform.matrix_compose (Pdftransform.matrix_invert tr) existing_tr in - Pdf.add_dict_entry old_pattern "/Matrix" (Pdf.make_matrix new_tr) - in - name, Pdf.Indirect (Pdf.addobj pdf new_pattern)) - patterns - in - Pdf.add_dict_entry resources "/Pattern" (Pdf.Dictionary entries) - | _ -> resources - end - with - Pdftransform.NonInvertable -> - Printf.eprintf "Warning: noninvertible matrix\n%!"; - resources - -let change_pattern_matrices_page pdf tr page = - let page = - {page with Pdfpage.resources = change_pattern_matrices_resources pdf tr page.Pdfpage.resources} - in - match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with - | Some (Pdf.Dictionary elts) -> - iter - (fun (k, v) -> - match v with - | Pdf.Indirect i -> - (* Check if it's 
a form XObject. If so, rewrite its resources and add back as same number. *) - begin match Pdf.lookup_direct pdf "/Subtype" v with - | Some (Pdf.Name "/Form") -> - (*Printf.printf "Processing form xobject %s for patterns\n" k; *) - let form_xobject = Pdf.lookup_obj pdf i in - begin match Pdf.lookup_direct pdf "/Resources" form_xobject with - | Some resources -> - let form_xobject' = - Pdf.add_dict_entry form_xobject "/Resources" (change_pattern_matrices_resources pdf tr resources) - in - Pdf.addobj_given_num pdf (i, form_xobject') - | _ -> () - end - | _ -> () - end; - | _ -> raise (Pdf.PDFError "change_pattern_matrices_page")) - elts; - page - | _ -> page - -let transform_rect transform rect = - let minx, miny, maxx, maxy = Pdf.parse_rectangle rect in - let (x0, y0) = Pdftransform.transform_matrix transform (minx, miny) in - let (x1, y1) = Pdftransform.transform_matrix transform (maxx, maxy) in - let (x2, y2) = Pdftransform.transform_matrix transform (minx, maxy) in - let (x3, y3) = Pdftransform.transform_matrix transform (maxx, miny) in - let minx = fmin (fmin x0 x1) (fmin x2 x3) in - let miny = fmin (fmin y0 y1) (fmin y2 y3) in - let maxx = fmax (fmax x0 x1) (fmax x2 x3) in - let maxy = fmax (fmax y0 y1) (fmax y2 y3) in - Pdf.Array [Pdf.Real minx; Pdf.Real miny; Pdf.Real maxx; Pdf.Real maxy] - -let transform_quadpoint_single transform = function - | [x1; y1; x2; y2; x3; y3; x4; y4] -> - let x1, y1, x2, y2, x3, y3, x4, y4 = - Pdf.getnum x1, Pdf.getnum y1, - Pdf.getnum x2, Pdf.getnum y2, - Pdf.getnum x3, Pdf.getnum y3, - Pdf.getnum x4, Pdf.getnum y4 - in - let (x1, y1) = Pdftransform.transform_matrix transform (x1, y1) in - let (x2, y2) = Pdftransform.transform_matrix transform (x2, y2) in - let (x3, y3) = Pdftransform.transform_matrix transform (x3, y3) in - let (x4, y4) = Pdftransform.transform_matrix transform (x4, y4) in - map (fun x -> Pdf.Real x) [x1; y1; x2; y2; x3; y3; x4; y4] - | qp -> - Printf.eprintf "Malformed /QuadPoints format: must be a multiple of 
8 entries\n"; - qp - -let transform_quadpoints transform = function -| Pdf.Array qps -> - Pdf.Array (flatten (map (transform_quadpoint_single transform) (splitinto 8 qps))) -| qp -> - Printf.eprintf "Unknown or malformed /QuadPoints format %s\n" (Pdfwrite.string_of_pdf qp); - qp - -(* Apply transformations to any annotations in /Annots (i.e their /Rect and /QuadPoints entries) *) -let transform_annotations pdf transform rest = - match Pdf.lookup_direct pdf "/Annots" rest with - | Some (Pdf.Array annots) -> - (* Always indirect references, so alter in place *) - iter - (function - | Pdf.Indirect i -> - let annot = Pdf.lookup_obj pdf i in - let rect' = - match Pdf.lookup_direct pdf "/Rect" annot with - | Some rect -> transform_rect transform rect - | None -> raise (Pdf.PDFError "transform_annotations: no rect") - in - let quadpoints' = - match Pdf.lookup_direct pdf "/QuadPoints" annot with - | Some qp -> Some (transform_quadpoints transform qp) - | None -> None - in - let annot = Pdf.add_dict_entry annot "/Rect" rect' in - let annot = - match quadpoints' with - | Some qp -> Pdf.add_dict_entry annot "/QuadPoints" qp - | None -> annot - in - Pdf.addobj_given_num pdf (i, annot) - | _ -> Printf.eprintf "transform_annotations: not indirect\n%!") - annots - | _ -> () - -let shift_page ?(fast=false) dxdylist pdf pnum page = - let dx, dy = List.nth dxdylist (pnum - 1) in - let transform_op = - Pdfops.Op_cm (Pdftransform.matrix_of_op (Pdftransform.Translate (dx, dy))) - in - let page = - change_pattern_matrices_page pdf (Pdftransform.mktranslate ~-.dx ~-.dy) page - in - transform_annotations pdf (Pdftransform.mktranslate dx dy) page.Pdfpage.rest; - (Pdfpage.prepend_operators pdf [transform_op] ~fast page, pnum, Pdftransform.mktranslate dx dy) - -let shift_pdf ?(fast=false) dxdylist pdf range = - Cpdfpage.process_pages (shift_page ~fast dxdylist pdf) pdf range - -(* Change a page's media box so its minimum x and y are 0, making other -operations simpler to think about. 
Any shift that is done is reflected in -other boxes (clip etc.) *) -let rectify_boxes ?(fast=false) pdf page = - let minx, miny, _, _ = - Pdf.parse_rectangle page.Pdfpage.mediabox - in - let f (iminx, iminy, imaxx, imaxy) = - iminx -. minx, iminy -. miny, imaxx -. minx, imaxy -. miny - in - let page = change_boxes f pdf page in - if minx <> 0. || miny <> 0. - then - begin let p, _, _ = shift_page ~fast [(-.minx),(-.miny)] pdf 1 page in p end - else page (* \section{Flip pages} *) let flip_page ?(fast=false) transform_op pdf pnum page = @@ -276,8 +37,8 @@ let flip_page ?(fast=false) transform_op pdf pnum page = Pdf.parse_rectangle page.Pdfpage.mediabox in let tr = transform_op minx miny maxx maxy in - let page = change_pattern_matrices_page pdf tr page in - transform_annotations pdf tr page.Pdfpage.rest; + let page = Cpdfutil.change_pattern_matrices_page pdf tr page in + Cpdfutil.transform_annotations pdf tr page.Pdfpage.rest; (Pdfpage.prepend_operators pdf [Pdfops.Op_cm tr] ~fast page, pnum, tr) let vflip_pdf ?(fast=false) pdf range = @@ -365,9 +126,9 @@ let do_stamp relative_to_cropbox fast position topline midline scale_to_fit isov (if relative_to_cropbox then [Pdftransform.Translate (txmin, tymin)] else []) @ [Pdftransform.Scale ((sxmin, symin), scale, scale)])) in - transform_annotations pdf matrix o.Pdfpage.rest; + Cpdfutil.transform_annotations pdf matrix o.Pdfpage.rest; let r = Pdfpage.prepend_operators pdf [Pdfops.Op_cm matrix] ~fast o in - change_pattern_matrices_page pdf matrix r + Cpdfutil.change_pattern_matrices_page pdf matrix r else let sw = sxmax -. sxmin and sh = symax -. symin and w = txmax -. txmin and h = tymax -. 
tymin in @@ -377,9 +138,9 @@ let do_stamp relative_to_cropbox fast position topline midline scale_to_fit isov ((if relative_to_cropbox then [Pdftransform.Translate (txmin, tymin)] else []) @ [Pdftransform.Translate (dx, dy)])) in - transform_annotations pdf matrix o.Pdfpage.rest; + Cpdfutil.transform_annotations pdf matrix o.Pdfpage.rest; let r = Pdfpage.prepend_operators pdf [Pdfops.Op_cm matrix] ~fast o in - change_pattern_matrices_page pdf matrix r + Cpdfutil.change_pattern_matrices_page pdf matrix r in {u with Pdfpage.content = @@ -389,7 +150,7 @@ let do_stamp relative_to_cropbox fast position topline midline scale_to_fit isov Pdfpage.rest = combine_page_items pdf u.Pdfpage.rest o.Pdfpage.rest; Pdfpage.resources = - combine_pdf_resources pdf u.Pdfpage.resources o.Pdfpage.resources} + Cpdfutil.combine_pdf_resources pdf u.Pdfpage.resources o.Pdfpage.resources} let stamp relative_to_cropbox position topline midline fast scale_to_fit isover range over pdf = let prefix = Pdfpage.shortest_unused_prefix pdf in @@ -503,17 +264,6 @@ let combine_pages fast under over scaletofit swap equalize = debug_pdf r "final.pdf"; r -(* \section{Set media box} *) -let set_mediabox xywhlist pdf range = - let crop_page pnum page = - let x, y, w, h = List.nth xywhlist (pnum - 1) in - {page with - Pdfpage.mediabox = - (Pdf.Array - [Pdf.Real x; Pdf.Real y; - Pdf.Real (x +. w); Pdf.Real (y +. 
h)])} - in - Cpdfpage.process_pages (ppstub crop_page) pdf range (* Just used by cpdflib for historical reasons *) let setBox box minx maxx miny maxy pdf range = @@ -524,7 +274,7 @@ let setBox box minx maxx miny maxy pdf range = page.Pdfpage.rest box (Pdf.Array [Pdf.Real minx; Pdf.Real miny; Pdf.Real maxx; Pdf.Real maxy])} in - Cpdfpage.process_pages (ppstub set_box_page) pdf range + Cpdfpage.process_pages (Cpdfutil.ppstub set_box_page) pdf range (* \section{Cropping} *) let crop_pdf ?(box="/CropBox") xywhlist pdf range = @@ -539,526 +289,8 @@ let crop_pdf ?(box="/CropBox") xywhlist pdf range = [Pdf.Real x; Pdf.Real y; Pdf.Real (x +. w); Pdf.Real (y +. h)])))} in - Cpdfpage.process_pages (ppstub crop_page) pdf range + Cpdfpage.process_pages (Cpdfutil.ppstub crop_page) pdf range -(* Clip a page to one of its boxes, or the media box if that box is not - * present. This is a hard clip, done by using a clipping rectangle, so that - * the page may then be used as a stamp without extraneous material reapearing. - * *) -let hard_box pdf range boxname mediabox_if_missing fast = - Cpdfpage.process_pages - (ppstub (fun pagenum page -> - let minx, miny, maxx, maxy = - if boxname = "/MediaBox" then - Pdf.parse_rectangle page.Pdfpage.mediabox - else - match Pdf.lookup_direct pdf boxname page.Pdfpage.rest with - | Some a -> Pdf.parse_rectangle a - | _ -> - if mediabox_if_missing - then Pdf.parse_rectangle page.Pdfpage.mediabox - else error (Printf.sprintf "hard_box: box %s not found" boxname) - in - let ops = [Pdfops.Op_re (minx, miny, maxx -. minx, maxy -. 
miny); Pdfops.Op_W; Pdfops.Op_n] in - Pdfpage.prepend_operators pdf ops ~fast page)) - pdf - range - -let remove_cropping_pdf pdf range = - let remove_cropping_page _ page = - {page with - Pdfpage.rest = - (Pdf.remove_dict_entry page.Pdfpage.rest "/CropBox")} - in - Cpdfpage.process_pages (ppstub remove_cropping_page) pdf range - -let remove_trim_pdf pdf range = - let remove_trim_page _ page = - {page with - Pdfpage.rest = - (Pdf.remove_dict_entry page.Pdfpage.rest "/TrimBox")} - in - Cpdfpage.process_pages (ppstub remove_trim_page) pdf range - -let remove_art_pdf pdf range = - let remove_art_page _ page = - {page with - Pdfpage.rest = - (Pdf.remove_dict_entry page.Pdfpage.rest "/ArtBox")} - in - Cpdfpage.process_pages (ppstub remove_art_page) pdf range - -let remove_bleed_pdf pdf range = - let remove_bleed_page _ page = - {page with - Pdfpage.rest = - (Pdf.remove_dict_entry page.Pdfpage.rest "/BleedBox")} - in - Cpdfpage.process_pages (ppstub remove_bleed_page) pdf range - -(* \section{Rotating pages} *) -let rotate_pdf r pdf range = - let rotate_page _ page = - {page with Pdfpage.rotate = - Pdfpage.rotation_of_int r} - in - Cpdfpage.process_pages (ppstub rotate_page) pdf range - -let rotate_pdf_by r pdf range = - let rotate_page_by _ page = - {page with Pdfpage.rotate = - Pdfpage.rotation_of_int ((Pdfpage.int_of_rotation page.Pdfpage.rotate + r) mod 360)} - in - Cpdfpage.process_pages (ppstub rotate_page_by) pdf range - -let rotate_page_contents ~fast rotpoint r pdf pnum page = - let rotation_point = - match rotpoint with - | None -> - let minx, miny, maxx, maxy = Pdf.parse_rectangle page.Pdfpage.mediabox in - (minx +. maxx) /. 2., (miny +. maxy) /. 2. 
- | Some point -> point - in - let tr = - Pdftransform.matrix_of_op - (Pdftransform.Rotate (rotation_point, -.(rad_of_deg r))) - in let tr2 = - Pdftransform.matrix_of_op - (Pdftransform.Rotate (rotation_point, rad_of_deg r)) - in - let transform_op = Pdfops.Op_cm tr in - let page = change_pattern_matrices_page pdf tr2 page in - transform_annotations pdf tr page.Pdfpage.rest; - (Pdfpage.prepend_operators pdf [transform_op] ~fast page, pnum, tr) - -let rotate_contents ?(fast=false) r pdf range = - Cpdfpage.process_pages (rotate_page_contents ~fast None r pdf) pdf range - -(* Return the pages from the pdf in the range, unordered. *) -let select_pages range pdf = - let pages = Pdfpage.pages_of_pagetree pdf in - option_map (function n -> try Some (select n pages) with _ -> None) range - - -(* Upright functionality *) - -(* If all pages are already upright, and the mediabox is (0,0)-based, do nothing -to save time. *) -let allupright range pdf = - let page_is_upright page = - page.Pdfpage.rotate = Pdfpage.Rotate0 && - (let (minx, miny, _, _) = Pdf.parse_rectangle page.Pdfpage.mediabox in - minx < 0.001 && miny < 0.001 && minx > ~-.0.001 && miny > ~-.0.001) - in - not (mem false (map page_is_upright (select_pages range pdf))) - -let upright_transform page = - let rotate = - Pdfpage.int_of_rotation page.Pdfpage.rotate - and cx, cy = - let minx, miny, maxx, maxy = Pdf.parse_rectangle page.Pdfpage.mediabox in - (minx +. maxx) /. 2., (miny +. maxy) /. 2. 
- in - Pdftransform.mkrotate (cx, cy) (rad_of_deg (~-.(float rotate))) - -let transform_boxes tr pdf page = - let f (minx, miny, maxx, maxy) = - let minx, miny = Pdftransform.transform_matrix tr (minx, miny) - and maxx, maxy = Pdftransform.transform_matrix tr (maxx, maxy) in - (minx, miny, maxx, maxy) - in - change_boxes f pdf page - -let transform_contents ?(fast=false) tr pdf page = - let transform_op = Pdfops.Op_cm tr in - let page = change_pattern_matrices_page pdf (Pdftransform.matrix_invert tr) page in - transform_annotations pdf tr page.Pdfpage.rest; - Pdfpage.prepend_operators pdf [transform_op] ~fast page - -let upright ?(fast=false) range pdf = - if allupright range pdf then pdf else - let upright_page _ pnum page = - let tr = upright_transform page in - let page = transform_boxes tr pdf page in - let page = transform_contents ~fast tr pdf page in - (rectify_boxes ~fast pdf {page with Pdfpage.rotate = Pdfpage.Rotate0}, pnum, tr) - in - Cpdfpage.process_pages (upright_page pdf) pdf range - -(* \section{Scale page data} *) -let scale_pdf ?(fast=false) sxsylist pdf range = - let scale_page pnum page = - let sx, sy = List.nth sxsylist (pnum - 1) in - let f (xmin, ymin, xmax, ymax) = - xmin *. sx, ymin *. sy, xmax *. sx, ymax *. 
sy - in - let page = change_boxes f pdf page - and matrix = Pdftransform.matrix_of_op (Pdftransform.Scale ((0., 0.), sx, sy)) in - let transform_op = - Pdfops.Op_cm matrix - and page = - change_pattern_matrices_page pdf (Pdftransform.matrix_invert matrix) page - in - transform_annotations pdf matrix page.Pdfpage.rest; - (Pdfpage.prepend_operators pdf ~fast [transform_op] page, pnum, matrix) - in - Cpdfpage.process_pages scale_page pdf range - -(* Scale to fit page of size x * y *) -let scale_to_fit_pdf ?(fast=false) position input_scale xylist op pdf range = - let scale_page_to_fit pnum page = - let x, y = List.nth xylist (pnum - 1) in - let matrix = - let (minx, miny, maxx, maxy) = - (* Use cropbox if available *) - Pdf.parse_rectangle - (match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with - | Some r -> r - | None -> page.Pdfpage.mediabox) - in - if maxx <= 0. || maxy <= 0. then failwith "Zero-sized pages are invalid" else - let fx = x /. maxx in let fy = y /. maxy in - let scale = fmin fx fy *. input_scale in - let trans_x = - match position with - Cpdfposition.Left _ -> 0. - | Cpdfposition.Right _ -> (x -. (maxx *. scale)) - | _ -> (x -. (maxx *. scale)) /. 2. - and trans_y = - match position with - | Cpdfposition.Top _ -> (y -. (maxy *. scale)) - | Cpdfposition.Bottom _ -> 0. - | _ -> (y -. (maxy *. scale)) /. 2. 
- in - (Pdftransform.matrix_of_transform - [Pdftransform.Translate (trans_x, trans_y); - Pdftransform.Scale ((0., 0.), scale, scale)]) - in - let page = - change_boxes - (function (minx, miny, maxx, maxy) -> 0., 0., x, y) - pdf page - in - transform_annotations pdf matrix page.Pdfpage.rest; - (Pdfpage.prepend_operators pdf [Pdfops.Op_cm matrix] ~fast - (change_pattern_matrices_page pdf (Pdftransform.matrix_invert matrix) page), pnum, matrix) - in - Cpdfpage.process_pages scale_page_to_fit pdf range - -(* Scale contents *) -let scale_page_contents ?(fast=false) scale position pdf pnum page = - let (minx, miny, maxx, maxy) as box = - (* Use cropbox if available *) - Pdf.parse_rectangle - (match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with - | Some r -> r - | None -> page.Pdfpage.mediabox) - in - let sx, sy, _ = Cpdfposition.calculate_position true 0. box Horizontal position in - let tx, ty = - let open Cpdfposition in - match position with - | Top t -> 0., -.t - | TopLeft t -> t, -.t - | TopRight t -> -.t, -.t - | Left t -> t, 0. - | BottomLeft t -> t, t - | Bottom t -> 0., t - | BottomRight t -> -.t, t - | Right t -> -.t, 0. - | _ -> 0., 0. (* centre it... FIXME: We will add a center position, eventually, for text and this... *) - in - let transform = - Pdftransform.matrix_of_transform - [Pdftransform.Translate (tx, ty); - Pdftransform.Scale ((sx, sy), scale, scale)] - in - let transform_op = Pdfops.Op_cm transform in - let page = change_pattern_matrices_page pdf transform page in - transform_annotations pdf transform page.Pdfpage.rest; - (Pdfpage.prepend_operators pdf [transform_op] ~fast page, pnum, transform) - -let scale_contents ?(fast=false) position scale pdf range = - Cpdfpage.process_pages (scale_page_contents ~fast scale position pdf) pdf range - -(* Imposition *) - -(* Union two rest dictionaries from the same PDF. 
*) -let combine_pdf_rests pdf a b = - let a_entries = - match a with - | Pdf.Dictionary entries -> entries - | _ -> [] - in let b_entries = - match b with - | Pdf.Dictionary entries -> entries - | _ -> [] - in - let keys_to_combine = ["/Annots"] in - let combine_entries key = - let a_entries = - match Pdf.lookup_direct pdf key a with - | Some (Pdf.Array d) -> d - | _ -> [] - in let b_entries = - match Pdf.lookup_direct pdf key b with - | Some (Pdf.Array d) -> d - | _ -> [] - in - if a_entries = [] && b_entries = [] then - None - else - Some (key, Pdf.Array (a_entries @ b_entries)) - in - let unknown_keys_a = lose (fun (k, _) -> mem k keys_to_combine) a_entries in - let unknown_keys_b = lose (fun (k, _) -> mem k keys_to_combine) b_entries in - let combined_known_entries = option_map combine_entries keys_to_combine in - fold_left - (fun dict (k, v) -> Pdf.add_dict_entry dict k v) - (Pdf.Dictionary []) - (unknown_keys_a @ unknown_keys_b @ combined_known_entries) - -(* Calculate the transformation matrices for a single imposed output page. *) - -(* make margins by scaling for a fitted impose. *) -let make_margin output_mediabox margin tr = - if margin = 0. then tr else - let width, height = - match Pdf.parse_rectangle output_mediabox with - xmin, ymin, xmax, ymax -> xmax -. xmin, ymax -. ymin - in - if margin > width /. 2. || margin > height /. 2. then error "margin would fill whole page!" else - let hfactor = (width -. margin -. margin) /. width in - let vfactor = (height -. margin -. margin) /. height in - let factor = fmin hfactor vfactor in - let scale = Pdftransform.matrix_of_op (Pdftransform.Scale ((0., 0.), factor, factor)) in - let shift = - Pdftransform.matrix_of_op (Pdftransform.Translate ((width -. width *. factor) /. 2., - (height -. height *. factor) /. 2.)) - in - (Pdftransform.matrix_compose shift (Pdftransform.matrix_compose scale tr)) - -(* FIXME fixup -center for next release. For now it has been disabled. 
*) -let impose_transforms fit fx fy columns rtl btt center margin mediabox output_mediabox fit_extra_hspace fit_extra_vspace len = - let width, height = - match Pdf.parse_rectangle mediabox with - xmin, ymin, xmax, ymax -> xmax -. xmin, ymax -. ymin - in - let trs = ref [] in - let len = ref len in - let cent_extra_x = ref 0. in - let cent_extra_y = ref 0. in - let addtr x y row col px py = - let cex, cey = - (if rtl then ~-.(!cent_extra_x) else !cent_extra_x), (if btt then ~-.(!cent_extra_y) else !cent_extra_y) - in - let spacecol = if rtl then x - col - 1 else col in - let total_fit_extra_hspace = fit_extra_hspace *. (float_of_int spacecol +. 1.) in - let total_fit_extra_vspace = fit_extra_vspace *. (float_of_int row +. 1.) in - (*Printf.printf "row = %i, py = %f, ey = %f, fit_extra_vspace = %f, total_fit_extra_vspace = %f\n" row py cey fit_extra_vspace total_fit_extra_vspace;*) - trs := - Pdftransform.matrix_of_transform - [Pdftransform.Translate (px +. cex +. total_fit_extra_hspace, py +. cey +. total_fit_extra_vspace)] - ::!trs - in - let x = int_of_float fx in - let y = int_of_float fy in - let final_full_cols = !len mod x in - let final_full_rows = !len mod y in - let order row col = - ((if btt then y - row - 1 else row), (if rtl then x - col - 1 else col)) - in - if columns then - for col = 0 to x - 1 do - if center && !len < y then if !cent_extra_y = 0. then cent_extra_y := ~-.(height *. float_of_int (y - !len)) /. 2.; - for row = y - 1 downto 0 do - let original_row = row in - let row, col = order row col in - let adjusted_row = - let final_empty_rows = y - final_full_rows in - if center && !len <= final_full_rows then original_row + (y - 1 - 1 - (final_empty_rows / 2)) else original_row - in - if !len > 0 then addtr x y adjusted_row col (width *. float_of_int col) (height *. float_of_int row); - len := !len - 1 - done - done - else - for row = y - 1 downto 0 do - if center && !len < x then if !cent_extra_x = 0. then cent_extra_x := (width *. 
float_of_int (x - !len)) /. 2.; - for col = 0 to x - 1 do - let original_col = col in - let row, col = order row col in - let adjusted_col = - let final_empty_cols = x - final_full_cols in - if center && !len <= final_full_cols then original_col + (x - 1 - 1 - (final_empty_cols / 2)) else original_col - in - if !len > 0 then addtr x y row adjusted_col (width *. float_of_int col) (height *. float_of_int row); - len := !len - 1 - done - done; - map (if fit then make_margin output_mediabox margin else Fun.id) (rev !trs) - -(* Combine two pages into one throughout the document. The pages have already -had their objects renumbered so as not to clash. *) -let impose_pages fit x y columns rtl btt center margin output_mediabox fast fit_extra_hspace fit_extra_vspace pdf = function - | [] -> assert false - | (h::_) as pages -> - let transforms = - impose_transforms - fit x y columns rtl btt center margin h.Pdfpage.mediabox - output_mediabox fit_extra_hspace fit_extra_vspace (length pages) - in - (* Change the pattern matrices before combining resources *) - let pages, h = - let r = map2 (fun p t -> change_pattern_matrices_page pdf t p) pages transforms in - (r, List.hd r) - in - let resources' = pair_reduce (combine_pdf_resources pdf) (map (fun p -> p.Pdfpage.resources) pages) in - let rest' = pair_reduce (combine_pdf_rests pdf) (map (fun p -> p.Pdfpage.rest) pages) in - let content' = - let transform_stream transform contents = - (* If fast, no mismatched q/Q protection and no parsing of operators. *) - if fast then - [Pdfops.stream_of_ops [Pdfops.Op_q; Pdfops.Op_cm transform]] @ contents @ [Pdfops.stream_of_ops [Pdfops.Op_Q]] - else - (* If slow, use protect from Pdfpage. 
*) - let ops = Pdfpage.protect pdf resources' contents @ Pdfops.parse_operators pdf resources' contents in - [Pdfops.stream_of_ops ([Pdfops.Op_q] @ [Pdfops.Op_cm transform] @ ops @ [Pdfops.Op_Q])] - in - flatten - (map2 - (fun p t -> transform_annotations pdf t p.Pdfpage.rest; transform_stream t p.Pdfpage.content) - pages - transforms) - in - {Pdfpage.mediabox = output_mediabox; - Pdfpage.rotate = h.Pdfpage.rotate; - Pdfpage.content = content'; - Pdfpage.resources = resources'; - Pdfpage.rest = rest'} - -(* For fit, we scale contents, move to middle and retain page size. For xy, we - expand mediabox and move contents to middle. This function also does the hard boxing. *) -let make_space fit ~fast spacing pdf = - let endpage = Pdfpage.endpage pdf in - let all = ilist 1 endpage in - let pdf = hard_box pdf all "/MediaBox" false fast in - if spacing = 0. then pdf else - let margin = spacing /. 2. in - let firstpage = hd (Pdfpage.pages_of_pagetree pdf) in - let width, height = - match Pdf.parse_rectangle firstpage.Pdfpage.mediabox with - xmin, ymin, xmax, ymax -> (xmax -. xmin, ymax -. ymin) - in - if fit then - (shift_pdf - ~fast - (many (margin, margin) endpage) - (scale_contents ~fast (Cpdfposition.BottomLeft 0.) ((width -. spacing) /. width) pdf all) - all) - else - (set_mediabox - (many (0., 0., width +. spacing, height +. spacing) endpage) - (shift_pdf ~fast (many (margin, margin) endpage) pdf all) all) - -(* We add the border as a thick unfilled rectangle just inside the page edge, - only if its linewidth is > 0 since, for us, 0 means none, not single-pixel - like in PDF. *) -let add_border linewidth ~fast pdf = - if linewidth = 0. then pdf else - let firstpage = hd (Pdfpage.pages_of_pagetree pdf) in - let _, _, w, h = Pdf.parse_rectangle firstpage.Pdfpage.mediabox in - Cpdfaddtext.addrectangle - fast (w -. linewidth, h -. linewidth) (RGB (0., 0., 0.)) true linewidth 1. (Cpdfposition.BottomLeft (linewidth /. 
2.)) - false false (ilist 1 (Pdfpage.endpage pdf)) pdf - -let impose ~x ~y ~fit ~columns ~rtl ~btt ~center ~margin ~spacing ~linewidth ~fast pdf = - let endpage = Pdfpage.endpage pdf in - let pagenums = ilist 1 endpage in - let pdf = copy_cropbox_to_mediabox pdf pagenums in - let pdf = remove_cropping_pdf pdf pagenums in - let pdf = upright pagenums pdf in - let pdf = add_border linewidth ~fast pdf in - let pdf = make_space fit ~fast spacing pdf in - let firstpage = hd (Pdfpage.pages_of_pagetree pdf) in - let _, _, w, h = Pdf.parse_rectangle firstpage.Pdfpage.mediabox in - let ix = int_of_float x in - let iy = int_of_float y in - let n, ix, iy, fit_extra_hspace, fit_extra_vspace = - if fit then - (* +. 0.001 ensures a page always fits on itself, or on another page of same height or width. *) - let across = int_of_float (floor (x /. w +. 0.001)) in - let down = int_of_float (floor (y /. h +. 0.001)) in - if across < 1 || down < 1 then error "Not even a single page would fit." else - let excess_hspace = x -. float_of_int across *. w in - let excess_vspace = y -. float_of_int down *. h in - (*Printf.printf "across = %i, down =%i, excess_hspace = %f, excess_hspace = %f\n" across down excess_hspace excess_vspace;*) - (across * down, - across, - down, - excess_hspace /. (float_of_int across +. 1.), - excess_vspace /. (float_of_int down +. 1.)) - else - if ix = 0 && iy = 0 then error "impose-xy: both dimensions cannot be zero" else - if ix = 0 then (endpage, endpage, 1, 0., 0.) - else if iy = 0 then (endpage, 1, endpage, 0., 0.) - else (ix * iy, ix, iy, 0., 0.) - in - let mediabox' = - if fit then Pdf.Array [Pdf.Real 0.; Pdf.Real 0.; Pdf.Real x; Pdf.Real y] else - let m2 = margin *. 2. in - if x = 0.0 then Pdf.Array [Pdf.Real 0.; Pdf.Real 0.; Pdf.Real (w *. float_of_int endpage +. m2); Pdf.Real (h +. m2)] - else if y = 0.0 then Pdf.Array [Pdf.Real 0.; Pdf.Real 0.; Pdf.Real (w +. m2); Pdf.Real (h *. float_of_int endpage +. 
m2)] - else Pdf.Array [Pdf.Real 0.; Pdf.Real 0.; Pdf.Real (w *. x +. m2); Pdf.Real (h *. y +. m2)] - in - let pages = Pdfpage.pages_of_pagetree pdf in - let pagesets = splitinto n pages in - let renumbered = map (Pdfpage.renumber_pages pdf) pagesets in - let pages = - map - (impose_pages fit (float_of_int ix) (float_of_int iy) columns rtl btt - center margin mediabox' fast fit_extra_hspace fit_extra_vspace pdf) - renumbered - in - let changes = map (fun x -> (x, (x + (n - 1)) / n)) pagenums in - let pdf = Pdfpage.change_pages ~changes true pdf pages in - if fit then pdf else shift_pdf ~fast (many (margin, margin) (length pages)) pdf (ilist 1 (Pdfpage.endpage pdf)) - -(* Legacy -twoup-stack. Impose 2x1 on a page twice the size then rotate. *) -let twoup_stack fast pdf = - let pdf = - impose - ~x:2. ~y:1. ~fit:false ~columns:false ~rtl:false ~btt:false ~center:false - ~margin:0. ~spacing:0. ~linewidth:0. ~fast pdf - in - let all = ilist 1 (Pdfpage.endpage pdf) in - upright ~fast all (rotate_pdf ~-90 pdf all) - -(* Legacy -two-up. Rotate the pages and shrink them so as to fit 2x1 on a page the same size. *) -let twoup fast pdf = - let firstpage = hd (Pdfpage.pages_of_pagetree pdf) in - let width, height = - match Pdf.parse_rectangle firstpage.Pdfpage.mediabox with - xmin, ymin, xmax, ymax -> xmax -. xmin, ymax -. ymin - in - let width_exceeds_height = width > height in - let sc = - if width_exceeds_height - then fmin (height /. width) ((width /. 2.) /. height) - else fmin (width /. height) ((height /. 2.) /. width) - in - let endpage = Pdfpage.endpage pdf in - let all = ilist 1 endpage in - let pdf = scale_pdf ~fast (many (sc, sc) endpage) pdf all in - let pdf = - impose - ~x:2. ~y:1. ~fit:false ~columns:false ~rtl:false ~btt:false ~center:true - ~margin:0. ~spacing:0. ~linewidth:0. ~fast pdf - in - let endpage = Pdfpage.endpage pdf in - let all = ilist 1 endpage in - let pdf = upright all (rotate_pdf ~-90 pdf all) in - scale_to_fit_pdf ~fast Cpdfposition.Diagonal 1. 
(many (width, height) endpage) () pdf all (* \section{Blacken text} *) @@ -1150,7 +382,7 @@ let blacktext c range pdf = Cpdfutil.process_xobjects pdf page (blacktext_ops c); {page with Pdfpage.content = content'} in - Cpdfpage.process_pages (ppstub blacktext_page) pdf range + Cpdfpage.process_pages (Cpdfutil.ppstub blacktext_page) pdf range (* \section{Blacken lines} *) let blacklines_ops c pdf resources content = @@ -1176,7 +408,7 @@ let blacklines c range pdf = Cpdfutil.process_xobjects pdf page (blacklines_ops c); {page with Pdfpage.content = content'} in - Cpdfpage.process_pages (ppstub blacklines_page) pdf range + Cpdfpage.process_pages (Cpdfutil.ppstub blacklines_page) pdf range (* \section{Blacken Fills} *) let blackfills_ops c pdf resources content = @@ -1202,7 +434,7 @@ let blackfills c range pdf = Cpdfutil.process_xobjects pdf page (blackfills_ops c); {page with Pdfpage.content = content'} in - Cpdfpage.process_pages (ppstub blackfills_page) pdf range + Cpdfpage.process_pages (Cpdfutil.ppstub blackfills_page) pdf range (* \section{Set a minimum line width to avoid dropout} *) let thinlines range width pdf = @@ -1277,7 +509,7 @@ let thinlines range width pdf = let content' = [Pdfops.stream_of_ops operators] in {page with Pdfpage.content = content'} in - Cpdfpage.process_pages (ppstub thinpage) pdf range + Cpdfpage.process_pages (Cpdfutil.ppstub thinpage) pdf range (* Parse the new content to make sure syntactically ok, append * as required. Rewrite the content *) @@ -1289,7 +521,7 @@ let append_page_content_page fast s before pdf n page = pdf ops ~fast page let append_page_content s before fast range pdf = - Cpdfpage.process_pages (ppstub (append_page_content_page fast s before pdf)) pdf range + Cpdfpage.process_pages (Cpdfutil.ppstub (append_page_content_page fast s before pdf)) pdf range (* Add rectangles on top of pages to show Media, Crop, Art, Trim, Bleed boxes. 
* @@ -1335,7 +567,7 @@ let show_boxes_page fast pdf _ page = Pdfpage.postpend_operators pdf ops ~fast page let show_boxes ?(fast=false) pdf range = - Cpdfpage.process_pages (ppstub (show_boxes_page fast pdf)) pdf range + Cpdfpage.process_pages (Cpdfutil.ppstub (show_boxes_page fast pdf)) pdf range @@ -1369,7 +601,7 @@ let trim_marks_page fast pdf n page = page let trim_marks ?(fast=false) pdf range = - Cpdfpage.process_pages (ppstub (trim_marks_page fast pdf)) pdf range + Cpdfpage.process_pages (Cpdfutil.ppstub (trim_marks_page fast pdf)) pdf range (* 1. Extend remove_dict_entry with search term 2. Implement replace_dict_entry by analogy to remove_dict_entry *) @@ -1435,14 +667,14 @@ let remove_clipping pdf range = Cpdfutil.process_xobjects pdf page remove_clipping_ops; {page with Pdfpage.content = content'} in - Cpdfpage.process_pages (ppstub remove_clipping_page) pdf range + Cpdfpage.process_pages (Cpdfutil.ppstub remove_clipping_page) pdf range (* copy the contents of the box f to the box t. If mediabox_if_missing is set, the contents of the mediabox will be used if the from fox is not available. If mediabox_is_missing is false, the page is unaltered. 
*) let copy_box f t mediabox_if_missing pdf range = Cpdfpage.process_pages - (ppstub (fun _ page -> + (Cpdfutil.ppstub (fun _ page -> if f = "/MediaBox" then {page with Pdfpage.rest = (Pdf.add_dict_entry page.Pdfpage.rest t (page.Pdfpage.mediabox))} @@ -1477,4 +709,4 @@ let remove_unused_resources_page pdf n page = {page with Pdfpage.resources = Pdf.add_dict_entry page.Pdfpage.resources "/XObject" xobjdict} let remove_unused_resources pdf = - Cpdfpage.process_pages (ppstub (remove_unused_resources_page pdf)) pdf (ilist 1 (Pdfpage.endpage pdf)) + Cpdfpage.process_pages (Cpdfutil.ppstub (remove_unused_resources_page pdf)) pdf (ilist 1 (Pdfpage.endpage pdf)) diff --git a/cpdf.mli b/cpdf.mli index 5e96fce..ac8fb86 100644 --- a/cpdf.mli +++ b/cpdf.mli @@ -1,10 +1,6 @@ (** Coherent PDF Tools Core Routines *) open Pdfutil -(** {2 Working with pages} *) - -val copy_cropbox_to_mediabox : Pdf.t -> int list -> Pdf.t - (** {2 Stamping} *) (** [combine_pages fast under over scaletofit swap equalize] combines the page @@ -28,77 +24,19 @@ val hasbox : Pdf.t -> int -> string -> bool (** [crop_pdf xywhlist pdf range] sets the cropbox on the given pages. *) val crop_pdf : ?box:string -> (float * float * float * float) list -> Pdf.t -> int list -> Pdf.t -val hard_box : Pdf.t -> int list -> string -> bool -> bool -> Pdf.t - -(** [set_mediabox xywhlist pdf range] sets the media box on the given pages. *) -val set_mediabox : (float * float * float * float) list -> Pdf.t -> int list -> Pdf.t - (** [setBox boxname x y w h pdf range] sets the given box on the given pages. *) val setBox : string -> float -> float -> float -> float -> Pdf.t -> int list -> Pdf.t -(** Remove any cropping from the given pages. *) -val remove_cropping_pdf : Pdf.t -> int list -> Pdf.t - -(** Remove any trim box from the given pages. *) -val remove_trim_pdf : Pdf.t -> int list -> Pdf.t - -(** Remove any bleed box from the given pages. 
*) -val remove_bleed_pdf : Pdf.t -> int list -> Pdf.t - -(** Remove any art box from the given pages. *) -val remove_art_pdf : Pdf.t -> int list -> Pdf.t - -(** Change rotation to a given value 0, 90, 180, 270 on given pages. *) -val rotate_pdf : int -> Pdf.t -> int list -> Pdf.t - -(** Rotate clockwise by 0, 90, 180, 270 on given pages. *) -val rotate_pdf_by : int -> Pdf.t -> int list -> Pdf.t - -(** Rotate the contents by the given angle on the given pages. If [fast] is true, assume PDF is well-formed. *) -val rotate_contents : ?fast:bool -> float -> Pdf.t -> int list -> Pdf.t - -(** Modify the rotation of the page and its contents to leave the rotation at 0 with the page effectively unaltered. *) -val upright : ?fast:bool -> int list -> Pdf.t -> Pdf.t - (** Flip the given pages vertically *) val vflip_pdf : ?fast:bool -> Pdf.t -> int list -> Pdf.t (** Flip the given pages horizontally *) val hflip_pdf : ?fast:bool -> Pdf.t -> int list -> Pdf.t -(** Shift a PDF in x and y (in pts) in the given pages. List of (x, y) pairs is -for all pages in pdf. *) -val shift_pdf : ?fast:bool -> (float * float) list -> Pdf.t -> int list -> Pdf.t - -(** Scale a PDF in sx, sy in the given pages. List of (sx, sy) pairs is -for all pages in pdf. *) -val scale_pdf : ?fast:bool -> (float * float) list -> Pdf.t -> int list -> Pdf.t - -(** [scale_to_fit_pdf fast position input_scale x y op pdf range] scales a page to fit the -page size given by (x, y) and by the [input_scale] (e.g 1.0 = scale to fit, 0.9 -= scale to fit leaving a border etc.). [op] is unused. *) -val scale_to_fit_pdf : ?fast:bool -> Cpdfposition.position -> float -> (float * float) list -> 'a -> Pdf.t -> int list -> Pdf.t - -(** Scale the contents of a page by a given factor centred around a given point in a given range. 
*) -val scale_contents : ?fast:bool -> Cpdfposition.position -> float -> Pdf.t -> int list -> Pdf.t - val trim_marks : ?fast:bool -> Pdf.t -> int list -> Pdf.t val show_boxes : ?fast:bool -> Pdf.t -> int list -> Pdf.t -(** {2 Imposition} *) - -val impose : x:float -> y:float -> fit:bool -> columns:bool -> rtl:bool -> btt:bool -> center:bool -> margin:float -> spacing:float -> linewidth:float -> fast:bool -> Pdf.t -> Pdf.t - -(** The twoup_stack operation puts two logical pages on each physical page, -rotating them 90 degrees to do so. The new mediabox is thus larger. Bool true -(fast) if assume well-formed ISO content streams. *) -val twoup_stack : bool -> Pdf.t -> Pdf.t - -(** The twoup operation does the same, but scales the new sides down so that -the media box is unchanged. Bool true (fast) if assume well-formed ISO content streams. *) -val twoup : bool -> Pdf.t -> Pdf.t - (** {2 Miscellany} *) (** Make all lines in the PDF at least a certain thickness. *) diff --git a/cpdfcommand.ml b/cpdfcommand.ml index 109d133..a615a09 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -3220,7 +3220,7 @@ let go () = let pdf = get_single_pdf (Some MediaBox) false in let xywhlist = Cpdfcoord.parse_rectangles pdf args.rectangle in let range = parse_pagespec_allow_empty pdf pagespec in - let pdf = Cpdf.set_mediabox xywhlist pdf range in + let pdf = Cpdfpage.set_mediabox xywhlist pdf range in write_pdf false pdf | _ -> error "set media box: bad command line" end @@ -3229,7 +3229,7 @@ let go () = | (_, pagespec, _, _, _, _)::_, _ -> let pdf = get_single_pdf (Some (HardBox box)) false in let range = parse_pagespec_allow_empty pdf pagespec in - let pdf = Cpdf.hard_box pdf range box args.mediabox_if_missing args.fast in + let pdf = Cpdfpage.hard_box pdf range box args.mediabox_if_missing args.fast in write_pdf false pdf | _ -> error "hard box: bad command line" end @@ -3266,7 +3266,7 @@ let go () = | (_, pagespec, _, _, _, _)::_, _ -> let pdf = get_single_pdf (Some RemoveCrop) 
false in let range = parse_pagespec_allow_empty pdf pagespec in - let pdf = Cpdf.remove_cropping_pdf pdf range in + let pdf = Cpdfpage.remove_cropping_pdf pdf range in write_pdf false pdf | _ -> error "remove-crop: bad command line" end @@ -3275,7 +3275,7 @@ let go () = | (_, pagespec, _, _, _, _)::_, _ -> let pdf = get_single_pdf (Some RemoveArt) false in let range = parse_pagespec_allow_empty pdf pagespec in - let pdf = Cpdf.remove_art_pdf pdf range in + let pdf = Cpdfpage.remove_art_pdf pdf range in write_pdf false pdf | _ -> error "remove-crop: bad command line" end @@ -3284,7 +3284,7 @@ let go () = | (_, pagespec, _, _, _, _)::_, _ -> let pdf = get_single_pdf (Some RemoveTrim) false in let range = parse_pagespec_allow_empty pdf pagespec in - let pdf = Cpdf.remove_trim_pdf pdf range in + let pdf = Cpdfpage.remove_trim_pdf pdf range in write_pdf false pdf | _ -> error "remove-crop: bad command line" end @@ -3293,7 +3293,7 @@ let go () = | (_, pagespec, _, _, _, _)::_, _ -> let pdf = get_single_pdf (Some RemoveBleed) false in let range = parse_pagespec_allow_empty pdf pagespec in - let pdf = Cpdf.remove_bleed_pdf pdf range in + let pdf = Cpdfpage.remove_bleed_pdf pdf range in write_pdf false pdf | _ -> error "remove-crop: bad command line" end @@ -3302,7 +3302,7 @@ let go () = | (_, pagespec, _, _, _, _)::_, _ -> let pdf = get_single_pdf (Some CopyCropBoxToMediaBox) false in let range = parse_pagespec_allow_empty pdf pagespec in - let pdf = Cpdf.copy_cropbox_to_mediabox pdf range in + let pdf = Cpdfpage.copy_cropbox_to_mediabox pdf range in write_pdf false pdf | _ -> error "remove-crop: bad command line" end @@ -3313,8 +3313,8 @@ let go () = let range = parse_pagespec_allow_empty pdf pagespec in let rotate = match args.op with - | Some (Rotate i) -> Cpdf.rotate_pdf i - | Some (Rotateby i) -> Cpdf.rotate_pdf_by i + | Some (Rotate i) -> Cpdfpage.rotate_pdf i + | Some (Rotateby i) -> Cpdfpage.rotate_pdf_by i | _ -> assert false in let pdf = rotate pdf range in @@ 
-3326,7 +3326,7 @@ let go () = | (_, pagespec, _, _, _, _)::_, _ -> let pdf = get_single_pdf args.op false in let range = parse_pagespec_allow_empty pdf pagespec in - let pdf = Cpdf.rotate_contents ~fast:args.fast a pdf range in + let pdf = Cpdfpage.rotate_contents ~fast:args.fast a pdf range in write_pdf false pdf | _ -> error "rotate-contents: bad command line" end @@ -3335,7 +3335,7 @@ let go () = | (_, pagespec, _, _, _, _)::_, _ -> let pdf = get_single_pdf args.op false in let range = parse_pagespec_allow_empty pdf pagespec in - let pdf = Cpdf.upright ~fast:args.fast range pdf in + let pdf = Cpdfpage.upright ~fast:args.fast range pdf in write_pdf false pdf | _ -> error "rotate-contents: bad command line" end @@ -3561,22 +3561,22 @@ let go () = let pdf = get_single_pdf args.op false in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in let dxdylist = Cpdfcoord.parse_coordinates pdf args.coord in - write_pdf false (Cpdf.shift_pdf ~fast:args.fast dxdylist pdf range) + write_pdf false (Cpdfpage.shift_pdf ~fast:args.fast dxdylist pdf range) | Some Scale -> let pdf = get_single_pdf args.op false in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in let sxsylist = Cpdfcoord.parse_coordinates pdf args.coord in - write_pdf false (Cpdf.scale_pdf ~fast:args.fast sxsylist pdf range) + write_pdf false (Cpdfpage.scale_pdf ~fast:args.fast sxsylist pdf range) | Some ScaleToFit -> let pdf = get_single_pdf args.op false in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in let xylist = Cpdfcoord.parse_coordinates pdf args.coord and scale = args.scale in - write_pdf false (Cpdf.scale_to_fit_pdf ~fast:args.fast args.position scale xylist args.op pdf range) + write_pdf false (Cpdfpage.scale_to_fit_pdf ~fast:args.fast args.position scale xylist args.op pdf range) | Some (ScaleContents scale) -> let pdf = get_single_pdf args.op false in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in - write_pdf false (Cpdf.scale_contents 
~fast:args.fast args.position scale pdf range) + write_pdf false (Cpdfpage.scale_contents ~fast:args.fast args.position scale pdf range) | Some ListAttachedFiles -> let pdf = get_single_pdf args.op false in let attachments = Cpdfattach.list_attached_files pdf in @@ -3659,7 +3659,7 @@ let go () = | OtherFont f -> None (* it's in fontname *) in let pdf = - if args.prerotate then Cpdf.upright ~fast:args.fast range pdf else pdf + if args.prerotate then Cpdfpage.upright ~fast:args.fast range pdf else pdf and filename = match args.inputs with | (InFile inname, _, _, _, _, _)::_ -> inname @@ -3692,15 +3692,15 @@ let go () = | Some RemoveBookmarks -> write_pdf false (Pdfmarks.remove_bookmarks (get_single_pdf args.op false)) | Some TwoUp -> - write_pdf false (Cpdf.twoup args.fast (get_single_pdf args.op false)) + write_pdf false (Cpdfimpose.twoup args.fast (get_single_pdf args.op false)) | Some TwoUpStack -> - write_pdf false (Cpdf.twoup_stack args.fast (get_single_pdf args.op false)) + write_pdf false (Cpdfimpose.twoup_stack args.fast (get_single_pdf args.op false)) | Some Impose fit -> let pdf = get_single_pdf args.op false in let x, y = Cpdfcoord.parse_coordinate pdf args.coord in if not fit && (x < 0.0 || y < 0.0) then error "Negative imposition parameters not allowed." else write_pdf false - (Cpdf.impose ~x ~y ~fit ~columns:args.impose_columns ~rtl:args.impose_rtl ~btt:args.impose_btt ~center:args.impose_center + (Cpdfimpose.impose ~x ~y ~fit ~columns:args.impose_columns ~rtl:args.impose_rtl ~btt:args.impose_btt ~center:args.impose_center ~margin:args.impose_margin ~spacing:args.impose_spacing ~linewidth:args.impose_linewidth ~fast:args.fast pdf) | Some (StampOn over) -> let overpdf = diff --git a/cpdfimpose.ml b/cpdfimpose.ml new file mode 100644 index 0000000..be0e1de --- /dev/null +++ b/cpdfimpose.ml @@ -0,0 +1,281 @@ +open Pdfutil +open Cpdferror + +(* Imposition *) + +(* Union two rest dictionaries from the same PDF. 
*) +let combine_pdf_rests pdf a b = + let a_entries = + match a with + | Pdf.Dictionary entries -> entries + | _ -> [] + in let b_entries = + match b with + | Pdf.Dictionary entries -> entries + | _ -> [] + in + let keys_to_combine = ["/Annots"] in + let combine_entries key = + let a_entries = + match Pdf.lookup_direct pdf key a with + | Some (Pdf.Array d) -> d + | _ -> [] + in let b_entries = + match Pdf.lookup_direct pdf key b with + | Some (Pdf.Array d) -> d + | _ -> [] + in + if a_entries = [] && b_entries = [] then + None + else + Some (key, Pdf.Array (a_entries @ b_entries)) + in + let unknown_keys_a = lose (fun (k, _) -> mem k keys_to_combine) a_entries in + let unknown_keys_b = lose (fun (k, _) -> mem k keys_to_combine) b_entries in + let combined_known_entries = option_map combine_entries keys_to_combine in + fold_left + (fun dict (k, v) -> Pdf.add_dict_entry dict k v) + (Pdf.Dictionary []) + (unknown_keys_a @ unknown_keys_b @ combined_known_entries) + +(* Calculate the transformation matrices for a single imposed output page. *) + +(* make margins by scaling for a fitted impose. *) +let make_margin output_mediabox margin tr = + if margin = 0. then tr else + let width, height = + match Pdf.parse_rectangle output_mediabox with + xmin, ymin, xmax, ymax -> xmax -. xmin, ymax -. ymin + in + if margin > width /. 2. || margin > height /. 2. then error "margin would fill whole page!" else + let hfactor = (width -. margin -. margin) /. width in + let vfactor = (height -. margin -. margin) /. height in + let factor = fmin hfactor vfactor in + let scale = Pdftransform.matrix_of_op (Pdftransform.Scale ((0., 0.), factor, factor)) in + let shift = + Pdftransform.matrix_of_op (Pdftransform.Translate ((width -. width *. factor) /. 2., + (height -. height *. factor) /. 2.)) + in + (Pdftransform.matrix_compose shift (Pdftransform.matrix_compose scale tr)) + +(* FIXME fixup -center for next release. For now it has been disabled. 
*) +let impose_transforms fit fx fy columns rtl btt center margin mediabox output_mediabox fit_extra_hspace fit_extra_vspace len = + let width, height = + match Pdf.parse_rectangle mediabox with + xmin, ymin, xmax, ymax -> xmax -. xmin, ymax -. ymin + in + let trs = ref [] in + let len = ref len in + let cent_extra_x = ref 0. in + let cent_extra_y = ref 0. in + let addtr x y row col px py = + let cex, cey = + (if rtl then ~-.(!cent_extra_x) else !cent_extra_x), (if btt then ~-.(!cent_extra_y) else !cent_extra_y) + in + let spacecol = if rtl then x - col - 1 else col in + let total_fit_extra_hspace = fit_extra_hspace *. (float_of_int spacecol +. 1.) in + let total_fit_extra_vspace = fit_extra_vspace *. (float_of_int row +. 1.) in + (*Printf.printf "row = %i, py = %f, ey = %f, fit_extra_vspace = %f, total_fit_extra_vspace = %f\n" row py cey fit_extra_vspace total_fit_extra_vspace;*) + trs := + Pdftransform.matrix_of_transform + [Pdftransform.Translate (px +. cex +. total_fit_extra_hspace, py +. cey +. total_fit_extra_vspace)] + ::!trs + in + let x = int_of_float fx in + let y = int_of_float fy in + let final_full_cols = !len mod x in + let final_full_rows = !len mod y in + let order row col = + ((if btt then y - row - 1 else row), (if rtl then x - col - 1 else col)) + in + if columns then + for col = 0 to x - 1 do + if center && !len < y then if !cent_extra_y = 0. then cent_extra_y := ~-.(height *. float_of_int (y - !len)) /. 2.; + for row = y - 1 downto 0 do + let original_row = row in + let row, col = order row col in + let adjusted_row = + let final_empty_rows = y - final_full_rows in + if center && !len <= final_full_rows then original_row + (y - 1 - 1 - (final_empty_rows / 2)) else original_row + in + if !len > 0 then addtr x y adjusted_row col (width *. float_of_int col) (height *. float_of_int row); + len := !len - 1 + done + done + else + for row = y - 1 downto 0 do + if center && !len < x then if !cent_extra_x = 0. then cent_extra_x := (width *. 
float_of_int (x - !len)) /. 2.; + for col = 0 to x - 1 do + let original_col = col in + let row, col = order row col in + let adjusted_col = + let final_empty_cols = x - final_full_cols in + if center && !len <= final_full_cols then original_col + (x - 1 - 1 - (final_empty_cols / 2)) else original_col + in + if !len > 0 then addtr x y row adjusted_col (width *. float_of_int col) (height *. float_of_int row); + len := !len - 1 + done + done; + map (if fit then make_margin output_mediabox margin else Fun.id) (rev !trs) + +(* Combine two pages into one throughout the document. The pages have already +had their objects renumbered so as not to clash. *) +let impose_pages fit x y columns rtl btt center margin output_mediabox fast fit_extra_hspace fit_extra_vspace pdf = function + | [] -> assert false + | (h::_) as pages -> + let transforms = + impose_transforms + fit x y columns rtl btt center margin h.Pdfpage.mediabox + output_mediabox fit_extra_hspace fit_extra_vspace (length pages) + in + (* Change the pattern matrices before combining resources *) + let pages, h = + let r = map2 (fun p t -> Cpdfutil.change_pattern_matrices_page pdf t p) pages transforms in + (r, List.hd r) + in + let resources' = pair_reduce (Cpdfutil.combine_pdf_resources pdf) (map (fun p -> p.Pdfpage.resources) pages) in + let rest' = pair_reduce (combine_pdf_rests pdf) (map (fun p -> p.Pdfpage.rest) pages) in + let content' = + let transform_stream transform contents = + (* If fast, no mismatched q/Q protection and no parsing of operators. *) + if fast then + [Pdfops.stream_of_ops [Pdfops.Op_q; Pdfops.Op_cm transform]] @ contents @ [Pdfops.stream_of_ops [Pdfops.Op_Q]] + else + (* If slow, use protect from Pdfpage. 
*) + let ops = Pdfpage.protect pdf resources' contents @ Pdfops.parse_operators pdf resources' contents in + [Pdfops.stream_of_ops ([Pdfops.Op_q] @ [Pdfops.Op_cm transform] @ ops @ [Pdfops.Op_Q])] + in + flatten + (map2 + (fun p t -> Cpdfutil.transform_annotations pdf t p.Pdfpage.rest; transform_stream t p.Pdfpage.content) + pages + transforms) + in + {Pdfpage.mediabox = output_mediabox; + Pdfpage.rotate = h.Pdfpage.rotate; + Pdfpage.content = content'; + Pdfpage.resources = resources'; + Pdfpage.rest = rest'} + +(* For fit, we scale contents, move to middle and retain page size. For xy, we + expand mediabox and move contents to middle. This function also does the hard boxing. *) +let make_space fit ~fast spacing pdf = + let endpage = Pdfpage.endpage pdf in + let all = ilist 1 endpage in + let pdf = Cpdfpage.hard_box pdf all "/MediaBox" false fast in + if spacing = 0. then pdf else + let margin = spacing /. 2. in + let firstpage = hd (Pdfpage.pages_of_pagetree pdf) in + let width, height = + match Pdf.parse_rectangle firstpage.Pdfpage.mediabox with + xmin, ymin, xmax, ymax -> (xmax -. xmin, ymax -. ymin) + in + if fit then + (Cpdfpage.shift_pdf + ~fast + (many (margin, margin) endpage) + (Cpdfpage.scale_contents ~fast (Cpdfposition.BottomLeft 0.) ((width -. spacing) /. width) pdf all) + all) + else + (Cpdfpage.set_mediabox + (many (0., 0., width +. spacing, height +. spacing) endpage) + (Cpdfpage.shift_pdf ~fast (many (margin, margin) endpage) pdf all) all) + +(* We add the border as a thick unfilled rectangle just inside the page edge, + only if its linewidth is > 0 since, for us, 0 means none, not single-pixel + like in PDF. *) +let add_border linewidth ~fast pdf = + if linewidth = 0. then pdf else + let firstpage = hd (Pdfpage.pages_of_pagetree pdf) in + let _, _, w, h = Pdf.parse_rectangle firstpage.Pdfpage.mediabox in + Cpdfaddtext.addrectangle + fast (w -. linewidth, h -. linewidth) (RGB (0., 0., 0.)) true linewidth 1. (Cpdfposition.BottomLeft (linewidth /. 
2.)) + false false (ilist 1 (Pdfpage.endpage pdf)) pdf + +let impose ~x ~y ~fit ~columns ~rtl ~btt ~center ~margin ~spacing ~linewidth ~fast pdf = + let endpage = Pdfpage.endpage pdf in + let pagenums = ilist 1 endpage in + let pdf = Cpdfpage.copy_cropbox_to_mediabox pdf pagenums in + let pdf = Cpdfpage.remove_cropping_pdf pdf pagenums in + let pdf = Cpdfpage.upright pagenums pdf in + let pdf = add_border linewidth ~fast pdf in + let pdf = make_space fit ~fast spacing pdf in + let firstpage = hd (Pdfpage.pages_of_pagetree pdf) in + let _, _, w, h = Pdf.parse_rectangle firstpage.Pdfpage.mediabox in + let ix = int_of_float x in + let iy = int_of_float y in + let n, ix, iy, fit_extra_hspace, fit_extra_vspace = + if fit then + (* +. 0.001 ensures a page always fits on itself, or on another page of same height or width. *) + let across = int_of_float (floor (x /. w +. 0.001)) in + let down = int_of_float (floor (y /. h +. 0.001)) in + if across < 1 || down < 1 then error "Not even a single page would fit." else + let excess_hspace = x -. float_of_int across *. w in + let excess_vspace = y -. float_of_int down *. h in + (*Printf.printf "across = %i, down =%i, excess_hspace = %f, excess_hspace = %f\n" across down excess_hspace excess_vspace;*) + (across * down, + across, + down, + excess_hspace /. (float_of_int across +. 1.), + excess_vspace /. (float_of_int down +. 1.)) + else + if ix = 0 && iy = 0 then error "impose-xy: both dimensions cannot be zero" else + if ix = 0 then (endpage, endpage, 1, 0., 0.) + else if iy = 0 then (endpage, 1, endpage, 0., 0.) + else (ix * iy, ix, iy, 0., 0.) + in + let mediabox' = + if fit then Pdf.Array [Pdf.Real 0.; Pdf.Real 0.; Pdf.Real x; Pdf.Real y] else + let m2 = margin *. 2. in + if x = 0.0 then Pdf.Array [Pdf.Real 0.; Pdf.Real 0.; Pdf.Real (w *. float_of_int endpage +. m2); Pdf.Real (h +. m2)] + else if y = 0.0 then Pdf.Array [Pdf.Real 0.; Pdf.Real 0.; Pdf.Real (w +. m2); Pdf.Real (h *. float_of_int endpage +. 
m2)] + else Pdf.Array [Pdf.Real 0.; Pdf.Real 0.; Pdf.Real (w *. x +. m2); Pdf.Real (h *. y +. m2)] + in + let pages = Pdfpage.pages_of_pagetree pdf in + let pagesets = splitinto n pages in + let renumbered = map (Pdfpage.renumber_pages pdf) pagesets in + let pages = + map + (impose_pages fit (float_of_int ix) (float_of_int iy) columns rtl btt + center margin mediabox' fast fit_extra_hspace fit_extra_vspace pdf) + renumbered + in + let changes = map (fun x -> (x, (x + (n - 1)) / n)) pagenums in + let pdf = Pdfpage.change_pages ~changes true pdf pages in + if fit then pdf else Cpdfpage.shift_pdf ~fast (many (margin, margin) (length pages)) pdf (ilist 1 (Pdfpage.endpage pdf)) + +(* Legacy -twoup-stack. Impose 2x1 on a page twice the size then rotate. *) +let twoup_stack fast pdf = + let pdf = + impose + ~x:2. ~y:1. ~fit:false ~columns:false ~rtl:false ~btt:false ~center:false + ~margin:0. ~spacing:0. ~linewidth:0. ~fast pdf + in + let all = ilist 1 (Pdfpage.endpage pdf) in + Cpdfpage.upright ~fast all (Cpdfpage.rotate_pdf ~-90 pdf all) + +(* Legacy -two-up. Rotate the pages and shrink them so as to fit 2x1 on a page the same size. *) +let twoup fast pdf = + let firstpage = hd (Pdfpage.pages_of_pagetree pdf) in + let width, height = + match Pdf.parse_rectangle firstpage.Pdfpage.mediabox with + xmin, ymin, xmax, ymax -> xmax -. xmin, ymax -. ymin + in + let width_exceeds_height = width > height in + let sc = + if width_exceeds_height + then fmin (height /. width) ((width /. 2.) /. height) + else fmin (width /. height) ((height /. 2.) /. width) + in + let endpage = Pdfpage.endpage pdf in + let all = ilist 1 endpage in + let pdf = Cpdfpage.scale_pdf ~fast (many (sc, sc) endpage) pdf all in + let pdf = + impose + ~x:2. ~y:1. ~fit:false ~columns:false ~rtl:false ~btt:false ~center:true + ~margin:0. ~spacing:0. ~linewidth:0. 
~fast pdf + in + let endpage = Pdfpage.endpage pdf in + let all = ilist 1 endpage in + let pdf = Cpdfpage.upright all (Cpdfpage.rotate_pdf ~-90 pdf all) in + Cpdfpage.scale_to_fit_pdf ~fast Cpdfposition.Diagonal 1. (many (width, height) endpage) () pdf all diff --git a/cpdfimpose.mli b/cpdfimpose.mli new file mode 100644 index 0000000..2608596 --- /dev/null +++ b/cpdfimpose.mli @@ -0,0 +1,12 @@ +(** {2 Imposition} *) + +val impose : x:float -> y:float -> fit:bool -> columns:bool -> rtl:bool -> btt:bool -> center:bool -> margin:float -> spacing:float -> linewidth:float -> fast:bool -> Pdf.t -> Pdf.t + +(** The twoup_stack operation puts two logical pages on each physical page, +rotating them 90 degrees to do so. The new mediabox is thus larger. Bool true +(fast) if assume well-formed ISO content streams. *) +val twoup_stack : bool -> Pdf.t -> Pdf.t + +(** The twoup operation does the same, but scales the new sides down so that +the media box is unchanged. Bool true (fast) if assume well-formed ISO content streams. *) +val twoup : bool -> Pdf.t -> Pdf.t diff --git a/cpdfpage.ml b/cpdfpage.ml index f3aadc5..29f8707 100644 --- a/cpdfpage.ml +++ b/cpdfpage.ml @@ -1,4 +1,5 @@ open Pdfutil +open Cpdferror (* Output information for each page *) let output_page_info pdf range = @@ -59,3 +60,328 @@ let map_pages f pdf range = (ilist 1 (length pages)) pages +(* Clip a page to one of its boxes, or the media box if that box is not + * present. This is a hard clip, done by using a clipping rectangle, so that + * the page may then be used as a stamp without extraneous material reappearing. 
+ * *) +let hard_box pdf range boxname mediabox_if_missing fast = + process_pages + (Cpdfutil.ppstub (fun pagenum page -> + let minx, miny, maxx, maxy = + if boxname = "/MediaBox" then + Pdf.parse_rectangle page.Pdfpage.mediabox + else + match Pdf.lookup_direct pdf boxname page.Pdfpage.rest with + | Some a -> Pdf.parse_rectangle a + | _ -> + if mediabox_if_missing + then Pdf.parse_rectangle page.Pdfpage.mediabox + else error (Printf.sprintf "hard_box: box %s not found" boxname) + in + let ops = [Pdfops.Op_re (minx, miny, maxx -. minx, maxy -. miny); Pdfops.Op_W; Pdfops.Op_n] in + Pdfpage.prepend_operators pdf ops ~fast page)) + pdf + range + +let shift_page ?(fast=false) dxdylist pdf pnum page = + let dx, dy = List.nth dxdylist (pnum - 1) in + let transform_op = + Pdfops.Op_cm (Pdftransform.matrix_of_op (Pdftransform.Translate (dx, dy))) + in + let page = + Cpdfutil.change_pattern_matrices_page pdf (Pdftransform.mktranslate ~-.dx ~-.dy) page + in + Cpdfutil.transform_annotations pdf (Pdftransform.mktranslate dx dy) page.Pdfpage.rest; + (Pdfpage.prepend_operators pdf [transform_op] ~fast page, pnum, Pdftransform.mktranslate dx dy) + +let shift_pdf ?(fast=false) dxdylist pdf range = + process_pages (shift_page ~fast dxdylist pdf) pdf range + +(* \section{Shift page data} *) +let make_mediabox (xmin, ymin, xmax, ymax) = + Pdf.Array + [Pdf.Real xmin; Pdf.Real ymin; Pdf.Real xmax; Pdf.Real ymax] + +(* Change the media box and other known boxes by the function [f] which takes +xmin, ymin, xmax, ymax as input. 
*) +let change_boxes f pdf page = + let names = ["/TrimBox"; "/ArtBox"; "/CropBox"; "/BleedBox"] + in let getbox n = + Pdf.lookup_direct pdf n page.Pdfpage.rest + in + let boxes = combine names (map getbox names) in + let toreplace = lose (function (_, None) -> true | _ -> false) boxes in + let toreplace = + map + (function (name, Some value) -> (name, value) | _ -> assert false) + toreplace + in + let rest' = + fold_left + (fun e (k, v) -> + let v = + make_mediabox (f (Pdf.parse_rectangle v)) + in + Pdf.replace_dict_entry e k v) + page.Pdfpage.rest + toreplace + in + {page with + Pdfpage.mediabox = + make_mediabox (f (Pdf.parse_rectangle page.Pdfpage.mediabox)); + Pdfpage.rest = rest'} + +(* Change a page's media box so its minimum x and y are 0, making other +operations simpler to think about. Any shift that is done is reflected in +other boxes (clip etc.) *) +let rectify_boxes ?(fast=false) pdf page = + let minx, miny, _, _ = + Pdf.parse_rectangle page.Pdfpage.mediabox + in + let f (iminx, iminy, imaxx, imaxy) = + iminx -. minx, iminy -. miny, imaxx -. minx, imaxy -. miny + in + let page = change_boxes f pdf page in + if minx <> 0. || miny <> 0. + then + begin let p, _, _ = shift_page ~fast [(-.minx),(-.miny)] pdf 1 page in p end + else page + +(* Scale contents *) +let scale_page_contents ?(fast=false) scale position pdf pnum page = + let (minx, miny, maxx, maxy) as box = + (* Use cropbox if available *) + Pdf.parse_rectangle + (match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with + | Some r -> r + | None -> page.Pdfpage.mediabox) + in + let sx, sy, _ = Cpdfposition.calculate_position true 0. box Horizontal position in + let tx, ty = + let open Cpdfposition in + match position with + | Top t -> 0., -.t + | TopLeft t -> t, -.t + | TopRight t -> -.t, -.t + | Left t -> t, 0. + | BottomLeft t -> t, t + | Bottom t -> 0., t + | BottomRight t -> -.t, t + | Right t -> -.t, 0. + | _ -> 0., 0. (* centre it... 
FIXME: We will add a center position, eventually, for text and this... *) + in + let transform = + Pdftransform.matrix_of_transform + [Pdftransform.Translate (tx, ty); + Pdftransform.Scale ((sx, sy), scale, scale)] + in + let transform_op = Pdfops.Op_cm transform in + let page = Cpdfutil.change_pattern_matrices_page pdf transform page in + Cpdfutil.transform_annotations pdf transform page.Pdfpage.rest; + (Pdfpage.prepend_operators pdf [transform_op] ~fast page, pnum, transform) + +let scale_contents ?(fast=false) position scale pdf range = + process_pages (scale_page_contents ~fast scale position pdf) pdf range + +(* \section{Set media box} *) +let set_mediabox xywhlist pdf range = + let crop_page pnum page = + let x, y, w, h = List.nth xywhlist (pnum - 1) in + {page with + Pdfpage.mediabox = + (Pdf.Array + [Pdf.Real x; Pdf.Real y; + Pdf.Real (x +. w); Pdf.Real (y +. h)])} + in + process_pages (Cpdfutil.ppstub crop_page) pdf range + +(* If a cropbox exists, make it the mediabox. If not, change nothing. 
*)
+let copy_cropbox_to_mediabox pdf range =
+  let promote_cropbox _ page =
+    match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with
+    | None -> page
+    | Some pdfobject -> {page with Pdfpage.mediabox = Pdf.direct pdf pdfobject}
+  in
+  process_pages (Cpdfutil.ppstub promote_cropbox) pdf range
+
+(* Drop the /CropBox entry from each page in [range]. *)
+let remove_cropping_pdf pdf range =
+  process_pages
+    (Cpdfutil.ppstub
+       (fun _ page ->
+          {page with Pdfpage.rest = Pdf.remove_dict_entry page.Pdfpage.rest "/CropBox"}))
+    pdf range
+
+(* Drop the /TrimBox entry from each page in [range]. *)
+let remove_trim_pdf pdf range =
+  process_pages
+    (Cpdfutil.ppstub
+       (fun _ page ->
+          {page with Pdfpage.rest = Pdf.remove_dict_entry page.Pdfpage.rest "/TrimBox"}))
+    pdf range
+
+(* Drop the /ArtBox entry from each page in [range]. *)
+let remove_art_pdf pdf range =
+  process_pages
+    (Cpdfutil.ppstub
+       (fun _ page ->
+          {page with Pdfpage.rest = Pdf.remove_dict_entry page.Pdfpage.rest "/ArtBox"}))
+    pdf range
+
+(* Drop the /BleedBox entry from each page in [range]. *)
+let remove_bleed_pdf pdf range =
+  process_pages
+    (Cpdfutil.ppstub
+       (fun _ page ->
+          {page with Pdfpage.rest = Pdf.remove_dict_entry page.Pdfpage.rest "/BleedBox"}))
+    pdf range
+
+(* Upright functionality *)
+
+(* Return the pages of [pdf] whose numbers are listed in [range]. A number
+   for which [select] raises (any exception) is silently skipped. *)
+let select_pages range pdf =
+  let pages = Pdfpage.pages_of_pagetree pdf in
+  option_map
+    (fun n ->
+       match select n pages with
+       | page -> Some page
+       | exception _ -> None)
+    range
+
+(* If all pages are already upright, and the mediabox is (0,0)-based, do nothing
+to save time.
*)
+let allupright range pdf =
+  let near_zero v = abs_float v < 0.001 in
+  let page_is_upright page =
+    page.Pdfpage.rotate = Pdfpage.Rotate0 &&
+    (let minx, miny, _, _ = Pdf.parse_rectangle page.Pdfpage.mediabox in
+     near_zero minx && near_zero miny)
+  in
+  (* Every selected page is examined before the results are combined. *)
+  List.for_all (fun ok -> ok) (map page_is_upright (select_pages range pdf))
+
+(* Rotation about the centre of the media box by minus the page's /Rotate
+   value (in degrees). *)
+let upright_transform page =
+  let minx, miny, maxx, maxy = Pdf.parse_rectangle page.Pdfpage.mediabox in
+  let centre = (minx +. maxx) /. 2., (miny +. maxy) /. 2. in
+  let degrees = float_of_int (Pdfpage.int_of_rotation page.Pdfpage.rotate) in
+  Pdftransform.mkrotate centre (rad_of_deg (-.degrees))
+
+(* Apply [tr] to the (min, min) and (max, max) corners of every box handled
+   by [change_boxes]. The two transformed corners are stored as they come out;
+   they are not re-sorted into min/max order here. *)
+let transform_boxes tr pdf page =
+  let move_corners (minx, miny, maxx, maxy) =
+    let x0, y0 = Pdftransform.transform_matrix tr (minx, miny) in
+    let x1, y1 = Pdftransform.transform_matrix tr (maxx, maxy) in
+    (x0, y0, x1, y1)
+  in
+  change_boxes move_corners pdf page
+
+(* Prepend [tr] to the page content, apply [tr] to the annotations and hand
+   the inverse of [tr] to [Cpdfutil.change_pattern_matrices_page]. *)
+let transform_contents ?(fast=false) tr pdf page =
+  let inverse = Pdftransform.matrix_invert tr in
+  let page = Cpdfutil.change_pattern_matrices_page pdf inverse page in
+  Cpdfutil.transform_annotations pdf tr page.Pdfpage.rest;
+  Pdfpage.prepend_operators pdf [Pdfops.Op_cm tr] ~fast page
+
+(* Make every page in [range] upright: rotate boxes and contents by
+   [upright_transform], set /Rotate to 0 and rectify the boxes. Returns [pdf]
+   untouched when [allupright] already holds. *)
+let upright ?(fast=false) range pdf =
+  if allupright range pdf then pdf
+  else
+    let upright_page pnum page =
+      let tr = upright_transform page in
+      let page = transform_contents ~fast tr pdf (transform_boxes tr pdf page) in
+      (rectify_boxes ~fast pdf {page with Pdfpage.rotate = Pdfpage.Rotate0}, pnum, tr)
+    in
+    process_pages upright_page pdf range
+
+(* \section{Rotating pages} *)
+(* Set the rotation of each page in [range] to [r]. *)
+let rotate_pdf r pdf range =
+  let set_rotation _ page =
+    {page with Pdfpage.rotate = Pdfpage.rotation_of_int r}
+  in
+  process_pages (Cpdfutil.ppstub set_rotation) pdf range
+
+(* Add [r] to the current rotation of each page in [range], modulo 360. *)
+let rotate_pdf_by r pdf range =
+  let add_rotation _ page =
+    let current = Pdfpage.int_of_rotation page.Pdfpage.rotate in
+    {page with Pdfpage.rotate = Pdfpage.rotation_of_int ((current + r) mod 360)}
+  in
+  process_pages (Cpdfutil.ppstub add_rotation) pdf range
+
+(* Rotate the contents of one page about [rotpoint], or about the centre of
+   the media box when [rotpoint] is [None]. The content and annotations get
+   the rotation by minus [r] degrees; the rotation by plus [r] degrees is what
+   is handed to [Cpdfutil.change_pattern_matrices_page]. *)
+let rotate_page_contents ~fast rotpoint r pdf pnum page =
+  let centre =
+    match rotpoint with
+    | Some point -> point
+    | None ->
+        let minx, miny, maxx, maxy = Pdf.parse_rectangle page.Pdfpage.mediabox in
+        (minx +. maxx) /. 2., (miny +. maxy) /. 2.
+  in
+  let rotation_by angle =
+    Pdftransform.matrix_of_op (Pdftransform.Rotate (centre, angle))
+  in
+  let tr = rotation_by (-.(rad_of_deg r)) in
+  let tr2 = rotation_by (rad_of_deg r) in
+  let page = Cpdfutil.change_pattern_matrices_page pdf tr2 page in
+  Cpdfutil.transform_annotations pdf tr page.Pdfpage.rest;
+  (Pdfpage.prepend_operators pdf [Pdfops.Op_cm tr] ~fast page, pnum, tr)
+
+(* Apply [rotate_page_contents] about the page centre to each page in
+   [range]. *)
+let rotate_contents ?(fast=false) r pdf range =
+  let rotate_one = rotate_page_contents ~fast None r pdf in
+  process_pages rotate_one pdf range
+
+(* \section{Scale page data} *)
+(* [sxsylist] is indexed by page number (entry [pnum - 1] for page [pnum]).
+   Boxes, contents and annotations are scaled about the origin. *)
+let scale_pdf ?(fast=false) sxsylist pdf range =
+  let scale_page pnum page =
+    let sx, sy = List.nth sxsylist (pnum - 1) in
+    let scale_box (xmin, ymin, xmax, ymax) =
+      xmin *. sx, ymin *. sy, xmax *. sx, ymax *. sy
+    in
+    let page = change_boxes scale_box pdf page in
+    let matrix = Pdftransform.matrix_of_op (Pdftransform.Scale ((0., 0.), sx, sy)) in
+    let page =
+      Cpdfutil.change_pattern_matrices_page pdf (Pdftransform.matrix_invert matrix) page
+    in
+    Cpdfutil.transform_annotations pdf matrix page.Pdfpage.rest;
+    (Pdfpage.prepend_operators pdf ~fast [Pdfops.Op_cm matrix] page, pnum, matrix)
+  in
+  process_pages scale_page pdf range
+
+(* Scale to fit page of size x * y *)
+(* The target size for page [pnum] is entry [pnum - 1] of [xylist]; [op] is
+   not used. Every box handled by [change_boxes] becomes [0 0 x y].
+   NOTE(review): the page size is taken to be (maxx, maxy) of the crop/media
+   box, which is the true width/height only when that box starts at (0, 0) --
+   confirm callers rectify the page first. *)
+let scale_to_fit_pdf ?(fast=false) position input_scale xylist op pdf range =
+  let scale_page_to_fit pnum page =
+    let x, y = List.nth xylist (pnum - 1) in
+    let matrix =
+      (* Use cropbox if available *)
+      let source_box =
+        match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with
+        | Some r -> r
+        | None -> page.Pdfpage.mediabox
+      in
+      let _, _, maxx, maxy = Pdf.parse_rectangle source_box in
+      if maxx <= 0. || maxy <= 0. then failwith "Zero-sized pages are invalid"
+      else
+        let scale = fmin (x /. maxx) (y /. maxy) *. input_scale in
+        (* Space left over on each axis once the scaled page is placed. *)
+        let spare_x = x -. (maxx *. scale) in
+        let spare_y = y -. (maxy *. scale) in
+        let trans_x =
+          match position with
+          | Cpdfposition.Left _ -> 0.
+          | Cpdfposition.Right _ -> spare_x
+          | _ -> spare_x /. 2.
+        in
+        let trans_y =
+          match position with
+          | Cpdfposition.Top _ -> spare_y
+          | Cpdfposition.Bottom _ -> 0.
+          | _ -> spare_y /. 2.
+        in
+        Pdftransform.matrix_of_transform
+          [Pdftransform.Translate (trans_x, trans_y);
+           Pdftransform.Scale ((0., 0.), scale, scale)]
+    in
+    let page = change_boxes (fun _ -> 0., 0., x, y) pdf page in
+    Cpdfutil.transform_annotations pdf matrix page.Pdfpage.rest;
+    let page =
+      Cpdfutil.change_pattern_matrices_page pdf (Pdftransform.matrix_invert matrix) page
+    in
+    (Pdfpage.prepend_operators pdf [Pdfops.Op_cm matrix] ~fast page, pnum, matrix)
+  in
+  process_pages scale_page_to_fit pdf range
+
diff --git a/cpdfpage.mli b/cpdfpage.mli
index 63971de..ba0512d 100644
--- a/cpdfpage.mli
+++ b/cpdfpage.mli
@@ -1,3 +1,5 @@
+(** {2 Working with pages} *)
+
 (** Print page info (Mediabox etc) to standard output. *)
 val output_page_info : Pdf.t -> int list -> unit
 
@@ -12,3 +14,55 @@ val iter_pages : (int -> Pdfpage.t -> unit) -> Pdf.t -> int list -> unit
 (** Same as [process_pages] but return the list of outputs of the map function. *)
 val map_pages : (int -> Pdfpage.t -> 'a) -> Pdf.t -> int list -> 'a list
 
+val hard_box : Pdf.t -> int list -> string -> bool -> bool -> Pdf.t
+
+(** Shift a PDF in x and y (in pts) in the given pages. List of (x, y) pairs is
+for all pages in pdf.
*)
+val shift_pdf : ?fast:bool -> (float * float) list -> Pdf.t -> int list -> Pdf.t
+
+(** [rectify_boxes ~fast pdf page] shifts the page so that the minimum x and y
+of its media box are 0; the other boxes are shifted by the same amount. *)
+val rectify_boxes : ?fast:bool -> Pdf.t -> Pdfpage.t -> Pdfpage.t
+
+(** [change_boxes f pdf page] applies [f] to the media box and to whichever of
+the trim, art, crop and bleed boxes the page has. [f] takes and returns a
+rectangle as (xmin, ymin, xmax, ymax). *)
+val change_boxes : (float * float * float * float -> float * float * float * float) ->
+  Pdf.t -> Pdfpage.t -> Pdfpage.t
+
+(** [scale_contents ~fast position scale pdf range] scales the contents of each
+page in [range] by the factor [scale], centred around the point derived from
+[position]. *)
+val scale_contents : ?fast:bool -> Cpdfposition.position -> float -> Pdf.t -> int list -> Pdf.t
+
+(** [set_mediabox xywhlist pdf range] sets the media box on the given pages.
+[xywhlist] holds one (x, y, width, height) entry per page of the document, in
+page order. *)
+val set_mediabox : (float * float * float * float) list -> Pdf.t -> int list -> Pdf.t
+
+(** On each given page that has a crop box, make the crop box the media box.
+Pages without a crop box are left unchanged. *)
+val copy_cropbox_to_mediabox : Pdf.t -> int list -> Pdf.t
+
+(** Remove any cropping from the given pages. *)
+val remove_cropping_pdf : Pdf.t -> int list -> Pdf.t
+
+(** Remove any trim box from the given pages. *)
+val remove_trim_pdf : Pdf.t -> int list -> Pdf.t
+
+(** Remove any bleed box from the given pages. *)
+val remove_bleed_pdf : Pdf.t -> int list -> Pdf.t
+
+(** Remove any art box from the given pages. *)
+val remove_art_pdf : Pdf.t -> int list -> Pdf.t
+
+(** Modify the rotation of the page and its contents to leave the rotation at 0 with the page effectively unaltered. *)
+val upright : ?fast:bool -> int list -> Pdf.t -> Pdf.t
+
+(** Change rotation to a given value 0, 90, 180, 270 on given pages. *)
+val rotate_pdf : int -> Pdf.t -> int list -> Pdf.t
+
+(** Rotate clockwise by 0, 90, 180, 270 on given pages. *)
+val rotate_pdf_by : int -> Pdf.t -> int list -> Pdf.t
+
+(** Rotate the contents by the given angle on the given pages. If [fast] is true, assume PDF is well-formed. *)
+val rotate_contents : ?fast:bool -> float -> Pdf.t -> int list -> Pdf.t
+
+(** Scale a PDF in sx, sy in the given pages. List of (sx, sy) pairs is
+for all pages in pdf.
*)
+val scale_pdf : ?fast:bool -> (float * float) list -> Pdf.t -> int list -> Pdf.t
+
+(** [scale_to_fit_pdf ~fast position input_scale xylist op pdf range] scales each
+page in [range] to fit the page size (x, y) taken from [xylist] (one entry per
+page of the document, in page order) and by the [input_scale] (e.g 1.0 = scale
+to fit, 0.9 = scale to fit leaving a border etc.). [op] is unused. *)
+val scale_to_fit_pdf : ?fast:bool -> Cpdfposition.position -> float -> (float * float) list -> 'a -> Pdf.t -> int list -> Pdf.t
+
diff --git a/cpdfutil.ml b/cpdfutil.ml
index ad77c1f..f6ac34c 100644
--- a/cpdfutil.ml
+++ b/cpdfutil.ml
@@ -1,5 +1,9 @@
 open Pdfutil
 
+(* For uses of process_pages which don't need to deal with matrices:
+   [ppstub f n p] is [(f n p, n, Pdftransform.i_matrix)], i.e. the result of
+   [f], the page number passed through, and the identity matrix. *)
+let ppstub f n p = (f n p, n, Pdftransform.i_matrix)
+
 (* These may move into CamlPDF at some point *)
 let process_xobject f pdf resources i =
   let xobj = Pdf.lookup_obj pdf i in
@@ -34,3 +38,164 @@ let process_xobjects pdf page f =
         elts
   | _ -> ()
 
+(* The content transformed by altering any use of [Op_cm].
But we must also
+alter any /Matrix entries in pattern dictionaries *)
+(* [change_pattern_matrices_resources pdf tr resources] returns [resources]
+   with its /Pattern dictionary rebuilt: each pattern is copied into a NEW
+   object (via [Pdf.addobj]) whose /Matrix is the inverse of [tr] composed
+   with the pattern's existing matrix, and the entry is pointed at that copy.
+   Note that [tr] is inverted here, so a caller that passes an already
+   inverted matrix ends up composing with the original one.
+   If /Pattern is absent or not a dictionary, [resources] is returned as is.
+   If [tr] is not invertible, a warning is printed on stderr and [resources]
+   is returned unchanged. *)
+let change_pattern_matrices_resources pdf tr resources =
+  try
+    begin match Pdf.lookup_direct pdf "/Pattern" resources with
+    | Some (Pdf.Dictionary patterns) ->
+        let entries =
+          map
+            (fun (name, p) ->
+              (*Printf.printf "Changing matrices of pattern %s\n" name;*)
+              let old_pattern = Pdf.direct pdf p in
+                let new_pattern =
+                  let existing_tr = Pdf.parse_matrix pdf "/Matrix" old_pattern in
+                    let new_tr = Pdftransform.matrix_compose (Pdftransform.matrix_invert tr) existing_tr in
+                      Pdf.add_dict_entry old_pattern "/Matrix" (Pdf.make_matrix new_tr)
+                in
+                  (* The original pattern object is left alone; a fresh copy is added. *)
+                  name, Pdf.Indirect (Pdf.addobj pdf new_pattern))
+            patterns
+         in
+           Pdf.add_dict_entry resources "/Pattern" (Pdf.Dictionary entries)
+    | _ -> resources
+  end
+    with
+      Pdftransform.NonInvertable ->
+        Printf.eprintf "Warning: noninvertible matrix\n%!";
+        resources
+
+(* [change_pattern_matrices_page pdf tr page] applies
+   [change_pattern_matrices_resources] to the page's own resources and to the
+   /Resources of every form XObject listed directly in the page's /XObject
+   dictionary. Form XObjects are rewritten in place under their existing
+   object number; XObjects nested inside those forms are not visited.
+   Raises [Pdf.PDFError] if an /XObject entry is not an indirect reference. *)
+let change_pattern_matrices_page pdf tr page =
+  let page =
+    {page with Pdfpage.resources = change_pattern_matrices_resources pdf tr page.Pdfpage.resources}
+  in
+    match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with
+    | Some (Pdf.Dictionary elts) ->
+        iter
+          (fun (k, v) ->
+             match v with
+             | Pdf.Indirect i ->
+                 (* Check if it's a form XObject. If so, rewrite its resources and add back as same number. *)
+                 begin match Pdf.lookup_direct pdf "/Subtype" v with
+                 | Some (Pdf.Name "/Form") ->
+                     (*Printf.printf "Processing form xobject %s for patterns\n" k; *)
+                     let form_xobject = Pdf.lookup_obj pdf i in
+                       begin match Pdf.lookup_direct pdf "/Resources" form_xobject with
+                       | Some resources ->
+                           let form_xobject' =
+                             Pdf.add_dict_entry form_xobject "/Resources" (change_pattern_matrices_resources pdf tr resources)
+                           in
+                             Pdf.addobj_given_num pdf (i, form_xobject')
+                       (* A form without its own /Resources is left untouched. *)
+                       | _ -> ()
+                       end
+                 (* Not a form XObject (e.g. an image): nothing to do. *)
+                 | _ -> ()
+                 end;
+             | _ -> raise (Pdf.PDFError "change_pattern_matrices_page"))
+          elts;
+        page
+    | _ -> page
+
+(* Union two resource dictionaries from the same PDF.
*)
+(* For each well-known resource category the sub-dictionaries of [a] and [b]
+   are concatenated ([a]'s entries first). Top-level entries under any other
+   key are carried over, with [b]'s added after [a]'s. *)
+let combine_pdf_resources pdf a b =
+  let resource_keys =
+    ["/Font"; "/ExtGState"; "/ColorSpace"; "/Pattern";
+     "/Shading"; "/XObject"; "/Properties"]
+  in
+  (* Top-level entries are only read from a direct dictionary. *)
+  let toplevel = function
+    | Pdf.Dictionary entries -> entries
+    | _ -> []
+  in
+  let category key resources =
+    match Pdf.lookup_direct pdf key resources with
+    | Some (Pdf.Dictionary d) -> d
+    | _ -> []
+  in
+  let merged key =
+    match category key a, category key b with
+    | [], [] -> None
+    | from_a, from_b -> Some (key, Pdf.Dictionary (from_a @ from_b))
+  in
+  let unrecognised resources =
+    lose (fun (k, _) -> mem k resource_keys) (toplevel resources)
+  in
+  fold_left
+    (fun dict (k, v) -> Pdf.add_dict_entry dict k v)
+    (Pdf.Dictionary [])
+    (unrecognised a @ unrecognised b @ option_map merged resource_keys)
+
+(* Transform all four corners of [rect] and return the axis-aligned bounding
+   rectangle of the results as a PDF array [minx miny maxx maxy]. *)
+let transform_rect transform rect =
+  let minx, miny, maxx, maxy = Pdf.parse_rectangle rect in
+  let corner p = Pdftransform.transform_matrix transform p in
+  let (x0, y0) = corner (minx, miny) in
+  let (x1, y1) = corner (maxx, maxy) in
+  let (x2, y2) = corner (minx, maxy) in
+  let (x3, y3) = corner (maxx, miny) in
+  let lowest a b c d = fmin (fmin a b) (fmin c d) in
+  let highest a b c d = fmax (fmax a b) (fmax c d) in
+  Pdf.Array
+    [Pdf.Real (lowest x0 x1 x2 x3);
+     Pdf.Real (lowest y0 y1 y2 y3);
+     Pdf.Real (highest x0 x1 x2 x3);
+     Pdf.Real (highest y0 y1 y2 y3)]
+
+(* Transform one quadrilateral given as eight numbers (four x, y pairs). Any
+   list that does not have exactly eight entries is reported on stderr and
+   returned unchanged. *)
+let transform_quadpoint_single transform = function
+  | [x1; y1; x2; y2; x3; y3; x4; y4] ->
+      let x1, y1, x2, y2, x3, y3, x4, y4 =
+        Pdf.getnum x1, Pdf.getnum y1,
+        Pdf.getnum x2, Pdf.getnum y2,
+        Pdf.getnum x3, Pdf.getnum y3,
+        Pdf.getnum x4, Pdf.getnum y4
+      in
+      let move p = Pdftransform.transform_matrix transform p in
+      let (a1, b1) = move (x1, y1) in
+      let (a2, b2) = move (x2, y2) in
+      let (a3, b3) = move (x3, y3) in
+      let (a4, b4) = move (x4, y4) in
+      map (fun v -> Pdf.Real v) [a1; b1; a2; b2; a3; b3; a4; b4]
+  | qp ->
+      Printf.eprintf "Malformed /QuadPoints format: must be a multiple of 8 entries\n";
+      qp
+
+(* Transform a /QuadPoints array eight numbers at a time. Anything that is
+   not an array is reported on stderr and returned unchanged. *)
+let transform_quadpoints transform quadpoints =
+  match quadpoints with
+  | Pdf.Array qps ->
+      let groups = splitinto 8 qps in
+      Pdf.Array (flatten (map (transform_quadpoint_single transform) groups))
+  | qp ->
+      Printf.eprintf "Unknown or malformed /QuadPoints format %s\n" (Pdfwrite.string_of_pdf qp);
+      qp
+
+(* Apply transformations to any annotations in /Annots (i.e their /Rect and /QuadPoints entries) *)
+(* Each annotation object is overwritten in place under its own object
+   number. Raises [Pdf.PDFError] if an annotation has no /Rect. An /Annots
+   element that is not an indirect reference is reported on stderr and
+   skipped. *)
+let transform_annotations pdf transform rest =
+  let update_annotation i =
+    let annot = Pdf.lookup_obj pdf i in
+    let rect' =
+      match Pdf.lookup_direct pdf "/Rect" annot with
+      | None -> raise (Pdf.PDFError "transform_annotations: no rect")
+      | Some rect -> transform_rect transform rect
+    in
+    let with_rect = Pdf.add_dict_entry annot "/Rect" rect' in
+    let updated =
+      match Pdf.lookup_direct pdf "/QuadPoints" annot with
+      | Some qp ->
+          Pdf.add_dict_entry with_rect "/QuadPoints" (transform_quadpoints transform qp)
+      | None -> with_rect
+    in
+    Pdf.addobj_given_num pdf (i, updated)
+  in
+  match Pdf.lookup_direct pdf "/Annots" rest with
+  | Some (Pdf.Array annots) ->
+      (* Always indirect references, so alter in place *)
+      iter
+        (function
+         | Pdf.Indirect i -> update_annotation i
+         | _ -> Printf.eprintf "transform_annotations: not indirect\n%!")
+        annots
+  | _ -> ()
+
diff --git a/cpdfutil.mli b/cpdfutil.mli
index dd12e0d..3e9980b 100644
--- a/cpdfutil.mli
+++ b/cpdfutil.mli
@@ -3,3 +3,13 @@ val process_xobjects : Pdf.t -> (Pdf.t -> Pdf.pdfobject -> Pdf.pdfobject list -> Pdf.pdfobject list) -> unit
+
+(** [change_pattern_matrices_page pdf tr page] updates the /Matrix of the
+patterns in the page's resources, and in the resources of the form XObjects
+the page refers to directly, using the inverse of [tr]. *)
+val change_pattern_matrices_page : Pdf.t -> Pdftransform.transform_matrix -> Pdfpage.t -> Pdfpage.t
+
+(** Union two resource dictionaries from the same PDF. *)
+val combine_pdf_resources : Pdf.t -> Pdf.pdfobject -> Pdf.pdfobject -> Pdf.pdfobject
+
+(** [transform_annotations pdf tr rest] applies [tr] to the /Rect and
+/QuadPoints of every annotation listed in the /Annots entry of the page
+dictionary [rest], altering the annotation objects in place. *)
+val transform_annotations : Pdf.t -> Pdftransform.transform_matrix -> Pdf.pdfobject -> unit
+
+(** [ppstub f n p] is [(f n p, n, Pdftransform.i_matrix)]: it adapts a page
+function that does not deal with matrices for use with [process_pages]. *)
+val ppstub :
+  ('a -> 'b -> 'c) ->
+  'a -> 'b -> 'c * 'a * Pdftransform.transform_matrix