2021-12-21 14:44:46 +01:00
|
|
|
open Pdfutil
|
2021-12-22 09:58:56 +01:00
|
|
|
open Cpdferror
|
2021-12-21 14:44:46 +01:00
|
|
|
|
|
|
|
(* Output information for each page.

   For every page number in [range], print to standard output the page
   number, its page label (empty when the label lookup raises [Not_found]),
   the five page boxes and the rotation in degrees.  A box which is absent,
   or which is not a direct four-element array, is printed as an empty
   string. *)
let output_page_info pdf range =
  let all_pages = Pdfpage.pages_of_pagetree pdf in
  let page_labels = Pdfpagelabels.read pdf in
  (* Render an optional rectangle object as four space-separated floats. *)
  let show_rectangle = function
    | Some (Pdf.Array [a; b; c; d]) ->
        Printf.sprintf "%f %f %f %f"
          (Pdf.getnum a) (Pdf.getnum b) (Pdf.getnum c) (Pdf.getnum d)
    | _ -> ""
  in
  (* The media box lives in the page record itself; every other box is
     looked up in the remainder of the page dictionary. *)
  let box_text page = function
    | "/MediaBox" -> show_rectangle (Some page.Pdfpage.mediabox)
    | name -> show_rectangle (Pdf.lookup_direct pdf name page.Pdfpage.rest)
  in
  let describe pnum =
    let page = select pnum all_pages in
    Printf.printf "Page %i:\n" pnum;
    Printf.printf "Label: %s\n"
      (try Pdfpagelabels.pagelabeltext_of_pagenumber pnum page_labels
       with Not_found -> "");
    Printf.printf "MediaBox: %s\n" (box_text page "/MediaBox");
    Printf.printf "CropBox: %s\n" (box_text page "/CropBox");
    Printf.printf "BleedBox: %s\n" (box_text page "/BleedBox");
    Printf.printf "TrimBox: %s\n" (box_text page "/TrimBox");
    Printf.printf "ArtBox: %s\n" (box_text page "/ArtBox");
    Printf.printf "Rotation: %i\n"
      (Pdfpage.int_of_rotation page.Pdfpage.rotate)
  in
  iter describe range
|
|
|
|
|
|
|
|
(* [process_pages f pdf range] rebuilds the page tree of [pdf].  For each
   page whose 1-based number is in [range], [f number page] supplies a
   (new page, page number, matrix) triple; every other page is kept as it is
   and paired with the identity matrix.  The page-number/matrix pairs are
   handed to [Pdfpage.change_pages] together with the new pages. *)
let process_pages f pdf range =
  let originals = Pdfpage.pages_of_pagetree pdf in
  let numbering = ilist 1 (length originals) in
  let apply_or_keep serial page =
    if mem serial range
    then f serial page
    else (page, serial, Pdftransform.i_matrix)
  in
  (* new page objects, page numbers, matrices *)
  let updated, serials, transforms =
    split3 (map2 apply_or_keep numbering originals)
  in
  let matrices = combine serials transforms in
  Pdfpage.change_pages ~matrices true pdf updated
|
|
|
|
|
|
|
|
(* [iter_pages f pdf range] calls [f number page] for every page of [pdf]
   whose 1-based number is a member of [range]. *)
let iter_pages f pdf range =
  let pages = Pdfpage.pages_of_pagetree pdf in
  let visit serial page =
    if mem serial range then f serial page
  in
  iter2 visit (ilist 1 (length pages)) pages
|
|
|
|
|
|
|
|
(* [map_pages f pdf range] collects [f number page] for every page of [pdf]
   whose 1-based number is a member of [range]; pages outside the range
   contribute nothing to the result. *)
let map_pages f pdf range =
  let pages = Pdfpage.pages_of_pagetree pdf in
  let pick serial page =
    match mem serial range with
    | true -> Some (f serial page)
    | false -> None
  in
  option_map2 pick (ilist 1 (length pages)) pages
|
|
|
|
|
2021-12-22 09:58:56 +01:00
|
|
|
(* Clip a page to one of its boxes, or the media box if that box is not
|
|
|
|
* present. This is a hard clip, done by using a clipping rectangle, so that
|
|
|
|
* the page may then be used as a stamp without extraneous material reappearing.
|
|
|
|
* *)
|
|
|
|
(* [hard_box pdf range boxname mediabox_if_missing fast]:
   - [boxname] is the dictionary key of the box to clip to, e.g. "/CropBox";
     "/MediaBox" is read from the page record rather than the dictionary.
   - when the box is absent, the media box is used if [mediabox_if_missing]
     is true, otherwise [error] is called with a message naming the box.
   - [fast] is forwarded to [Pdfpage.prepend_operators]. *)
let hard_box pdf range boxname mediabox_if_missing fast =
  let media_rect page = Pdf.parse_rectangle page.Pdfpage.mediabox in
  let clip_rect page =
    match boxname with
    | "/MediaBox" -> media_rect page
    | _ ->
        begin match Pdf.lookup_direct pdf boxname page.Pdfpage.rest with
        | Some found -> Pdf.parse_rectangle found
        | None when mediabox_if_missing -> media_rect page
        | None -> error (Printf.sprintf "hard_box: box %s not found" boxname)
        end
  in
  let clip_page _ page =
    let llx, lly, urx, ury = clip_rect page in
    (* Rectangle path, set it as the clipping path, end the path unpainted. *)
    let clip_ops =
      [Pdfops.Op_re (llx, lly, urx -. llx, ury -. lly);
       Pdfops.Op_W;
       Pdfops.Op_n]
    in
    Pdfpage.prepend_operators pdf clip_ops ~fast page
  in
  process_pages (Cpdfutil.ppstub clip_page) pdf range
|
|
|
|
|
|
|
|
(* [shift_page ?fast dxdylist pdf pnum page] translates the contents of
   [page] by the (dx, dy) pair found at index [pnum - 1] of [dxdylist].
   Pattern matrices are adjusted with the opposite translation, annotations
   with the same translation.  Returns the new page, [pnum] and the
   translation matrix, in the shape [process_pages] expects.  [List.nth]
   raises if [dxdylist] has no entry for [pnum]. *)
let shift_page ?(fast=false) dxdylist pdf pnum page =
  let dx, dy = List.nth dxdylist (pnum - 1) in
  let forward = Pdftransform.mktranslate dx dy in
  let backward = Pdftransform.mktranslate (-.dx) (-.dy) in
  let cm =
    Pdfops.Op_cm (Pdftransform.matrix_of_op (Pdftransform.Translate (dx, dy)))
  in
  let repatterned = Cpdfutil.change_pattern_matrices_page pdf backward page in
  Cpdfutil.transform_annotations pdf forward repatterned.Pdfpage.rest;
  let shifted = Pdfpage.prepend_operators pdf [cm] ~fast repatterned in
  (shifted, pnum, forward)
|
|
|
|
|
|
|
|
(* [shift_pdf ?fast dxdylist pdf range] applies [shift_page] to every page
   of [pdf] in [range]; [dxdylist] is indexed by page number minus one. *)
let shift_pdf ?(fast=false) dxdylist pdf range =
  let per_page = shift_page ~fast dxdylist pdf in
  process_pages per_page pdf range
|
|
|
|
|
|
|
|
(* \section{Shift page data} *)
|
|
|
|
(* Build a four-element PDF array of reals from a rectangle given as
   (xmin, ymin, xmax, ymax). *)
let make_mediabox (xmin, ymin, xmax, ymax) =
  Pdf.Array (List.map (fun v -> Pdf.Real v) [xmin; ymin; xmax; ymax])
|
|
|
|
|
|
|
|
(* Change the media box and other known boxes by the function [f] which takes
|
|
|
|
xmin, ymin, xmax, ymax as input. *)
|
|
|
|
(* [change_boxes f pdf page] returns [page] with its media box, and each of
   the trim, art, crop and bleed boxes which is actually present in the page
   dictionary, replaced by [f] applied to the parsed rectangle.  Boxes which
   are absent are not created. *)
let change_boxes f pdf page =
  let remap rect = make_mediabox (f (Pdf.parse_rectangle rect)) in
  (* Keep only the named boxes which exist, paired with their current value. *)
  let present =
    option_map
      (fun name ->
         match Pdf.lookup_direct pdf name page.Pdfpage.rest with
         | Some value -> Some (name, value)
         | None -> None)
      ["/TrimBox"; "/ArtBox"; "/CropBox"; "/BleedBox"]
  in
  let rest' =
    fold_left
      (fun dict (name, value) -> Pdf.replace_dict_entry dict name (remap value))
      page.Pdfpage.rest
      present
  in
  {page with
     Pdfpage.mediabox = remap page.Pdfpage.mediabox;
     Pdfpage.rest = rest'}
|
|
|
|
|
|
|
|
(* Change a page's media box so its minimum x and y are 0, making other
   operations simpler to think about. The same offset is subtracted from
   every other box handled by [change_boxes], and, when the offset is
   non-zero, the page contents are shifted to match using [shift_page]. *)
let rectify_boxes ?(fast=false) pdf page =
  let origin_x, origin_y, _, _ = Pdf.parse_rectangle page.Pdfpage.mediabox in
  let to_origin (x0, y0, x1, y1) =
    (x0 -. origin_x, y0 -. origin_y, x1 -. origin_x, y1 -. origin_y)
  in
  let rebased = change_boxes to_origin pdf page in
  match origin_x = 0. && origin_y = 0. with
  | true -> rebased
  | false ->
      (* A one-element shift list addressed as page number 1. *)
      let moved, _, _ =
        shift_page ~fast [(-.origin_x, -.origin_y)] pdf 1 rebased
      in
      moved
|
|
|
|
|
|
|
|
(* Scale contents.

   [scale_page_contents ?fast scale position pdf pnum page] scales the
   contents of [page] by [scale] about the point which
   [Cpdfposition.calculate_position] returns for the crop box (or the media
   box when the page has no crop box), then offsets the result according to
   [position].  Returns the new page, [pnum] and the transformation matrix,
   in the shape [process_pages] expects.

   Pattern matrices are adjusted with the inverse of the content transform,
   as every other content-transforming function in this file does; this
   function previously passed the forward matrix.
   NOTE(review): the definition of [Cpdfutil.change_pattern_matrices_page]
   is not visible here - the fix follows the convention of its other call
   sites; confirm against that function. *)
let scale_page_contents ?(fast=false) scale position pdf pnum page =
  let box =
    (* Use cropbox if available *)
    Pdf.parse_rectangle
      (match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with
       | Some r -> r
       | None -> page.Pdfpage.mediabox)
  in
  let sx, sy, _ =
    Cpdfposition.calculate_position true 0. box Horizontal position
  in
  let tx, ty =
    let open Cpdfposition in
    match position with
    | Top t -> 0., -.t
    | TopLeft t -> t, -.t
    | TopRight t -> -.t, -.t
    | Left t -> t, 0.
    | BottomLeft t -> t, t
    | Bottom t -> 0., t
    | BottomRight t -> -.t, t
    | Right t -> -.t, 0.
    | _ -> 0., 0. (* centre it... FIXME: We will add a center position, eventually, for text and this... *)
  in
  let transform =
    Pdftransform.matrix_of_transform
      [Pdftransform.Translate (tx, ty);
       Pdftransform.Scale ((sx, sy), scale, scale)]
  in
  let transform_op = Pdfops.Op_cm transform in
  let page =
    Cpdfutil.change_pattern_matrices_page
      pdf (Pdftransform.matrix_invert transform) page
  in
  Cpdfutil.transform_annotations pdf transform page.Pdfpage.rest;
  (Pdfpage.prepend_operators pdf [transform_op] ~fast page, pnum, transform)
|
|
|
|
|
|
|
|
(* [scale_contents ?fast position scale pdf range] applies
   [scale_page_contents] to every page of [pdf] in [range].  Note the
   argument order: this function takes [position] before [scale], while
   [scale_page_contents] takes them the other way round. *)
let scale_contents ?(fast=false) position scale pdf range =
  let per_page = scale_page_contents ~fast scale position pdf in
  process_pages per_page pdf range
|
|
|
|
|
|
|
|
(* \section{Set media box} *)
|
|
|
|
(* [set_mediabox xywhlist pdf range] replaces the media box of every page in
   [range] with the rectangle built from the (x, y, width, height) entry at
   index page-number-minus-one of [xywhlist].  [List.nth] raises if the list
   has no entry for a page in the range. *)
let set_mediabox xywhlist pdf range =
  let with_new_mediabox pnum page =
    let x, y, w, h = List.nth xywhlist (pnum - 1) in
    let rect =
      Pdf.Array
        [Pdf.Real x; Pdf.Real y; Pdf.Real (x +. w); Pdf.Real (y +. h)]
    in
    {page with Pdfpage.mediabox = rect}
  in
  process_pages (Cpdfutil.ppstub with_new_mediabox) pdf range
|
|
|
|
|
|
|
|
(* For each page in [range]: if a crop box exists, make it (as a direct
   object) the media box. If not, leave the page unchanged. The crop box
   entry itself is not removed. *)
let copy_cropbox_to_mediabox pdf range =
  let promote _ page =
    match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with
    | None -> page
    | Some cropbox -> {page with Pdfpage.mediabox = Pdf.direct pdf cropbox}
  in
  process_pages (Cpdfutil.ppstub promote) pdf range
|
|
|
|
|
|
|
|
(* Remove the /CropBox entry from the dictionary of every page in [range]. *)
let remove_cropping_pdf pdf range =
  let strip _ page =
    let stripped = Pdf.remove_dict_entry page.Pdfpage.rest "/CropBox" in
    {page with Pdfpage.rest = stripped}
  in
  process_pages (Cpdfutil.ppstub strip) pdf range
|
|
|
|
|
|
|
|
(* Remove the /TrimBox entry from the dictionary of every page in [range]. *)
let remove_trim_pdf pdf range =
  let strip _ page =
    let stripped = Pdf.remove_dict_entry page.Pdfpage.rest "/TrimBox" in
    {page with Pdfpage.rest = stripped}
  in
  process_pages (Cpdfutil.ppstub strip) pdf range
|
|
|
|
|
|
|
|
(* Remove the /ArtBox entry from the dictionary of every page in [range]. *)
let remove_art_pdf pdf range =
  let strip _ page =
    let stripped = Pdf.remove_dict_entry page.Pdfpage.rest "/ArtBox" in
    {page with Pdfpage.rest = stripped}
  in
  process_pages (Cpdfutil.ppstub strip) pdf range
|
|
|
|
|
|
|
|
(* Remove the /BleedBox entry from the dictionary of every page in [range]. *)
let remove_bleed_pdf pdf range =
  let strip _ page =
    let stripped = Pdf.remove_dict_entry page.Pdfpage.rest "/BleedBox" in
    {page with Pdfpage.rest = stripped}
  in
  process_pages (Cpdfutil.ppstub strip) pdf range
|
|
|
|
|
|
|
|
(* Upright functionality *)
|
|
|
|
|
|
|
|
(* Return the pages from the pdf whose numbers appear in the range. No
   sorting is performed. A page number for which [select] raises any
   exception is silently left out of the result. *)
let select_pages range pdf =
  let pages = Pdfpage.pages_of_pagetree pdf in
  let lookup n =
    match select n pages with
    | page -> Some page
    | exception _ -> None
  in
  option_map lookup range
|
|
|
|
|
|
|
|
(* True when every selected page has rotation [Rotate0] and a media box
   whose minimum x and y both lie strictly within 0.001 of zero. Used to
   skip the upright operation entirely and so save time. *)
let allupright range pdf =
  let near_zero v = v > ~-.0.001 && v < 0.001 in
  let upright_and_rectified page =
    match page.Pdfpage.rotate with
    | Pdfpage.Rotate0 ->
        let minx, miny, _, _ = Pdf.parse_rectangle page.Pdfpage.mediabox in
        near_zero minx && near_zero miny
    | _ -> false
  in
  (* Every page is examined before the verdicts are combined. *)
  let verdicts = map upright_and_rectified (select_pages range pdf) in
  not (List.exists not verdicts)
|
|
|
|
|
|
|
|
(* The matrix which rotates about the centre of the page's media box by the
   negation of the page's rotation value (converted from degrees to
   radians). *)
let upright_transform page =
  let minx, miny, maxx, maxy = Pdf.parse_rectangle page.Pdfpage.mediabox in
  let centre = ((minx +. maxx) /. 2., (miny +. maxy) /. 2.) in
  let degrees = float (Pdfpage.int_of_rotation page.Pdfpage.rotate) in
  Pdftransform.mkrotate centre (rad_of_deg (~-.degrees))
|
|
|
|
|
|
|
|
(* [transform_boxes tr pdf page] applies the matrix [tr] to the media box and
   to every other box handled by [change_boxes].

   Each box is replaced by the axis-aligned bounding box of its four
   transformed corners.  Transforming only the lower-left and upper-right
   corners, as was done previously, can yield a rectangle whose "minimum"
   exceeds its "maximum" when [tr] is a rotation, and does not bound the
   result when [tr] is not a multiple of a right angle.  For translations,
   scalings and right-angle rotations the result is the previous rectangle
   in normalised form. *)
let transform_boxes tr pdf page =
  let lo (a : float) b = if a < b then a else b
  and hi (a : float) b = if a > b then a else b in
  let f (minx, miny, maxx, maxy) =
    let x0, y0 = Pdftransform.transform_matrix tr (minx, miny)
    and x1, y1 = Pdftransform.transform_matrix tr (maxx, miny)
    and x2, y2 = Pdftransform.transform_matrix tr (maxx, maxy)
    and x3, y3 = Pdftransform.transform_matrix tr (minx, maxy) in
    (lo (lo x0 x1) (lo x2 x3),
     lo (lo y0 y1) (lo y2 y3),
     hi (hi x0 x1) (hi x2 x3),
     hi (hi y0 y1) (hi y2 y3))
  in
  change_boxes f pdf page
|
|
|
|
|
|
|
|
(* [transform_contents ?fast tr pdf page] prepends a [cm] operator for the
   matrix [tr] to the page contents.  Pattern matrices are adjusted with the
   inverse of [tr], annotations are transformed with [tr] itself. *)
let transform_contents ?(fast=false) tr pdf page =
  let inverse = Pdftransform.matrix_invert tr in
  let repatterned = Cpdfutil.change_pattern_matrices_page pdf inverse page in
  Cpdfutil.transform_annotations pdf tr repatterned.Pdfpage.rest;
  Pdfpage.prepend_operators pdf [Pdfops.Op_cm tr] ~fast repatterned
|
|
|
|
|
|
|
|
(* [upright ?fast range pdf] makes each page in [range] upright: its boxes
   and contents are transformed by [upright_transform], its rotation is set
   to [Rotate0], and its boxes are then rectified with [rectify_boxes].
   When [allupright] reports that there is nothing to do, [pdf] is returned
   untouched. *)
let upright ?(fast=false) range pdf =
  let straighten pnum page =
    let tr = upright_transform page in
    let reboxed = transform_boxes tr pdf page in
    let redrawn = transform_contents ~fast tr pdf reboxed in
    let unrotated = {redrawn with Pdfpage.rotate = Pdfpage.Rotate0} in
    (rectify_boxes ~fast pdf unrotated, pnum, tr)
  in
  match allupright range pdf with
  | true -> pdf
  | false -> process_pages straighten pdf range
|
|
|
|
|
|
|
|
(* \section{Rotating pages} *)
|
|
|
|
(* [rotate_pdf r pdf range] sets the rotation of every page in [range] to
   [Pdfpage.rotation_of_int r], discarding the previous rotation. *)
let rotate_pdf r pdf range =
  let set_rotation _ page =
    let rotation = Pdfpage.rotation_of_int r in
    {page with Pdfpage.rotate = rotation}
  in
  process_pages (Cpdfutil.ppstub set_rotation) pdf range
|
|
|
|
|
|
|
|
(* [rotate_pdf_by r pdf range] adds [r] degrees to the existing rotation of
   every page in [range]; the sum is reduced with [mod 360] before being
   converted back by [Pdfpage.rotation_of_int]. *)
let rotate_pdf_by r pdf range =
  let add_rotation _ page =
    let current = Pdfpage.int_of_rotation page.Pdfpage.rotate in
    let rotation = Pdfpage.rotation_of_int ((current + r) mod 360) in
    {page with Pdfpage.rotate = rotation}
  in
  process_pages (Cpdfutil.ppstub add_rotation) pdf range
|
|
|
|
|
|
|
|
(* [rotate_page_contents ~fast rotpoint r pdf pnum page] rotates the contents
   of [page] by the negation of [r] degrees about [rotpoint], or about the
   centre of the media box when [rotpoint] is [None].  Pattern matrices are
   adjusted with the rotation by the opposite angle, annotations with the
   content rotation itself.  Returns the new page, [pnum] and the content
   rotation matrix, in the shape [process_pages] expects. *)
let rotate_page_contents ~fast rotpoint r pdf pnum page =
  let pivot =
    match rotpoint with
    | Some point -> point
    | None ->
        let minx, miny, maxx, maxy =
          Pdf.parse_rectangle page.Pdfpage.mediabox
        in
        ((minx +. maxx) /. 2., (miny +. maxy) /. 2.)
  in
  let rotation_about_pivot angle =
    Pdftransform.matrix_of_op (Pdftransform.Rotate (pivot, angle))
  in
  let content_matrix = rotation_about_pivot (-.(rad_of_deg r)) in
  let pattern_matrix = rotation_about_pivot (rad_of_deg r) in
  let repatterned =
    Cpdfutil.change_pattern_matrices_page pdf pattern_matrix page
  in
  Cpdfutil.transform_annotations pdf content_matrix repatterned.Pdfpage.rest;
  let rotated =
    Pdfpage.prepend_operators
      pdf [Pdfops.Op_cm content_matrix] ~fast repatterned
  in
  (rotated, pnum, content_matrix)
|
|
|
|
|
|
|
|
(* [rotate_contents ?fast r pdf range] applies [rotate_page_contents] with no
   explicit rotation point (so the media box centre is used) to every page
   of [pdf] in [range]. *)
let rotate_contents ?(fast=false) r pdf range =
  let per_page = rotate_page_contents ~fast None r pdf in
  process_pages per_page pdf range
|
|
|
|
|
|
|
|
(* \section{Scale page data} *)
|
|
|
|
(* [scale_pdf ?fast sxsylist pdf range] scales every page in [range], boxes
   and contents alike, about the origin.  The (sx, sy) factors for a page
   are found at index page-number-minus-one of [sxsylist]; [List.nth] raises
   if the list has no such entry.  Pattern matrices are adjusted with the
   inverse of the scaling matrix, annotations with the matrix itself. *)
let scale_pdf ?(fast=false) sxsylist pdf range =
  let scale_page pnum page =
    let sx, sy = List.nth sxsylist (pnum - 1) in
    let matrix =
      Pdftransform.matrix_of_op (Pdftransform.Scale ((0., 0.), sx, sy))
    in
    let scale_rect (xmin, ymin, xmax, ymax) =
      (xmin *. sx, ymin *. sy, xmax *. sx, ymax *. sy)
    in
    let resized = change_boxes scale_rect pdf page in
    let repatterned =
      Cpdfutil.change_pattern_matrices_page
        pdf (Pdftransform.matrix_invert matrix) resized
    in
    Cpdfutil.transform_annotations pdf matrix repatterned.Pdfpage.rest;
    let scaled =
      Pdfpage.prepend_operators pdf ~fast [Pdfops.Op_cm matrix] repatterned
    in
    (scaled, pnum, matrix)
  in
  process_pages scale_page pdf range
|
|
|
|
|
|
|
|
(* Scale to fit page of size x * y.

   [scale_to_fit_pdf ?fast position input_scale xylist op pdf range] scales
   the contents of each page in [range] uniformly so that the crop box (or
   the media box when there is no crop box) fits a page of the (x, y) size
   found at index page-number-minus-one of [xylist], multiplies that scale
   by [input_scale], and places the result according to [position]: [Left]
   and [Right] pin it horizontally, [Top] and [Bottom] vertically, and
   anything else centres it on that axis.  All boxes handled by
   [change_boxes] are then set to (0, 0, x, y).

   The scale and placement are computed from the width, height and
   lower-left corner of the source box, so a box which does not start at the
   origin is handled correctly; for a box starting at (0, 0) the result is
   the same as before.

   [op] is not referenced by this function.

   Raises [Failure] when the source box has no positive width or height, and
   whatever [List.nth] raises when [xylist] has no entry for a page. *)
let scale_to_fit_pdf ?(fast=false) position input_scale xylist op pdf range =
  let scale_page_to_fit pnum page =
    let x, y = List.nth xylist (pnum - 1) in
    let matrix =
      let minx, miny, maxx, maxy =
        (* Use cropbox if available *)
        Pdf.parse_rectangle
          (match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with
           | Some r -> r
           | None -> page.Pdfpage.mediabox)
      in
      let width = maxx -. minx and height = maxy -. miny in
      if width <= 0. || height <= 0. then failwith "Zero-sized pages are invalid" else
      let scale = fmin (x /. width) (y /. height) *. input_scale in
      (* Offsets which place a scaled box whose lower-left corner is at the
         origin... *)
      let trans_x =
        match position with
        | Cpdfposition.Left _ -> 0.
        | Cpdfposition.Right _ -> x -. (width *. scale)
        | _ -> (x -. (width *. scale)) /. 2.
      and trans_y =
        match position with
        | Cpdfposition.Top _ -> y -. (height *. scale)
        | Cpdfposition.Bottom _ -> 0.
        | _ -> (y -. (height *. scale)) /. 2.
      in
      (* ...corrected for where the scaled lower-left corner really is. *)
      Pdftransform.matrix_of_transform
        [Pdftransform.Translate
           (trans_x -. (minx *. scale), trans_y -. (miny *. scale));
         Pdftransform.Scale ((0., 0.), scale, scale)]
    in
    let page = change_boxes (fun _ -> 0., 0., x, y) pdf page in
    Cpdfutil.transform_annotations pdf matrix page.Pdfpage.rest;
    (Pdfpage.prepend_operators pdf [Pdfops.Op_cm matrix] ~fast
       (Cpdfutil.change_pattern_matrices_page
          pdf (Pdftransform.matrix_invert matrix) page),
     pnum,
     matrix)
  in
  process_pages scale_page_to_fit pdf range
|
|
|
|
|