This commit is contained in:
John Whitington 2021-10-26 16:18:09 +01:00
parent 312e07eaf1
commit 609943e78c
3 changed files with 40 additions and 40 deletions

67
cpdf.ml
View File

@ -120,6 +120,7 @@ let decompress_pdf pdf =
(Pdf.iter_stream (Pdfcodec.decode_pdfstream_until_unknown pdf) pdf); (Pdf.iter_stream (Pdfcodec.decode_pdfstream_until_unknown pdf) pdf);
pdf pdf
(* Equality on PDF objects *) (* Equality on PDF objects *)
let pdfobjeq pdf x y = let pdfobjeq pdf x y =
let x = Pdf.lookup_obj pdf x let x = Pdf.lookup_obj pdf x
@ -435,6 +436,16 @@ let protect fast pdf resources content =
let qs = addstream (many Pdfops.Op_Q deficit @ [Pdfops.Op_Q]) in let qs = addstream (many Pdfops.Op_Q deficit @ [Pdfops.Op_Q]) in
[Pdf.Indirect q] @ content @ [Pdf.Indirect qs] [Pdf.Indirect q] @ content @ [Pdf.Indirect qs]
(* For every page in [range], replace the mediabox by the page's /CropBox
   entry (made direct) when one is found in the page dictionary. Pages with no
   /CropBox entry are returned unchanged. Only the mediabox field is updated;
   the rest of the page record is kept as it was. *)
let copy_cropbox_to_mediabox pdf range =
  let use_cropbox _ page =
    match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with
    | None -> page
    | Some cropbox -> {page with Pdfpage.mediabox = Pdf.direct pdf cropbox}
  in
  process_pages (ppstub use_cropbox) pdf range
(* Union two resource dictionaries from the same PDF. *) (* Union two resource dictionaries from the same PDF. *)
let combine_pdf_resources pdf a b = let combine_pdf_resources pdf a b =
let a_entries = let a_entries =
@ -2304,7 +2315,7 @@ let hard_box pdf range boxname mediabox_if_missing fast =
else error (Printf.sprintf "hard_box: box %s not found" boxname) else error (Printf.sprintf "hard_box: box %s not found" boxname)
in in
let ops = [Pdfops.Op_re (minx, miny, maxx -. minx, maxy -. miny); Pdfops.Op_W; Pdfops.Op_n] in let ops = [Pdfops.Op_re (minx, miny, maxx -. minx, maxy -. miny); Pdfops.Op_W; Pdfops.Op_n] in
Pdfpage.prepend_operators pdf ops ~fast:fast page)) Pdfpage.prepend_operators pdf ops ~fast page))
pdf pdf
range range
@ -2932,31 +2943,23 @@ let impose_pages fit x y columns rtl btt center margin output_mediabox fast fit_
let resources' = pair_reduce (combine_pdf_resources pdf) (map (fun p -> p.Pdfpage.resources) pages) in let resources' = pair_reduce (combine_pdf_resources pdf) (map (fun p -> p.Pdfpage.resources) pages) in
let rest' = pair_reduce (combine_pdf_rests pdf) (map (fun p -> p.Pdfpage.rest) pages) in let rest' = pair_reduce (combine_pdf_rests pdf) (map (fun p -> p.Pdfpage.rest) pages) in
let content' = let content' =
let transform_stream clipbox contents transform = let transform_stream transform contents =
let clipops = (* If fast, no mismatched q/Q protection and no parsing of operators. *)
let minx, miny, maxx, maxy = Pdf.parse_rectangle clipbox in if fast then
[Pdfops.Op_re (minx, miny, maxx -. minx, maxy -. miny); Pdfops.Op_W; Pdfops.Op_n] let before = Pdfops.stream_of_ops [Pdfops.Op_q; Pdfops.Op_cm transform] in
in let after = Pdfops.stream_of_ops [Pdfops.Op_Q] in
(* If fast, no mismatched q/Q protection and no parsing of operators. *) [before] @ contents @ [after]
if fast then else
let before = Pdfops.stream_of_ops (Pdfops.Op_q::Pdfops.Op_cm transform::clipops) in (* If slow, use protect from Pdfpage. *)
let after = Pdfops.stream_of_ops [Pdfops.Op_Q] in let ops = Pdfpage.protect pdf resources' contents @ Pdfops.parse_operators pdf resources' contents in
[before] @ contents @ [after] [Pdfops.stream_of_ops
else ([Pdfops.Op_q] @ [Pdfops.Op_cm transform] @ ops @ [Pdfops.Op_Q])]
(* If slow, use protect from Pdfpage. *)
let ops = Pdfpage.protect pdf resources' contents @ Pdfops.parse_operators pdf resources' contents in
[Pdfops.stream_of_ops
([Pdfops.Op_q] @ [Pdfops.Op_cm transform] @ clipops @ ops @ [Pdfops.Op_Q])]
in in
flatten flatten
(map2 (map2
(fun p t -> (fun p t ->
transform_annotations pdf t p.Pdfpage.rest; transform_annotations pdf t p.Pdfpage.rest;
transform_stream transform_stream t p.Pdfpage.content)
(match Pdf.lookup_direct pdf "/CropBox" p.Pdfpage.rest with
None -> p.Pdfpage.mediabox
| Some box -> box)
p.Pdfpage.content t)
pages pages
transforms) transforms)
in in
@ -2967,26 +2970,28 @@ let impose_pages fit x y columns rtl btt center margin output_mediabox fast fit_
Pdfpage.rest = rest'} Pdfpage.rest = rest'}
(* For fit, we scale contents, move to middle and retain page size. For xy, we (* For fit, we scale contents, move to middle and retain page size. For xy, we
expand mediabox and move contents to middle. *) expand mediabox and move contents to middle. This function also does the hard boxing. *)
let make_space fit ~fast spacing pdf = let make_space fit ~fast spacing pdf =
let margin = spacing /. 2. in
let endpage = Pdfpage.endpage pdf in let endpage = Pdfpage.endpage pdf in
let all = ilist 1 endpage in let all = ilist 1 endpage in
let pdf = hard_box pdf all "/MediaBox" false fast in
if spacing = 0. then pdf else
let margin = spacing /. 2. in
let firstpage = hd (Pdfpage.pages_of_pagetree pdf) in let firstpage = hd (Pdfpage.pages_of_pagetree pdf) in
let width, height = let width, height =
match Pdf.parse_rectangle firstpage.Pdfpage.mediabox with match Pdf.parse_rectangle firstpage.Pdfpage.mediabox with
xmin, ymin, xmax, ymax -> (xmax -. xmin, ymax -. ymin) xmin, ymin, xmax, ymax -> (xmax -. xmin, ymax -. ymin)
in in
if fit then if fit then
shift_pdf (shift_pdf
~fast ~fast
(many (margin, margin) endpage) (many (margin, margin) endpage)
(scale_contents ~fast (Cpdfposition.BottomLeft 0.) ((width -. spacing) /. width) pdf all) (scale_contents ~fast (Cpdfposition.BottomLeft 0.) ((width -. spacing) /. width) pdf all)
all all)
else else
set_mediabox (set_mediabox
(many (0., 0., width +. spacing, height +. spacing) endpage) (many (0., 0., width +. spacing, height +. spacing) endpage)
(shift_pdf ~fast (many (margin, margin) endpage) pdf all) all (shift_pdf ~fast (many (margin, margin) endpage) pdf all) all)
(* We add the border as a thick unfilled rectangle just inside the page edge, (* We add the border as a thick unfilled rectangle just inside the page edge,
only if its linewidth is > 0 since, for us, 0 means none, not single-pixel only if its linewidth is > 0 since, for us, 0 means none, not single-pixel
@ -3000,9 +3005,13 @@ let add_border linewidth ~fast pdf =
false false (ilist 1 (Pdfpage.endpage pdf)) pdf false false (ilist 1 (Pdfpage.endpage pdf)) pdf
let impose ~x ~y ~fit ~columns ~rtl ~btt ~center ~margin ~spacing ~linewidth ~fast pdf = let impose ~x ~y ~fit ~columns ~rtl ~btt ~center ~margin ~spacing ~linewidth ~fast pdf =
let endpage = Pdfpage.endpage pdf in
let pagenums = ilist 1 endpage in
let pdf = copy_cropbox_to_mediabox pdf pagenums in
let pdf = remove_cropping_pdf pdf pagenums in
let pdf = upright pagenums pdf in
let pdf = add_border linewidth ~fast pdf in let pdf = add_border linewidth ~fast pdf in
let pdf = make_space fit ~fast spacing pdf in let pdf = make_space fit ~fast spacing pdf in
let endpage = Pdfpage.endpage pdf in
let firstpage = hd (Pdfpage.pages_of_pagetree pdf) in let firstpage = hd (Pdfpage.pages_of_pagetree pdf) in
let _, _, w, h = Pdf.parse_rectangle firstpage.Pdfpage.mediabox in let _, _, w, h = Pdf.parse_rectangle firstpage.Pdfpage.mediabox in
let ix = int_of_float x in let ix = int_of_float x in
@ -3034,8 +3043,6 @@ let impose ~x ~y ~fit ~columns ~rtl ~btt ~center ~margin ~spacing ~linewidth ~fa
else if y = 0.0 then Pdf.Array [Pdf.Real 0.; Pdf.Real 0.; Pdf.Real (w +. m2); Pdf.Real (h *. float_of_int endpage +. m2)] else if y = 0.0 then Pdf.Array [Pdf.Real 0.; Pdf.Real 0.; Pdf.Real (w +. m2); Pdf.Real (h *. float_of_int endpage +. m2)]
else Pdf.Array [Pdf.Real 0.; Pdf.Real 0.; Pdf.Real (w *. x +. m2); Pdf.Real (h *. y +. m2)] else Pdf.Array [Pdf.Real 0.; Pdf.Real 0.; Pdf.Real (w *. x +. m2); Pdf.Real (h *. y +. m2)]
in in
let pagenums = ilist 1 endpage in
let pdf = upright pagenums pdf in
let pages = Pdfpage.pages_of_pagetree pdf in let pages = Pdfpage.pages_of_pagetree pdf in
let pagesets = splitinto n pages in let pagesets = splitinto n pages in
let renumbered = map (Pdfpage.renumber_pages pdf) pagesets in let renumbered = map (Pdfpage.renumber_pages pdf) pagesets in

View File

@ -39,6 +39,8 @@ val recompress_pdf : Pdf.t -> Pdf.t
(** Decompresses all streams in a PDF document, assuming it isn't encrypted. *) (** Decompresses all streams in a PDF document, assuming it isn't encrypted. *)
val decompress_pdf : Pdf.t -> Pdf.t val decompress_pdf : Pdf.t -> Pdf.t
val copy_cropbox_to_mediabox : Pdf.t -> int list -> Pdf.t
(** {2 Metadata and settings} *) (** {2 Metadata and settings} *)
(** [copy_id keepversion copyfrom copyto] copies the ID, if any, from (** [copy_id keepversion copyfrom copyto] copies the ID, if any, from

View File

@ -2981,15 +2981,6 @@ let getencryption pdf =
| Some (Pdfwrite.AES256bitISO true) -> "256bit AES ISO, Metadata encrypted" | Some (Pdfwrite.AES256bitISO true) -> "256bit AES ISO, Metadata encrypted"
| Some (Pdfwrite.AES256bitISO false) -> "256bit AES ISO, Metadata not encrypted" | Some (Pdfwrite.AES256bitISO false) -> "256bit AES ISO, Metadata not encrypted"
(* Make the mediabox of each page in [range] equal to its /CropBox entry, when
   the page dictionary has one. A page with no /CropBox entry is passed through
   untouched. *)
let copy_cropbox_to_mediabox pdf range =
  let promote page =
    match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with
    | Some box -> {page with Pdfpage.mediabox = Pdf.direct pdf box}
    | None -> page
  in
  Cpdf.process_pages (ppstub (fun _ page -> promote page)) pdf range
(* copy the contents of the box f to the box t. If mediabox_if_missing is set, (* copy the contents of the box f to the box t. If mediabox_if_missing is set,
the contents of the mediabox will be used if the from box is not available. If the contents of the mediabox will be used if the from box is not available. If
@ -3572,7 +3563,7 @@ let go () =
| (_, pagespec, _, _, _, _)::_, _ -> | (_, pagespec, _, _, _, _)::_, _ ->
let pdf = get_single_pdf (Some CopyCropBoxToMediaBox) false in let pdf = get_single_pdf (Some CopyCropBoxToMediaBox) false in
let range = parse_pagespec_allow_empty pdf pagespec in let range = parse_pagespec_allow_empty pdf pagespec in
let pdf = copy_cropbox_to_mediabox pdf range in let pdf = Cpdf.copy_cropbox_to_mediabox pdf range in
write_pdf false pdf write_pdf false pdf
| _ -> error "remove-crop: bad command line" | _ -> error "remove-crop: bad command line"
end end