Finished splitting of cpdf.ml
This commit is contained in:
parent
e3acc74ecd
commit
69cce9e6f0
16
Makefile
16
Makefile
|
@ -2,9 +2,10 @@
|
||||||
MODS = cpdfyojson cpdfxmlm cpdfutil \
|
MODS = cpdfyojson cpdfxmlm cpdfutil \
|
||||||
cpdfunicodedata cpdferror cpdfdebug cpdfjson cpdfstrftime cpdfcoord \
|
cpdfunicodedata cpdferror cpdfdebug cpdfjson cpdfstrftime cpdfcoord \
|
||||||
cpdfattach cpdfpagespec cpdfposition cpdfpresent cpdfmetadata \
|
cpdfattach cpdfpagespec cpdfposition cpdfpresent cpdfmetadata \
|
||||||
cpdfbookmarks cpdfpage cpdfaddtext cpdf cpdfimage cpdffont cpdftype \
|
cpdfbookmarks cpdfpage cpdfaddtext cpdfimage cpdffont cpdftype \
|
||||||
cpdftexttopdf cpdftoc cpdfpad cpdfocg cpdfsqueeze cpdfdraft cpdfspot \
|
cpdftexttopdf cpdftoc cpdfpad cpdfocg cpdfsqueeze cpdfdraft cpdfspot \
|
||||||
cpdfpagelabels cpdfcreate cpdfannot cpdfxobject cpdfimpose cpdfcommand
|
cpdfpagelabels cpdfcreate cpdfannot cpdfxobject cpdfimpose cpdftweak \
|
||||||
|
cpdfcommand
|
||||||
|
|
||||||
SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml
|
SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml
|
||||||
|
|
||||||
|
@ -24,9 +25,14 @@ clean ::
|
||||||
rm -rf doc foo foo2 out.pdf out2.pdf foo.pdf decomp.pdf *.cmt *.cmti \
|
rm -rf doc foo foo2 out.pdf out2.pdf foo.pdf decomp.pdf *.cmt *.cmti \
|
||||||
*.json test/*.pdf debug/*.pdf *.ps
|
*.json test/*.pdf debug/*.pdf *.ps
|
||||||
|
|
||||||
DOC_FILES = cpdferror.mli cpdfjson.mli cpdfstrftime.mli cpdfcoord.mli \
|
DOC_FILES = cpdfunicodedata.mli cpdferror.mli cpdfdebug.mli cpdfjson.mli \
|
||||||
cpdfattach.mli cpdfpagespec.mli cpdfposition.mli cpdf.mli \
|
cpdfstrftime.mli cpdfcoord.mli cpdfattach.mli cpdfpagespec.mli \
|
||||||
cpdfcommand.mli
|
cpdfposition.mli cpdfpresent.mli cpdfmetadata.mli \
|
||||||
|
cpdfbookmarks.mli cpdfpage.mli cpdfaddtext.mli cpdfimage.mli \
|
||||||
|
cpdffont.mli cpdftype.mli cpdftexttopdf.mli cpdftoc.mli \
|
||||||
|
cpdfpad.mli cpdfocg.mli cpdfsqueeze.mli cpdfdraft.mli \
|
||||||
|
cpdfspot.mli cpdfpagelabels.mli cpdfcreate.mli cpdfannot.mli \
|
||||||
|
cpdfxobject.mli cpdfimpose.mli cpdftweak.mli cpdfcommand.mli
|
||||||
|
|
||||||
LIBINSTALL_FILES = cpdf.a cpdf.cma cpdf.cmxa \
|
LIBINSTALL_FILES = cpdf.a cpdf.cma cpdf.cmxa \
|
||||||
$(foreach x,$(MODS),$x.mli) $(foreach x,$(MODS),$x.cmi) \
|
$(foreach x,$(MODS),$x.mli) $(foreach x,$(MODS),$x.cmi) \
|
||||||
|
|
|
@ -3069,7 +3069,7 @@ let go () =
|
||||||
begin match args.inputs, args.out with
|
begin match args.inputs, args.out with
|
||||||
| _::_, _ ->
|
| _::_, _ ->
|
||||||
let pdf = get_single_pdf (Some RemoveUnusedResources) false in
|
let pdf = get_single_pdf (Some RemoveUnusedResources) false in
|
||||||
let outpdf = Cpdf.remove_unused_resources pdf in
|
let outpdf = Cpdftweak.remove_unused_resources pdf in
|
||||||
write_pdf true outpdf
|
write_pdf true outpdf
|
||||||
| _ -> error "RemoveUnusedResources: bad command line"
|
| _ -> error "RemoveUnusedResources: bad command line"
|
||||||
end
|
end
|
||||||
|
@ -3180,7 +3180,7 @@ let go () =
|
||||||
let pdf = get_single_pdf (Some Crop) false in
|
let pdf = get_single_pdf (Some Crop) false in
|
||||||
let xywhlist = Cpdfcoord.parse_rectangles pdf args.rectangle in
|
let xywhlist = Cpdfcoord.parse_rectangles pdf args.rectangle in
|
||||||
let range = parse_pagespec_allow_empty pdf pagespec in
|
let range = parse_pagespec_allow_empty pdf pagespec in
|
||||||
let pdf = Cpdf.crop_pdf xywhlist pdf range in
|
let pdf = Cpdfpage.crop_pdf xywhlist pdf range in
|
||||||
write_pdf false pdf
|
write_pdf false pdf
|
||||||
| _ -> error "crop: bad command line"
|
| _ -> error "crop: bad command line"
|
||||||
end
|
end
|
||||||
|
@ -3190,7 +3190,7 @@ let go () =
|
||||||
let pdf = get_single_pdf (Some Art) false in
|
let pdf = get_single_pdf (Some Art) false in
|
||||||
let xywhlist = Cpdfcoord.parse_rectangles pdf args.rectangle in
|
let xywhlist = Cpdfcoord.parse_rectangles pdf args.rectangle in
|
||||||
let range = parse_pagespec_allow_empty pdf pagespec in
|
let range = parse_pagespec_allow_empty pdf pagespec in
|
||||||
let pdf = Cpdf.crop_pdf ~box:"/ArtBox" xywhlist pdf range in
|
let pdf = Cpdfpage.crop_pdf ~box:"/ArtBox" xywhlist pdf range in
|
||||||
write_pdf false pdf
|
write_pdf false pdf
|
||||||
| _ -> error "art: bad command line"
|
| _ -> error "art: bad command line"
|
||||||
end
|
end
|
||||||
|
@ -3200,7 +3200,7 @@ let go () =
|
||||||
let pdf = get_single_pdf (Some Bleed) false in
|
let pdf = get_single_pdf (Some Bleed) false in
|
||||||
let xywhlist = Cpdfcoord.parse_rectangles pdf args.rectangle in
|
let xywhlist = Cpdfcoord.parse_rectangles pdf args.rectangle in
|
||||||
let range = parse_pagespec_allow_empty pdf pagespec in
|
let range = parse_pagespec_allow_empty pdf pagespec in
|
||||||
let pdf = Cpdf.crop_pdf ~box:"/BleedBox" xywhlist pdf range in
|
let pdf = Cpdfpage.crop_pdf ~box:"/BleedBox" xywhlist pdf range in
|
||||||
write_pdf false pdf
|
write_pdf false pdf
|
||||||
| _ -> error "bleed: bad command line"
|
| _ -> error "bleed: bad command line"
|
||||||
end
|
end
|
||||||
|
@ -3210,7 +3210,7 @@ let go () =
|
||||||
let pdf = get_single_pdf (Some Trim) false in
|
let pdf = get_single_pdf (Some Trim) false in
|
||||||
let xywhlist = Cpdfcoord.parse_rectangles pdf args.rectangle in
|
let xywhlist = Cpdfcoord.parse_rectangles pdf args.rectangle in
|
||||||
let range = parse_pagespec_allow_empty pdf pagespec in
|
let range = parse_pagespec_allow_empty pdf pagespec in
|
||||||
let pdf = Cpdf.crop_pdf ~box:"/TrimBox" xywhlist pdf range in
|
let pdf = Cpdfpage.crop_pdf ~box:"/TrimBox" xywhlist pdf range in
|
||||||
write_pdf false pdf
|
write_pdf false pdf
|
||||||
| _ -> error "trim: bad command line"
|
| _ -> error "trim: bad command line"
|
||||||
end
|
end
|
||||||
|
@ -3244,7 +3244,7 @@ let go () =
|
||||||
| _ -> error "Copy box: no tobox or no frombox specified"
|
| _ -> error "Copy box: no tobox or no frombox specified"
|
||||||
end
|
end
|
||||||
in
|
in
|
||||||
let pdf = Cpdf.copy_box f t args.mediabox_if_missing pdf range in
|
let pdf = Cpdfpage.copy_box f t args.mediabox_if_missing pdf range in
|
||||||
write_pdf false pdf
|
write_pdf false pdf
|
||||||
| _ -> error "Copy Box: bad command line"
|
| _ -> error "Copy Box: bad command line"
|
||||||
end
|
end
|
||||||
|
@ -3346,8 +3346,8 @@ let go () =
|
||||||
let range = parse_pagespec_allow_empty pdf pagespec in
|
let range = parse_pagespec_allow_empty pdf pagespec in
|
||||||
let pdf =
|
let pdf =
|
||||||
if flip = VFlip
|
if flip = VFlip
|
||||||
then Cpdf.vflip_pdf ~fast:args.fast pdf range
|
then Cpdfpage.vflip_pdf ~fast:args.fast pdf range
|
||||||
else Cpdf.hflip_pdf ~fast:args.fast pdf range
|
else Cpdfpage.hflip_pdf ~fast:args.fast pdf range
|
||||||
in
|
in
|
||||||
write_pdf false pdf
|
write_pdf false pdf
|
||||||
| _ -> error "flip: bad command line"
|
| _ -> error "flip: bad command line"
|
||||||
|
@ -3524,19 +3524,19 @@ let go () =
|
||||||
| Some (ThinLines w) ->
|
| Some (ThinLines w) ->
|
||||||
let pdf = get_single_pdf args.op false in
|
let pdf = get_single_pdf args.op false in
|
||||||
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
||||||
write_pdf false (Cpdf.thinlines range w pdf)
|
write_pdf false (Cpdftweak.thinlines range w pdf)
|
||||||
| Some BlackText ->
|
| Some BlackText ->
|
||||||
let pdf = get_single_pdf args.op false in
|
let pdf = get_single_pdf args.op false in
|
||||||
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
||||||
write_pdf false (Cpdf.blacktext args.color range pdf)
|
write_pdf false (Cpdftweak.blacktext args.color range pdf)
|
||||||
| Some BlackLines ->
|
| Some BlackLines ->
|
||||||
let pdf = get_single_pdf args.op false in
|
let pdf = get_single_pdf args.op false in
|
||||||
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
||||||
write_pdf false (Cpdf.blacklines args.color range pdf)
|
write_pdf false (Cpdftweak.blacklines args.color range pdf)
|
||||||
| Some BlackFills ->
|
| Some BlackFills ->
|
||||||
let pdf = get_single_pdf args.op false in
|
let pdf = get_single_pdf args.op false in
|
||||||
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
||||||
write_pdf false (Cpdf.blackfills args.color range pdf)
|
write_pdf false (Cpdftweak.blackfills args.color range pdf)
|
||||||
| Some RemoveAnnotations ->
|
| Some RemoveAnnotations ->
|
||||||
let pdf = get_single_pdf args.op false in
|
let pdf = get_single_pdf args.op false in
|
||||||
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
||||||
|
@ -3711,7 +3711,7 @@ let go () =
|
||||||
let pdf = get_single_pdf args.op false in
|
let pdf = get_single_pdf args.op false in
|
||||||
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
||||||
let pdf =
|
let pdf =
|
||||||
Cpdf.stamp
|
Cpdfpage.stamp
|
||||||
args.relative_to_cropbox args.position args.topline args.midline args.fast
|
args.relative_to_cropbox args.position args.topline args.midline args.fast
|
||||||
args.scale_stamp_to_fit true range overpdf pdf
|
args.scale_stamp_to_fit true range overpdf pdf
|
||||||
in
|
in
|
||||||
|
@ -3725,14 +3725,14 @@ let go () =
|
||||||
let pdf = get_single_pdf args.op false in
|
let pdf = get_single_pdf args.op false in
|
||||||
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
||||||
let pdf =
|
let pdf =
|
||||||
Cpdf.stamp
|
Cpdfpage.stamp
|
||||||
args.relative_to_cropbox args.position args.topline args.midline args.fast
|
args.relative_to_cropbox args.position args.topline args.midline args.fast
|
||||||
args.scale_stamp_to_fit false range underpdf pdf
|
args.scale_stamp_to_fit false range underpdf pdf
|
||||||
in
|
in
|
||||||
write_pdf false pdf
|
write_pdf false pdf
|
||||||
| Some (CombinePages over) ->
|
| Some (CombinePages over) ->
|
||||||
write_pdf false
|
write_pdf false
|
||||||
(Cpdf.combine_pages args.fast (get_single_pdf args.op false) (pdfread_pdf_of_file None None over) false false true)
|
(Cpdfpage.combine_pages args.fast (get_single_pdf args.op false) (pdfread_pdf_of_file None None over) false false true)
|
||||||
| Some Encrypt ->
|
| Some Encrypt ->
|
||||||
let pdf = get_single_pdf args.op false in
|
let pdf = get_single_pdf args.op false in
|
||||||
let pdf = Cpdfsqueeze.recompress_pdf pdf
|
let pdf = Cpdfsqueeze.recompress_pdf pdf
|
||||||
|
@ -3819,22 +3819,22 @@ let go () =
|
||||||
(map Pdfpagelabels.string_of_pagelabel (Pdfpagelabels.read pdf))
|
(map Pdfpagelabels.string_of_pagelabel (Pdfpagelabels.read pdf))
|
||||||
| Some (RemoveDictEntry key) ->
|
| Some (RemoveDictEntry key) ->
|
||||||
let pdf = get_single_pdf args.op true in
|
let pdf = get_single_pdf args.op true in
|
||||||
Cpdf.remove_dict_entry pdf key args.dict_entry_search;
|
Cpdftweak.remove_dict_entry pdf key args.dict_entry_search;
|
||||||
write_pdf false pdf
|
write_pdf false pdf
|
||||||
| Some (ReplaceDictEntry key) ->
|
| Some (ReplaceDictEntry key) ->
|
||||||
let pdf = get_single_pdf args.op true in
|
let pdf = get_single_pdf args.op true in
|
||||||
Cpdf.replace_dict_entry pdf key args.replace_dict_entry_value args.dict_entry_search;
|
Cpdftweak.replace_dict_entry pdf key args.replace_dict_entry_value args.dict_entry_search;
|
||||||
write_pdf false pdf
|
write_pdf false pdf
|
||||||
| Some (PrintDictEntry key) ->
|
| Some (PrintDictEntry key) ->
|
||||||
let pdf = get_single_pdf args.op true in
|
let pdf = get_single_pdf args.op true in
|
||||||
Cpdf.print_dict_entry pdf key
|
Cpdftweak.print_dict_entry pdf key
|
||||||
| Some ListSpotColours ->
|
| Some ListSpotColours ->
|
||||||
let pdf = get_single_pdf args.op false in
|
let pdf = get_single_pdf args.op false in
|
||||||
Cpdfspot.list_spot_colours pdf
|
Cpdfspot.list_spot_colours pdf
|
||||||
| Some RemoveClipping ->
|
| Some RemoveClipping ->
|
||||||
let pdf = get_single_pdf args.op false in
|
let pdf = get_single_pdf args.op false in
|
||||||
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
||||||
write_pdf false (Cpdf.remove_clipping pdf range)
|
write_pdf false (Cpdftweak.remove_clipping pdf range)
|
||||||
| Some CreateMetadata ->
|
| Some CreateMetadata ->
|
||||||
let pdf = get_single_pdf args.op false in
|
let pdf = get_single_pdf args.op false in
|
||||||
write_pdf false (Cpdfmetadata.create_metadata pdf)
|
write_pdf false (Cpdfmetadata.create_metadata pdf)
|
||||||
|
@ -3863,16 +3863,16 @@ let go () =
|
||||||
| Some ShowBoxes ->
|
| Some ShowBoxes ->
|
||||||
let pdf = get_single_pdf args.op false in
|
let pdf = get_single_pdf args.op false in
|
||||||
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
||||||
write_pdf false (Cpdf.show_boxes pdf range)
|
write_pdf false (Cpdfpage.show_boxes pdf range)
|
||||||
| Some TrimMarks ->
|
| Some TrimMarks ->
|
||||||
let pdf = get_single_pdf args.op false in
|
let pdf = get_single_pdf args.op false in
|
||||||
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
||||||
write_pdf false (Cpdf.trim_marks pdf range)
|
write_pdf false (Cpdfpage.trim_marks pdf range)
|
||||||
| Some (Postpend s | Prepend s as x) ->
|
| Some (Postpend s | Prepend s as x) ->
|
||||||
let pdf = get_single_pdf args.op false in
|
let pdf = get_single_pdf args.op false in
|
||||||
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
||||||
let before = match x with Prepend _ -> true | _ -> false in
|
let before = match x with Prepend _ -> true | _ -> false in
|
||||||
write_pdf false (Cpdf.append_page_content s before args.fast range pdf)
|
write_pdf false (Cpdftweak.append_page_content s before args.fast range pdf)
|
||||||
| Some OutputJSON ->
|
| Some OutputJSON ->
|
||||||
let pdf = get_single_pdf args.op false in
|
let pdf = get_single_pdf args.op false in
|
||||||
write_json args.out pdf
|
write_json args.out pdf
|
||||||
|
|
390
cpdfpage.ml
390
cpdfpage.ml
|
@ -385,3 +385,393 @@ let scale_to_fit_pdf ?(fast=false) position input_scale xylist op pdf range =
|
||||||
in
|
in
|
||||||
process_pages scale_page_to_fit pdf range
|
process_pages scale_page_to_fit pdf range
|
||||||
|
|
||||||
|
(* Add stack operators to a content stream to ensure it is composeable. On
|
||||||
|
-fast, we don't check for Q deficit, assuming PDF is ISO. *)
|
||||||
|
let protect fast pdf resources content =
|
||||||
|
let deficit =
|
||||||
|
if fast then 0 else
|
||||||
|
let ops = Pdfops.parse_operators pdf resources content in
|
||||||
|
let qs = length (keep (eq Pdfops.Op_q) ops) in
|
||||||
|
let bigqs = length (keep (eq Pdfops.Op_Q) ops) in
|
||||||
|
let deficit = if qs > bigqs then qs - bigqs else 0 in
|
||||||
|
if deficit <> 0 then Printf.eprintf "Q Deficit was nonzero. Fixing. %i\n%!" deficit;
|
||||||
|
deficit
|
||||||
|
in
|
||||||
|
let addstream ops = Pdf.addobj pdf (Pdfops.stream_of_ops ops) in
|
||||||
|
let q = addstream [Pdfops.Op_q] in
|
||||||
|
let qs = addstream (many Pdfops.Op_Q deficit @ [Pdfops.Op_Q]) in
|
||||||
|
[Pdf.Indirect q] @ content @ [Pdf.Indirect qs]
|
||||||
|
|
||||||
|
(* Does the page have a defined box e.g "/CropBox" *)
|
||||||
|
let hasbox pdf page boxname =
|
||||||
|
let pages = Pdfpage.pages_of_pagetree pdf in
|
||||||
|
if page > length pages || page < 1 then raise (Failure "hasbox: bad page") else
|
||||||
|
let p = select page pages in
|
||||||
|
match Pdf.lookup_direct pdf boxname p.Pdfpage.rest with
|
||||||
|
| Some _ -> true
|
||||||
|
| _ -> false
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
(* \section{Flip pages} *)
|
||||||
|
let flip_page ?(fast=false) transform_op pdf pnum page =
|
||||||
|
let minx, miny, maxx, maxy =
|
||||||
|
Pdf.parse_rectangle page.Pdfpage.mediabox
|
||||||
|
in
|
||||||
|
let tr = transform_op minx miny maxx maxy in
|
||||||
|
let page = Cpdfutil.change_pattern_matrices_page pdf tr page in
|
||||||
|
Cpdfutil.transform_annotations pdf tr page.Pdfpage.rest;
|
||||||
|
(Pdfpage.prepend_operators pdf [Pdfops.Op_cm tr] ~fast page, pnum, tr)
|
||||||
|
|
||||||
|
let vflip_pdf ?(fast=false) pdf range =
|
||||||
|
let transform_op _ miny _ maxy =
|
||||||
|
Pdftransform.matrix_of_op
|
||||||
|
(Pdftransform.Scale ((0., ((miny +. maxy) /. 2.)), 1., -.1.))
|
||||||
|
in
|
||||||
|
process_pages (flip_page ~fast transform_op pdf) pdf range
|
||||||
|
|
||||||
|
let hflip_pdf ?(fast=false) pdf range =
|
||||||
|
let transform_op minx _ maxx _ =
|
||||||
|
Pdftransform.matrix_of_op
|
||||||
|
(Pdftransform.Scale (((minx +. maxx) /. 2., 0.), -.1., 1.))
|
||||||
|
in
|
||||||
|
process_pages (flip_page ~fast transform_op pdf) pdf range
|
||||||
|
|
||||||
|
let stamp_shift_of_position topline midline sw sh w h p =
|
||||||
|
let half x = x /. 2.
|
||||||
|
and dy =
|
||||||
|
if midline then sh /. 2.
|
||||||
|
else if topline then sh
|
||||||
|
else 0.
|
||||||
|
in
|
||||||
|
let open Cpdfposition in
|
||||||
|
match p with
|
||||||
|
| PosCentre (ox, oy) -> ox -. half sw, oy -. dy
|
||||||
|
| PosLeft (ox, oy) -> ox, oy -. dy
|
||||||
|
| PosRight (ox, oy) -> ox -. sw, oy -. dy
|
||||||
|
| Top o -> half w -. half sw, h -. o -. sh -. dy
|
||||||
|
| TopLeft o -> o, h -. sh -. o -. dy
|
||||||
|
| TopRight o -> w -. sw -. o, h -. sh -. o -. dy
|
||||||
|
| Left o -> o, half h -. half sh -. dy
|
||||||
|
| BottomLeft o -> o, o -. dy
|
||||||
|
| Bottom o -> half w -. half sw, o -. dy
|
||||||
|
| BottomRight o -> w -. sw -. o, o -. dy
|
||||||
|
| Right o -> w -. sw -. o, half h -. half sh -. dy
|
||||||
|
| Diagonal | ReverseDiagonal | Centre ->
|
||||||
|
half w -. half sw, half h -. half sh -. dy
|
||||||
|
|
||||||
|
(* Combine Pdfpage.rest items for two PDFs. For now, we combine /Annots, and
|
||||||
|
* copy everything else from adict. What else should we combine? *)
|
||||||
|
let combine_page_items pdf adict bdict =
|
||||||
|
let getannots dict =
|
||||||
|
begin match dict with
|
||||||
|
Pdf.Dictionary d ->
|
||||||
|
begin match lookup "/Annots" d with
|
||||||
|
Some (Pdf.Array items) -> items
|
||||||
|
| _ -> []
|
||||||
|
end
|
||||||
|
| _ -> []
|
||||||
|
end
|
||||||
|
in
|
||||||
|
let a_annots = getannots adict in
|
||||||
|
let b_annots = getannots bdict in
|
||||||
|
match a_annots @ b_annots with
|
||||||
|
[] -> adict
|
||||||
|
| annots -> Pdf.add_dict_entry adict "/Annots" (Pdf.Array annots)
|
||||||
|
|
||||||
|
let do_stamp relative_to_cropbox fast position topline midline scale_to_fit isover pdf o u opdf =
|
||||||
|
(* Scale page stamp o to fit page u *)
|
||||||
|
let sxmin, symin, sxmax, symax =
|
||||||
|
Pdf.parse_rectangle
|
||||||
|
(match Pdf.lookup_direct pdf "/CropBox" o.Pdfpage.rest with | Some r -> r | None -> o.Pdfpage.mediabox)
|
||||||
|
in let txmin, tymin, txmax, tymax =
|
||||||
|
Pdf.parse_rectangle
|
||||||
|
(match Pdf.lookup_direct pdf "/CropBox" u.Pdfpage.rest with | Some r -> r | None -> u.Pdfpage.mediabox)
|
||||||
|
in
|
||||||
|
let o =
|
||||||
|
if scale_to_fit then
|
||||||
|
let xmag = (txmax -. txmin) /. (sxmax -. sxmin) in
|
||||||
|
let ymag = (tymax -. tymin) /. (symax -. symin) in
|
||||||
|
let scale =
|
||||||
|
if xmag < 0.999 && ymag < 0.999 then
|
||||||
|
if xmag > ymag then xmag else ymag
|
||||||
|
else if xmag >= 1.001 && ymag >= 1.001 then
|
||||||
|
if xmag > ymag then ymag else xmag
|
||||||
|
else if xmag >= 1.001 then ymag
|
||||||
|
else xmag
|
||||||
|
in
|
||||||
|
let dx = txmin +. ((txmax -. txmin) -. (sxmax -. sxmin) *. scale) /. 2. in
|
||||||
|
let dy = tymin +. ((tymax -. tymin) -. (symax -. symin) *. scale) /. 2. in
|
||||||
|
let matrix =
|
||||||
|
(Pdftransform.matrix_of_transform
|
||||||
|
([Pdftransform.Translate (dx, dy)] @
|
||||||
|
(if relative_to_cropbox then [Pdftransform.Translate (txmin, tymin)] else []) @
|
||||||
|
[Pdftransform.Scale ((sxmin, symin), scale, scale)]))
|
||||||
|
in
|
||||||
|
Cpdfutil.transform_annotations pdf matrix o.Pdfpage.rest;
|
||||||
|
let r = Pdfpage.prepend_operators pdf [Pdfops.Op_cm matrix] ~fast o in
|
||||||
|
Cpdfutil.change_pattern_matrices_page pdf matrix r
|
||||||
|
else
|
||||||
|
let sw = sxmax -. sxmin and sh = symax -. symin
|
||||||
|
and w = txmax -. txmin and h = tymax -. tymin in
|
||||||
|
let dx, dy = stamp_shift_of_position topline midline sw sh w h position in
|
||||||
|
let matrix =
|
||||||
|
(Pdftransform.matrix_of_transform
|
||||||
|
((if relative_to_cropbox then [Pdftransform.Translate (txmin, tymin)] else []) @
|
||||||
|
[Pdftransform.Translate (dx, dy)]))
|
||||||
|
in
|
||||||
|
Cpdfutil.transform_annotations pdf matrix o.Pdfpage.rest;
|
||||||
|
let r = Pdfpage.prepend_operators pdf [Pdfops.Op_cm matrix] ~fast o in
|
||||||
|
Cpdfutil.change_pattern_matrices_page pdf matrix r
|
||||||
|
in
|
||||||
|
{u with
|
||||||
|
Pdfpage.content =
|
||||||
|
(if isover then ( @ ) else ( @@ ))
|
||||||
|
(protect fast pdf u.Pdfpage.resources u.Pdfpage.content)
|
||||||
|
(protect fast pdf o.Pdfpage.resources o.Pdfpage.content);
|
||||||
|
Pdfpage.rest =
|
||||||
|
combine_page_items pdf u.Pdfpage.rest o.Pdfpage.rest;
|
||||||
|
Pdfpage.resources =
|
||||||
|
Cpdfutil.combine_pdf_resources pdf u.Pdfpage.resources o.Pdfpage.resources}
|
||||||
|
|
||||||
|
let stamp relative_to_cropbox position topline midline fast scale_to_fit isover range over pdf =
|
||||||
|
let prefix = Pdfpage.shortest_unused_prefix pdf in
|
||||||
|
Pdfpage.add_prefix over prefix;
|
||||||
|
let marks = Pdfmarks.read_bookmarks pdf in
|
||||||
|
let marks_refnumbers = Pdf.page_reference_numbers pdf in
|
||||||
|
let pdf = Pdfmarks.remove_bookmarks pdf in
|
||||||
|
let over = Pdfmarks.remove_bookmarks over in
|
||||||
|
let pageseqs = ilist 1 (Pdfpage.endpage pdf) in
|
||||||
|
let over_firstpage_pdf =
|
||||||
|
match Pdfpage.pages_of_pagetree over with
|
||||||
|
| [] -> error "empty PDF"
|
||||||
|
| h::_ -> Pdfpage.change_pages ~changes:[(1, 1)] true over [h]
|
||||||
|
in
|
||||||
|
let merged =
|
||||||
|
Pdfmerge.merge_pdfs
|
||||||
|
false false ["a"; "b"] [pdf; over_firstpage_pdf] [pageseqs; [1]]
|
||||||
|
in
|
||||||
|
let merged =
|
||||||
|
{merged with Pdf.saved_encryption = pdf.Pdf.saved_encryption}
|
||||||
|
in
|
||||||
|
let merged = Cpdfmetadata.copy_id true pdf merged in
|
||||||
|
let merged_pages = Pdfpage.pages_of_pagetree merged in
|
||||||
|
let under_pages, over_page =
|
||||||
|
all_but_last merged_pages, last merged_pages
|
||||||
|
in
|
||||||
|
let new_pages =
|
||||||
|
map2
|
||||||
|
(fun pageseq under_page ->
|
||||||
|
do_stamp relative_to_cropbox fast position topline midline scale_to_fit isover merged
|
||||||
|
(if mem pageseq range then over_page else
|
||||||
|
Pdfpage.blankpage Pdfpaper.a4)
|
||||||
|
under_page over)
|
||||||
|
pageseqs
|
||||||
|
under_pages
|
||||||
|
in
|
||||||
|
let changed =
|
||||||
|
let changes =
|
||||||
|
map (fun x -> (x, x)) (ilist 1 (length new_pages))
|
||||||
|
in
|
||||||
|
Pdfpage.change_pages ~changes true merged new_pages
|
||||||
|
in
|
||||||
|
let new_refnumbers = Pdf.page_reference_numbers changed in
|
||||||
|
let changetable = hashtable_of_dictionary (combine marks_refnumbers new_refnumbers) in
|
||||||
|
let new_marks = map (Cpdfbookmarks.change_bookmark changetable) marks in
|
||||||
|
Pdfmarks.add_bookmarks new_marks changed
|
||||||
|
|
||||||
|
(* Combine pages from two PDFs. For now, assume equal length. *)
|
||||||
|
|
||||||
|
(* If [over] has more pages than [under], chop the excess. If the converse, pad
|
||||||
|
[over] to the same length *)
|
||||||
|
let equalize_pages under over =
|
||||||
|
let length_under = Pdfpage.endpage under in
|
||||||
|
let length_over = Pdfpage.endpage over in
|
||||||
|
if length_over > length_under then
|
||||||
|
let changes =
|
||||||
|
map (fun x -> (x, x)) (ilist 1 length_under)
|
||||||
|
in
|
||||||
|
(under,
|
||||||
|
(Pdfpage.change_pages
|
||||||
|
~changes true over (take (Pdfpage.pages_of_pagetree over) length_under)))
|
||||||
|
else if length_under > length_over then
|
||||||
|
let changes =
|
||||||
|
map (fun x -> (x, x)) (ilist 1 length_over)
|
||||||
|
in
|
||||||
|
(under,
|
||||||
|
Pdfpage.change_pages
|
||||||
|
~changes true over
|
||||||
|
(Pdfpage.pages_of_pagetree over @
|
||||||
|
(many (Pdfpage.blankpage Pdfpaper.a4) (length_under - length_over))))
|
||||||
|
else
|
||||||
|
under, over
|
||||||
|
|
||||||
|
let combine_pages fast under over scaletofit swap equalize =
|
||||||
|
let debug_combine_pages = false in
|
||||||
|
let debug_pdf pdf n =
|
||||||
|
if debug_combine_pages then
|
||||||
|
begin Pdf.remove_unreferenced pdf; Pdfwrite.pdf_to_file pdf n end
|
||||||
|
in
|
||||||
|
Pdfpage.add_prefix over (Pdfpage.shortest_unused_prefix under);
|
||||||
|
let marks_under, marks_over = Pdfmarks.read_bookmarks under, Pdfmarks.read_bookmarks over in
|
||||||
|
let under, over = if equalize then equalize_pages under over else under, over in
|
||||||
|
let under_length, over_length = Pdfpage.endpage under, Pdfpage.endpage over in
|
||||||
|
if under_length <> over_length then
|
||||||
|
raise (Pdf.PDFError "combine_pages: not of equal length")
|
||||||
|
else
|
||||||
|
let pageseqs_under = ilist 1 (Pdfpage.endpage under) in
|
||||||
|
let pageseqs_over = ilist 1 (Pdfpage.endpage over) in
|
||||||
|
let merged =
|
||||||
|
Pdfmerge.merge_pdfs
|
||||||
|
false false ["a"; "b"] [under; over] [pageseqs_under; pageseqs_over]
|
||||||
|
in
|
||||||
|
debug_pdf merged "merged.pdf";
|
||||||
|
let under_pages, over_pages =
|
||||||
|
cleave (Pdfpage.pages_of_pagetree merged) under_length
|
||||||
|
in
|
||||||
|
let new_pages =
|
||||||
|
map2
|
||||||
|
(fun o u ->
|
||||||
|
do_stamp
|
||||||
|
false fast (BottomLeft 0.) false false scaletofit (not swap) merged o u over)
|
||||||
|
over_pages under_pages
|
||||||
|
in
|
||||||
|
(* Build the changes. 123456 -> 123123 *)
|
||||||
|
let changes =
|
||||||
|
let len = length new_pages in
|
||||||
|
combine (ilist 1 (len * 2)) (let x = ilist 1 len in x @ x)
|
||||||
|
in
|
||||||
|
let changed = Pdfpage.change_pages ~changes true merged new_pages in
|
||||||
|
let r = Pdfmarks.add_bookmarks (marks_under @ marks_over) changed in
|
||||||
|
debug_pdf r "final.pdf";
|
||||||
|
r
|
||||||
|
|
||||||
|
|
||||||
|
(* Just used by cpdflib for historical reasons *)
|
||||||
|
let setBox box minx maxx miny maxy pdf range =
|
||||||
|
let set_box_page _ page =
|
||||||
|
{page with
|
||||||
|
Pdfpage.rest =
|
||||||
|
Pdf.add_dict_entry
|
||||||
|
page.Pdfpage.rest box
|
||||||
|
(Pdf.Array [Pdf.Real minx; Pdf.Real miny; Pdf.Real maxx; Pdf.Real maxy])}
|
||||||
|
in
|
||||||
|
process_pages (Cpdfutil.ppstub set_box_page) pdf range
|
||||||
|
|
||||||
|
(* \section{Cropping} *)
|
||||||
|
let crop_pdf ?(box="/CropBox") xywhlist pdf range =
|
||||||
|
let crop_page pagenum page =
|
||||||
|
{page with
|
||||||
|
Pdfpage.rest =
|
||||||
|
(Pdf.add_dict_entry
|
||||||
|
page.Pdfpage.rest
|
||||||
|
box
|
||||||
|
(let x, y, w, h = List.nth xywhlist (pagenum - 1) in
|
||||||
|
(Pdf.Array
|
||||||
|
[Pdf.Real x; Pdf.Real y;
|
||||||
|
Pdf.Real (x +. w); Pdf.Real (y +. h)])))}
|
||||||
|
in
|
||||||
|
process_pages (Cpdfutil.ppstub crop_page) pdf range
|
||||||
|
|
||||||
|
(* Add rectangles on top of pages to show Media, Crop, Art, Trim, Bleed boxes.
|
||||||
|
*
|
||||||
|
* We use different dash lengths and colours to help distinguish coincident
|
||||||
|
* boxes The sequence of operators is postpended to the page content,
|
||||||
|
* appropriately protected to prevent pollution of matrices.
|
||||||
|
*
|
||||||
|
* /MediaBox: Solid red line
|
||||||
|
* /CropBox: Dashed 7 on 7 off green line
|
||||||
|
* /ArtBox: Dashed 5 on 5 off blue line
|
||||||
|
* /TrimBox: Dashed 3 on 3 off orange line
|
||||||
|
* /BleedBox: Dashed 2 on 2 off pink line *)
|
||||||
|
let get_rectangle pdf page box =
|
||||||
|
if box = "/MediaBox" then
|
||||||
|
match page.Pdfpage.mediabox with
|
||||||
|
Pdf.Array [a; b; c; d] as r -> Some (Pdf.parse_rectangle r)
|
||||||
|
| _ -> None
|
||||||
|
else
|
||||||
|
match Pdf.lookup_direct pdf box page.Pdfpage.rest with
|
||||||
|
Some (Pdf.Array [a; b; c; d] as r) -> Some (Pdf.parse_rectangle r)
|
||||||
|
| _ -> None
|
||||||
|
|
||||||
|
let show_boxes_page fast pdf _ page =
|
||||||
|
let make_ops (r, g, b) on off boxname =
|
||||||
|
match get_rectangle pdf page boxname with
|
||||||
|
Some (r1, r2, r3, r4) ->
|
||||||
|
[Pdfops.Op_q;
|
||||||
|
Pdfops.Op_RG (r /. 255., g /. 255., b /. 255.);
|
||||||
|
Pdfops.Op_w 1.;
|
||||||
|
Pdfops.Op_d ((if on = 0. && off = 0. then [] else [on; off]), 0.);
|
||||||
|
Pdfops.Op_re (r1, r2, r3 -. r1, r4 -. r2);
|
||||||
|
Pdfops.Op_S;
|
||||||
|
Pdfops.Op_Q]
|
||||||
|
| None -> []
|
||||||
|
in
|
||||||
|
let ops =
|
||||||
|
make_ops (255., 0., 0.) 0. 0. "/MediaBox"
|
||||||
|
@ make_ops (0., 255., 0.) 7. 7. "/CropBox"
|
||||||
|
@ make_ops (0., 0., 255.) 5. 5. "/ArtBox"
|
||||||
|
@ make_ops (255.,150.,0.) 3. 3. "/TrimBox"
|
||||||
|
@ make_ops (255.,9.,147.) 2. 2. "/BleedBox"
|
||||||
|
in
|
||||||
|
Pdfpage.postpend_operators pdf ops ~fast page
|
||||||
|
|
||||||
|
let show_boxes ?(fast=false) pdf range =
|
||||||
|
process_pages (Cpdfutil.ppstub (show_boxes_page fast pdf)) pdf range
|
||||||
|
|
||||||
|
let allowance = 9.
|
||||||
|
|
||||||
|
let line (x0, y0, x1, y1) =
|
||||||
|
[Pdfops.Op_m (x0, y0);
|
||||||
|
Pdfops.Op_l (x1, y1);
|
||||||
|
Pdfops.Op_s]
|
||||||
|
|
||||||
|
let trim_marks_page fast pdf n page =
|
||||||
|
match get_rectangle pdf page "/TrimBox", get_rectangle pdf page "/MediaBox" with
|
||||||
|
| Some (tminx, tminy, tmaxx, tmaxy), Some (minx, miny, maxx, maxy) ->
|
||||||
|
let ops =
|
||||||
|
[Pdfops.Op_q;
|
||||||
|
Pdfops.Op_K (1., 1., 1., 1.);
|
||||||
|
Pdfops.Op_w 1.]
|
||||||
|
@ line (minx, tmaxy, tminy -. allowance, tmaxy) (* top left *)
|
||||||
|
@ line (tminx, tmaxy +. allowance, tminx, maxy)
|
||||||
|
@ line (tmaxx +. allowance, tmaxy, maxx, tmaxy) (* top right *)
|
||||||
|
@ line (tmaxx, tmaxy +. allowance, tmaxx, maxy)
|
||||||
|
@ line (tmaxx +. allowance, tminy, maxx, tminy) (* bottom right *)
|
||||||
|
@ line (tmaxx, tminy -. allowance, tmaxx, miny)
|
||||||
|
@ line (tminx -. allowance, tminy, minx, tminy) (* bottom left *)
|
||||||
|
@ line (tminx, tminy -. allowance, tminx, miny)
|
||||||
|
@ [Pdfops.Op_Q]
|
||||||
|
in
|
||||||
|
Pdfpage.postpend_operators pdf ops ~fast page
|
||||||
|
| _, _ ->
|
||||||
|
(*Printf.eprintf "warning: no /TrimBox found on page %i\n%!" n;*)
|
||||||
|
page
|
||||||
|
|
||||||
|
let trim_marks ?(fast=false) pdf range =
|
||||||
|
process_pages (Cpdfutil.ppstub (trim_marks_page fast pdf)) pdf range
|
||||||
|
|
||||||
|
(* copy the contents of the box f to the box t. If mediabox_if_missing is set,
|
||||||
|
the contents of the mediabox will be used if the from fox is not available. If
|
||||||
|
mediabox_is_missing is false, the page is unaltered. *)
|
||||||
|
let copy_box f t mediabox_if_missing pdf range =
|
||||||
|
process_pages
|
||||||
|
(Cpdfutil.ppstub (fun _ page ->
|
||||||
|
if f = "/MediaBox" then
|
||||||
|
{page with Pdfpage.rest =
|
||||||
|
(Pdf.add_dict_entry page.Pdfpage.rest t (page.Pdfpage.mediabox))}
|
||||||
|
else
|
||||||
|
match Pdf.lookup_direct pdf f page.Pdfpage.rest with
|
||||||
|
| Some pdfobject ->
|
||||||
|
if t = "/MediaBox"
|
||||||
|
then {page with
|
||||||
|
Pdfpage.mediabox = Pdf.direct pdf pdfobject}
|
||||||
|
else {page with Pdfpage.rest =
|
||||||
|
(Pdf.add_dict_entry page.Pdfpage.rest t (Pdf.direct pdf pdfobject))}
|
||||||
|
| None ->
|
||||||
|
if mediabox_if_missing
|
||||||
|
then {page with Pdfpage.rest = Pdf.add_dict_entry page.Pdfpage.rest t page.Pdfpage.mediabox}
|
||||||
|
else page))
|
||||||
|
pdf
|
||||||
|
range
|
||||||
|
|
||||||
|
|
37
cpdfpage.mli
37
cpdfpage.mli
|
@ -66,3 +66,40 @@ page size given by (x, y) and by the [input_scale] (e.g 1.0 = scale to fit, 0.9
|
||||||
= scale to fit leaving a border etc.). [op] is unused. *)
|
= scale to fit leaving a border etc.). [op] is unused. *)
|
||||||
val scale_to_fit_pdf : ?fast:bool -> Cpdfposition.position -> float -> (float * float) list -> 'a -> Pdf.t -> int list -> Pdf.t
|
val scale_to_fit_pdf : ?fast:bool -> Cpdfposition.position -> float -> (float * float) list -> 'a -> Pdf.t -> int list -> Pdf.t
|
||||||
|
|
||||||
|
(** {2 Stamping} *)
|
||||||
|
|
||||||
|
(** [combine_pages fast under over scaletofit swap equalize] combines the page
|
||||||
|
content of two PDFs, page-by-page. If [equalize] is true the output will have
|
||||||
|
the same number of pages as the shorter file. If [scaletofit] is true, the
|
||||||
|
[over] file will be scaled to fit. If [swap] is true, [over] and [under] are
|
||||||
|
swapped. If [fast] is true, the PDFs are assumed to be well-formed and no
|
||||||
|
fixes are done. *)
|
||||||
|
val combine_pages : bool -> Pdf.t -> Pdf.t -> bool -> bool -> bool -> Pdf.t
|
||||||
|
|
||||||
|
(** [stamp relative_to_cropbox position topline midline fast scale_to_fit isover range over pdf] stamps the first page of
|
||||||
|
[over] over each page of the PDF. The arguments have the same meaning as in
|
||||||
|
[combine_pages]. *)
|
||||||
|
val stamp : bool -> Cpdfposition.position -> bool -> bool -> bool -> bool -> bool -> int list -> Pdf.t -> Pdf.t -> Pdf.t
|
||||||
|
|
||||||
|
(** {2 Page geometry} *)
|
||||||
|
|
||||||
|
(** True if a given page in a PDF has a given box *)
|
||||||
|
val hasbox : Pdf.t -> int -> string -> bool
|
||||||
|
|
||||||
|
(** [crop_pdf xywhlist pdf range] sets the cropbox on the given pages. *)
|
||||||
|
val crop_pdf : ?box:string -> (float * float * float * float) list -> Pdf.t -> int list -> Pdf.t
|
||||||
|
|
||||||
|
(** [setBox boxname x y w h pdf range] sets the given box on the given pages. *)
|
||||||
|
val setBox : string -> float -> float -> float -> float -> Pdf.t -> int list -> Pdf.t
|
||||||
|
|
||||||
|
(** Flip the given pages vertically *)
|
||||||
|
val vflip_pdf : ?fast:bool -> Pdf.t -> int list -> Pdf.t
|
||||||
|
|
||||||
|
(** Flip the given pages horizontally *)
|
||||||
|
val hflip_pdf : ?fast:bool -> Pdf.t -> int list -> Pdf.t
|
||||||
|
|
||||||
|
val trim_marks : ?fast:bool -> Pdf.t -> int list -> Pdf.t
|
||||||
|
|
||||||
|
val show_boxes : ?fast:bool -> Pdf.t -> int list -> Pdf.t
|
||||||
|
|
||||||
|
val copy_box : string -> string -> bool -> Pdf.t -> int list -> Pdf.t
|
||||||
|
|
|
@ -0,0 +1,319 @@
|
||||||
|
open Pdfutil
|
||||||
|
open Pdfio
|
||||||
|
open Cpdferror
|
||||||
|
|
||||||
|
(* \section{Blacken text} *)
|
||||||
|
|
||||||
|
(*
|
||||||
|
\begin{verbatim}
|
||||||
|
Algorithm: Change
|
||||||
|
BT
|
||||||
|
<ops>
|
||||||
|
ET
|
||||||
|
|
||||||
|
...to...
|
||||||
|
|
||||||
|
BT
|
||||||
|
Op_g 0.
|
||||||
|
<ops minus any color, shading or gs operators>
|
||||||
|
ET
|
||||||
|
<ops minus any text positioning or text rendering ones>
|
||||||
|
\end{verbatim}
|
||||||
|
*)
|
||||||
|
let blacktext_ops colour pdf resources content =
|
||||||
|
let not_text = function
|
||||||
|
| Pdfops.Op_Tj _ | Pdfops.Op_TJ _
|
||||||
|
| Pdfops.Op_' _ | Pdfops.Op_'' (_, _, _)
|
||||||
|
| Pdfops.Op_Td (_, _) | Pdfops.Op_TD (_, _)
|
||||||
|
| Pdfops.Op_Tm _ | Pdfops.Op_T'
|
||||||
|
| Pdfops.Op_Tc _
|
||||||
|
| Pdfops.Op_Tw _
|
||||||
|
| Pdfops.Op_Tz _
|
||||||
|
| Pdfops.Op_TL _
|
||||||
|
| Pdfops.Op_Tf (_, _)
|
||||||
|
| Pdfops.Op_Tr _
|
||||||
|
| Pdfops.Op_Ts _ -> false
|
||||||
|
| _ -> true
|
||||||
|
in let textlevel = ref 0
|
||||||
|
in let removed = ref []
|
||||||
|
in let operators =
|
||||||
|
Pdfops.parse_operators pdf resources content
|
||||||
|
in
|
||||||
|
let rec remove_colourops prev = function
|
||||||
|
| [] -> rev prev
|
||||||
|
| Pdfops.Op_BT::more ->
|
||||||
|
incr textlevel;
|
||||||
|
remove_colourops
|
||||||
|
(Cpdfaddtext.colour_op colour::Pdfops.Op_BT::prev)
|
||||||
|
more
|
||||||
|
| Pdfops.Op_ET::more ->
|
||||||
|
decr textlevel;
|
||||||
|
let prev' = !removed @ Pdfops.Op_ET::prev in
|
||||||
|
removed := [];
|
||||||
|
remove_colourops prev' more
|
||||||
|
| (Pdfops.Op_G _
|
||||||
|
| Pdfops.Op_g _
|
||||||
|
| Pdfops.Op_RG (_, _, _)
|
||||||
|
| Pdfops.Op_rg (_, _, _)
|
||||||
|
| Pdfops.Op_k (_, _, _, _)
|
||||||
|
| Pdfops.Op_K (_, _, _, _)
|
||||||
|
| Pdfops.Op_SCN _
|
||||||
|
| Pdfops.Op_SC _
|
||||||
|
| Pdfops.Op_scn _
|
||||||
|
| Pdfops.Op_sc _
|
||||||
|
| Pdfops.Op_SCNName (_, _)
|
||||||
|
| Pdfops.Op_scnName (_, _)
|
||||||
|
| Pdfops.Op_CS _
|
||||||
|
| Pdfops.Op_cs _
|
||||||
|
| Pdfops.Op_sh _
|
||||||
|
| Pdfops.Op_gs _)
|
||||||
|
as op::more ->
|
||||||
|
if !textlevel > 0
|
||||||
|
then
|
||||||
|
begin
|
||||||
|
removed =| op;
|
||||||
|
remove_colourops prev more
|
||||||
|
end
|
||||||
|
else remove_colourops (op::prev) more
|
||||||
|
| op::more ->
|
||||||
|
if !textlevel > 0 && not_text op then removed =| op;
|
||||||
|
remove_colourops (op::prev) more
|
||||||
|
in
|
||||||
|
let operators' = remove_colourops [] operators in
|
||||||
|
[Pdfops.stream_of_ops operators']
|
||||||
|
|
||||||
|
(* Blacken a form xobject, writing it to the same object. *)
|
||||||
|
|
||||||
|
let blacktext c range pdf =
|
||||||
|
let blacktext_page _ page =
|
||||||
|
let content' =
|
||||||
|
blacktext_ops c pdf page.Pdfpage.resources page.Pdfpage.content
|
||||||
|
in
|
||||||
|
Cpdfutil.process_xobjects pdf page (blacktext_ops c);
|
||||||
|
{page with Pdfpage.content = content'}
|
||||||
|
in
|
||||||
|
Cpdfpage.process_pages (Cpdfutil.ppstub blacktext_page) pdf range
|
||||||
|
|
||||||
|
(* \section{Blacken lines} *)
|
||||||
|
let blacklines_ops c pdf resources content =
|
||||||
|
let rec blacken_strokeops prev = function
|
||||||
|
| [] -> rev prev
|
||||||
|
| Pdfops.Op_CS _::t ->
|
||||||
|
blacken_strokeops (Pdfops.Op_CS "/DeviceRGB"::prev) t
|
||||||
|
| (Pdfops.Op_SC _ | Pdfops.Op_SCN _ | Pdfops.Op_SCNName _ | Pdfops.Op_G _
|
||||||
|
| Pdfops.Op_RG _ | Pdfops.Op_K _)::t ->
|
||||||
|
blacken_strokeops (Cpdfaddtext.colour_op_stroke c::prev) t
|
||||||
|
| h::t -> blacken_strokeops (h::prev) t
|
||||||
|
and operators =
|
||||||
|
Pdfops.parse_operators pdf resources content
|
||||||
|
in
|
||||||
|
let operators' = blacken_strokeops [] operators in
|
||||||
|
[Pdfops.stream_of_ops operators']
|
||||||
|
|
||||||
|
let blacklines c range pdf =
|
||||||
|
let blacklines_page _ page =
|
||||||
|
let content' =
|
||||||
|
blacklines_ops c pdf page.Pdfpage.resources page.Pdfpage.content
|
||||||
|
in
|
||||||
|
Cpdfutil.process_xobjects pdf page (blacklines_ops c);
|
||||||
|
{page with Pdfpage.content = content'}
|
||||||
|
in
|
||||||
|
Cpdfpage.process_pages (Cpdfutil.ppstub blacklines_page) pdf range
|
||||||
|
|
||||||
|
(* \section{Blacken Fills} *)
|
||||||
|
let blackfills_ops c pdf resources content =
|
||||||
|
let rec blacken_fillops prev = function
|
||||||
|
| [] -> rev prev
|
||||||
|
| Pdfops.Op_cs _::t ->
|
||||||
|
blacken_fillops (Pdfops.Op_cs "/DeviceRGB"::prev) t
|
||||||
|
| (Pdfops.Op_sc _ | Pdfops.Op_scn _ | Pdfops.Op_scnName _ | Pdfops.Op_g _
|
||||||
|
| Pdfops.Op_rg _ | Pdfops.Op_k _)::t ->
|
||||||
|
blacken_fillops (Cpdfaddtext.colour_op c::prev) t
|
||||||
|
| h::t -> blacken_fillops (h::prev) t
|
||||||
|
and operators =
|
||||||
|
Pdfops.parse_operators pdf resources content
|
||||||
|
in
|
||||||
|
let operators' = blacken_fillops [] operators in
|
||||||
|
[Pdfops.stream_of_ops operators']
|
||||||
|
|
||||||
|
let blackfills c range pdf =
|
||||||
|
let blackfills_page _ page =
|
||||||
|
let content' =
|
||||||
|
blackfills_ops c pdf page.Pdfpage.resources page.Pdfpage.content
|
||||||
|
in
|
||||||
|
Cpdfutil.process_xobjects pdf page (blackfills_ops c);
|
||||||
|
{page with Pdfpage.content = content'}
|
||||||
|
in
|
||||||
|
Cpdfpage.process_pages (Cpdfutil.ppstub blackfills_page) pdf range
|
||||||
|
|
||||||
|
(* \section{Set a minimum line width to avoid dropout} *)
|
||||||
|
let thinlines range width pdf =
|
||||||
|
let thinpage _ page =
|
||||||
|
let operators =
|
||||||
|
Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content
|
||||||
|
in
|
||||||
|
let ctmstack = ref [ref Pdftransform.i_matrix] in
|
||||||
|
let scaleof_ctm () =
|
||||||
|
try
|
||||||
|
match Pdftransform.decompose (!(hd !ctmstack)) with
|
||||||
|
(scale, _, _, _, _, _) ->
|
||||||
|
scale
|
||||||
|
with
|
||||||
|
Failure _ (*"hd"*) -> 1.
|
||||||
|
in
|
||||||
|
let rec replace_operators prev = function
|
||||||
|
| [] -> rev prev
|
||||||
|
| (Pdfops.Op_w w)::more ->
|
||||||
|
(* Alter width. *)
|
||||||
|
let width' = width /. scaleof_ctm () in
|
||||||
|
let w' =
|
||||||
|
if w >= width' then Pdfops.Op_w w else Pdfops.Op_w width'
|
||||||
|
in
|
||||||
|
replace_operators (w'::prev) more
|
||||||
|
| (Pdfops.Op_cm m)::more ->
|
||||||
|
(* Update CTM *)
|
||||||
|
begin try
|
||||||
|
let top = hd !ctmstack in
|
||||||
|
top := Pdftransform.matrix_compose !top m
|
||||||
|
with
|
||||||
|
Failure _ (*"hd"*) -> error "Malformed file."
|
||||||
|
end;
|
||||||
|
replace_operators ((Pdfops.Op_cm m)::prev) more
|
||||||
|
| Pdfops.Op_q::more ->
|
||||||
|
(* Push stack *)
|
||||||
|
begin try
|
||||||
|
ctmstack =| ref (!(hd !ctmstack))
|
||||||
|
with
|
||||||
|
Failure _ (*"hd"*) -> error "Malformed file"
|
||||||
|
end;
|
||||||
|
replace_operators (Pdfops.Op_q::prev) more
|
||||||
|
| Pdfops.Op_Q::more ->
|
||||||
|
(* Pop stack *)
|
||||||
|
begin try
|
||||||
|
ctmstack := tl !ctmstack
|
||||||
|
with
|
||||||
|
Failure _ (*"tl"*) -> error "Malformed file"
|
||||||
|
end;
|
||||||
|
replace_operators (Pdfops.Op_Q::prev) more
|
||||||
|
| (Pdfops.Op_gs gsname)::more ->
|
||||||
|
(* Perhaps insert [Op_w]. *)
|
||||||
|
let opw =
|
||||||
|
match Pdf.lookup_direct pdf "/ExtGState" page.Pdfpage.resources with
|
||||||
|
| None -> []
|
||||||
|
| Some ext_state_dict ->
|
||||||
|
match Pdf.lookup_direct pdf gsname ext_state_dict with
|
||||||
|
| None -> []
|
||||||
|
| Some gdict ->
|
||||||
|
match Pdf.lookup_direct pdf "/LW" gdict with
|
||||||
|
| Some s -> (try [Pdfops.Op_w (Pdf.getnum s)] with _ -> [])
|
||||||
|
| None -> []
|
||||||
|
in
|
||||||
|
replace_operators (opw @ ((Pdfops.Op_gs gsname)::prev)) more
|
||||||
|
| x::more -> replace_operators (x::prev) more
|
||||||
|
in
|
||||||
|
let operators = replace_operators [] operators in
|
||||||
|
(* 2. Add an initial 'w' if width more than default width *)
|
||||||
|
let operators =
|
||||||
|
if width > 1. then (Pdfops.Op_w width)::operators else operators
|
||||||
|
in
|
||||||
|
let content' = [Pdfops.stream_of_ops operators] in
|
||||||
|
{page with Pdfpage.content = content'}
|
||||||
|
in
|
||||||
|
Cpdfpage.process_pages (Cpdfutil.ppstub thinpage) pdf range
|
||||||
|
|
||||||
|
(* Parse the new content to make sure syntactically ok, append
|
||||||
|
* as required. Rewrite the content *)
|
||||||
|
let append_page_content_page fast s before pdf n page =
|
||||||
|
let ops =
|
||||||
|
Pdfops.parse_stream pdf page.Pdfpage.resources [bytes_of_string s]
|
||||||
|
in
|
||||||
|
(if before then Pdfpage.prepend_operators else Pdfpage.postpend_operators)
|
||||||
|
pdf ops ~fast page
|
||||||
|
|
||||||
|
let append_page_content s before fast range pdf =
|
||||||
|
Cpdfpage.process_pages (Cpdfutil.ppstub (append_page_content_page fast s before pdf)) pdf range
|
||||||
|
|
||||||
|
|
||||||
|
(* 1. Extend remove_dict_entry with search term
|
||||||
|
2. Implement replace_dict_entry by analogy to remove_dict_entry *)
|
||||||
|
let rec dict_entry_single_object f pdf = function
|
||||||
|
| (Pdf.Dictionary d) -> f (Pdf.recurse_dict (dict_entry_single_object f pdf) d)
|
||||||
|
| (Pdf.Stream {contents = (Pdf.Dictionary dict, data)}) ->
|
||||||
|
f (Pdf.Stream {contents = (Pdf.recurse_dict (dict_entry_single_object f pdf) dict, data)})
|
||||||
|
| Pdf.Array a -> Pdf.recurse_array (dict_entry_single_object f pdf) a
|
||||||
|
| x -> x
|
||||||
|
|
||||||
|
(* FIXME are we sure that functional values can never appear in the equality here? *)
|
||||||
|
let remove_dict_entry pdf key search =
|
||||||
|
let f d =
|
||||||
|
match search with
|
||||||
|
| None -> Pdf.remove_dict_entry d key
|
||||||
|
| Some s ->
|
||||||
|
match Pdf.lookup_direct pdf key d with
|
||||||
|
| Some v when v = s -> Pdf.remove_dict_entry d key
|
||||||
|
| _ -> d
|
||||||
|
in
|
||||||
|
Pdf.objselfmap (dict_entry_single_object f pdf) pdf;
|
||||||
|
pdf.Pdf.trailerdict <- dict_entry_single_object f pdf pdf.Pdf.trailerdict
|
||||||
|
|
||||||
|
let replace_dict_entry pdf key value search =
|
||||||
|
let f d =
|
||||||
|
match search with
|
||||||
|
| None -> Pdf.replace_dict_entry d key value
|
||||||
|
| Some s ->
|
||||||
|
match Pdf.lookup_direct pdf key d with
|
||||||
|
| Some v when v = s -> Pdf.replace_dict_entry d key value
|
||||||
|
| _ -> d
|
||||||
|
in
|
||||||
|
Pdf.objselfmap (dict_entry_single_object f pdf) pdf;
|
||||||
|
pdf.Pdf.trailerdict <- dict_entry_single_object f pdf pdf.Pdf.trailerdict
|
||||||
|
|
||||||
|
(* FIXME no need to self map here, since nothing changes *)
|
||||||
|
let print_dict_entry pdf key =
|
||||||
|
let f d =
|
||||||
|
match Pdf.lookup_direct pdf key d with
|
||||||
|
| Some v ->
|
||||||
|
(* We use a double newline as a separator. *)
|
||||||
|
Printf.printf "%s\n\n" (Cpdfyojson.Safe.to_string (Cpdfjson.json_of_object pdf (fun _ -> ()) false false v));
|
||||||
|
d
|
||||||
|
| None -> d
|
||||||
|
in
|
||||||
|
Pdf.objselfmap (dict_entry_single_object f pdf) pdf;
|
||||||
|
pdf.Pdf.trailerdict <- dict_entry_single_object f pdf pdf.Pdf.trailerdict
|
||||||
|
|
||||||
|
let remove_clipping_ops pdf resources content =
|
||||||
|
let ops = Pdfops.parse_operators pdf resources content in
|
||||||
|
let rec process a = function
|
||||||
|
Pdfops.Op_W::Pdfops.Op_n::t -> process (Pdfops.Op_n::a) t
|
||||||
|
| h::t -> process (h::a) t
|
||||||
|
| [] -> rev a
|
||||||
|
in
|
||||||
|
[Pdfops.stream_of_ops (process [] ops)]
|
||||||
|
|
||||||
|
let remove_clipping pdf range =
|
||||||
|
let remove_clipping_page _ page =
|
||||||
|
let content' =
|
||||||
|
remove_clipping_ops pdf page.Pdfpage.resources page.Pdfpage.content
|
||||||
|
in
|
||||||
|
Cpdfutil.process_xobjects pdf page remove_clipping_ops;
|
||||||
|
{page with Pdfpage.content = content'}
|
||||||
|
in
|
||||||
|
Cpdfpage.process_pages (Cpdfutil.ppstub remove_clipping_page) pdf range
|
||||||
|
|
||||||
|
let remove_unused_resources_page pdf n page =
|
||||||
|
let xobjects, all_names =
|
||||||
|
match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with
|
||||||
|
| Some (Pdf.Dictionary d) -> Pdf.Dictionary d, map fst d
|
||||||
|
| _ -> Pdf.Dictionary [], []
|
||||||
|
in
|
||||||
|
let names_to_keep =
|
||||||
|
option_map
|
||||||
|
(function Pdfops.Op_Do n -> Some n | _ -> None)
|
||||||
|
(Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content)
|
||||||
|
in
|
||||||
|
let names_to_remove = lose (mem' names_to_keep) all_names in
|
||||||
|
let xobjdict = fold_left (Pdf.remove_dict_entry) xobjects names_to_remove in
|
||||||
|
{page with Pdfpage.resources = Pdf.add_dict_entry page.Pdfpage.resources "/XObject" xobjdict}
|
||||||
|
|
||||||
|
let remove_unused_resources pdf =
|
||||||
|
Cpdfpage.process_pages (Cpdfutil.ppstub (remove_unused_resources_page pdf)) pdf (ilist 1 (Pdfpage.endpage pdf))
|
|
@ -0,0 +1,25 @@
|
||||||
|
(** {2 Miscellany} *)
|
||||||
|
|
||||||
|
(** Make all lines in the PDF at least a certain thickness. *)
|
||||||
|
val thinlines : int list -> float -> Pdf.t -> Pdf.t
|
||||||
|
|
||||||
|
(** Make all text on certain pages black. *)
|
||||||
|
val blacktext : Cpdfaddtext.color -> int list -> Pdf.t -> Pdf.t
|
||||||
|
|
||||||
|
(** Make all lines on certain pages black. *)
|
||||||
|
val blacklines : Cpdfaddtext.color -> int list -> Pdf.t -> Pdf.t
|
||||||
|
|
||||||
|
(** Make all fills on certain pages black. *)
|
||||||
|
val blackfills : Cpdfaddtext.color -> int list -> Pdf.t -> Pdf.t
|
||||||
|
|
||||||
|
val append_page_content : string -> bool -> bool -> int list -> Pdf.t -> Pdf.t
|
||||||
|
|
||||||
|
val remove_dict_entry : Pdf.t -> string -> Pdf.pdfobject option -> unit
|
||||||
|
|
||||||
|
val replace_dict_entry : Pdf.t -> string -> Pdf.pdfobject -> Pdf.pdfobject option -> unit
|
||||||
|
|
||||||
|
val print_dict_entry : Pdf.t -> string -> unit
|
||||||
|
|
||||||
|
val remove_clipping : Pdf.t -> int list -> Pdf.t
|
||||||
|
|
||||||
|
val remove_unused_resources : Pdf.t -> Pdf.t
|
Loading…
Reference in New Issue