diff --git a/Makefile b/Makefile index c791c5c..ac37dc4 100644 --- a/Makefile +++ b/Makefile @@ -2,9 +2,10 @@ MODS = cpdfyojson cpdfxmlm cpdfutil \ cpdfunicodedata cpdferror cpdfdebug cpdfjson cpdfstrftime cpdfcoord \ cpdfattach cpdfpagespec cpdfposition cpdfpresent cpdfmetadata \ - cpdfbookmarks cpdfpage cpdfaddtext cpdf cpdfimage cpdffont cpdftype \ + cpdfbookmarks cpdfpage cpdfaddtext cpdfimage cpdffont cpdftype \ cpdftexttopdf cpdftoc cpdfpad cpdfocg cpdfsqueeze cpdfdraft cpdfspot \ - cpdfpagelabels cpdfcreate cpdfannot cpdfxobject cpdfimpose cpdfcommand + cpdfpagelabels cpdfcreate cpdfannot cpdfxobject cpdfimpose cpdftweak \ + cpdfcommand SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml @@ -24,9 +25,14 @@ clean :: rm -rf doc foo foo2 out.pdf out2.pdf foo.pdf decomp.pdf *.cmt *.cmti \ *.json test/*.pdf debug/*.pdf *.ps -DOC_FILES = cpdferror.mli cpdfjson.mli cpdfstrftime.mli cpdfcoord.mli \ - cpdfattach.mli cpdfpagespec.mli cpdfposition.mli cpdf.mli \ - cpdfcommand.mli +DOC_FILES = cpdfunicodedata.mli cpdferror.mli cpdfdebug.mli cpdfjson.mli \ + cpdfstrftime.mli cpdfcoord.mli cpdfattach.mli cpdfpagespec.mli \ + cpdfposition.mli cpdfpresent.mli cpdfmetadata.mli \ + cpdfbookmarks.mli cpdfpage.mli cpdfaddtext.mli cpdfimage.mli \ + cpdffont.mli cpdftype.mli cpdftexttopdf.mli cpdftoc.mli \ + cpdfpad.mli cpdfocg.mli cpdfsqueeze.mli cpdfdraft.mli \ + cpdfspot.mli cpdfpagelabels.mli cpdfcreate.mli cpdfannot.mli \ + cpdfxobject.mli cpdfimpose.mli cpdftweak.mli cpdfcommand.mli LIBINSTALL_FILES = cpdf.a cpdf.cma cpdf.cmxa \ $(foreach x,$(MODS),$x.mli) $(foreach x,$(MODS),$x.cmi) \ diff --git a/cpdfcommand.ml b/cpdfcommand.ml index a615a09..78aa23e 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -3069,7 +3069,7 @@ let go () = begin match args.inputs, args.out with | _::_, _ -> let pdf = get_single_pdf (Some RemoveUnusedResources) false in - let outpdf = Cpdf.remove_unused_resources pdf in + let outpdf = Cpdftweak.remove_unused_resources pdf in write_pdf true outpdf | _ -> error "RemoveUnusedResources: bad command line" end @@ -3180,7 +3180,7 @@ let go () = let pdf = get_single_pdf (Some Crop) false in let xywhlist = Cpdfcoord.parse_rectangles pdf args.rectangle in let range = parse_pagespec_allow_empty pdf pagespec in - let pdf = Cpdf.crop_pdf xywhlist pdf range in + let pdf = Cpdfpage.crop_pdf xywhlist pdf range in write_pdf false pdf | _ -> error "crop: bad command line" end @@ -3190,7 +3190,7 @@ let go () = let pdf = get_single_pdf (Some Art) false in let xywhlist = Cpdfcoord.parse_rectangles pdf args.rectangle in let range = parse_pagespec_allow_empty pdf pagespec in - let pdf = Cpdf.crop_pdf ~box:"/ArtBox" xywhlist pdf range in + let pdf = Cpdfpage.crop_pdf ~box:"/ArtBox" xywhlist pdf range in write_pdf false pdf | _ -> error "art: bad command line" end @@ -3200,7 +3200,7 @@ let go () = let pdf = get_single_pdf (Some Bleed) false in let xywhlist = Cpdfcoord.parse_rectangles pdf args.rectangle in let range = parse_pagespec_allow_empty pdf pagespec in - let pdf = Cpdf.crop_pdf ~box:"/BleedBox" xywhlist pdf range in + let pdf = Cpdfpage.crop_pdf ~box:"/BleedBox" xywhlist pdf range in write_pdf false pdf | _ -> error "bleed: bad command line" end @@ -3210,7 +3210,7 @@ let go () = let pdf = get_single_pdf (Some Trim) false in let xywhlist = Cpdfcoord.parse_rectangles pdf args.rectangle in let range = parse_pagespec_allow_empty pdf pagespec in - let pdf = Cpdf.crop_pdf ~box:"/TrimBox" xywhlist pdf range in + let pdf = Cpdfpage.crop_pdf ~box:"/TrimBox" xywhlist pdf range in write_pdf false pdf | _ -> error "trim: bad command line" end @@ -3244,7 +3244,7 @@ let go () = | _ -> error "Copy box: no tobox or no frombox specified" end in - let pdf = Cpdf.copy_box f t args.mediabox_if_missing pdf range in + let pdf = Cpdfpage.copy_box f t args.mediabox_if_missing pdf range in write_pdf false pdf | _ -> error "Copy Box: bad command line" end @@ -3346,8 +3346,8 @@ let go () = let range = parse_pagespec_allow_empty pdf pagespec in let pdf = if flip = VFlip - then Cpdf.vflip_pdf ~fast:args.fast pdf range - else Cpdf.hflip_pdf ~fast:args.fast pdf range + then Cpdfpage.vflip_pdf ~fast:args.fast pdf range + else Cpdfpage.hflip_pdf ~fast:args.fast pdf range in write_pdf false pdf | _ -> error "flip: bad command line" @@ -3524,19 +3524,19 @@ let go () = | Some (ThinLines w) -> let pdf = get_single_pdf args.op false in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in - write_pdf false (Cpdf.thinlines range w pdf) + write_pdf false (Cpdftweak.thinlines range w pdf) | Some BlackText -> let pdf = get_single_pdf args.op false in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in - write_pdf false (Cpdf.blacktext args.color range pdf) + write_pdf false (Cpdftweak.blacktext args.color range pdf) | Some BlackLines -> let pdf = get_single_pdf args.op false in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in - write_pdf false (Cpdf.blacklines args.color range pdf) + write_pdf false (Cpdftweak.blacklines args.color range pdf) | Some BlackFills -> let pdf = get_single_pdf args.op false in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in - write_pdf false (Cpdf.blackfills args.color range pdf) + write_pdf false (Cpdftweak.blackfills args.color range pdf) | Some RemoveAnnotations -> let pdf = get_single_pdf args.op false in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in @@ -3711,7 +3711,7 @@ let go () = let pdf = get_single_pdf args.op false in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in let pdf = - Cpdf.stamp + Cpdfpage.stamp args.relative_to_cropbox args.position args.topline args.midline args.fast args.scale_stamp_to_fit true range overpdf pdf in @@ -3725,14 +3725,14 @@ let go () = let pdf = get_single_pdf args.op false in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in let pdf = - Cpdf.stamp + Cpdfpage.stamp args.relative_to_cropbox args.position args.topline args.midline args.fast args.scale_stamp_to_fit false range underpdf pdf in write_pdf false pdf | Some (CombinePages over) -> write_pdf false - (Cpdf.combine_pages args.fast (get_single_pdf args.op false) (pdfread_pdf_of_file None None over) false false true) + (Cpdfpage.combine_pages args.fast (get_single_pdf args.op false) (pdfread_pdf_of_file None None over) false false true) | Some Encrypt -> let pdf = get_single_pdf args.op false in let pdf = Cpdfsqueeze.recompress_pdf pdf @@ -3819,22 +3819,22 @@ let go () = (map Pdfpagelabels.string_of_pagelabel (Pdfpagelabels.read pdf)) | Some (RemoveDictEntry key) -> let pdf = get_single_pdf args.op true in - Cpdf.remove_dict_entry pdf key args.dict_entry_search; + Cpdftweak.remove_dict_entry pdf key args.dict_entry_search; write_pdf false pdf | Some (ReplaceDictEntry key) -> let pdf = get_single_pdf args.op true in - Cpdf.replace_dict_entry pdf key args.replace_dict_entry_value args.dict_entry_search; + Cpdftweak.replace_dict_entry pdf key args.replace_dict_entry_value args.dict_entry_search; write_pdf false pdf | Some (PrintDictEntry key) -> let pdf = get_single_pdf args.op true in - Cpdf.print_dict_entry pdf key + Cpdftweak.print_dict_entry pdf key | Some ListSpotColours -> let pdf = get_single_pdf args.op false in Cpdfspot.list_spot_colours pdf | Some RemoveClipping -> let pdf = get_single_pdf args.op false in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in - write_pdf false (Cpdf.remove_clipping pdf range) + write_pdf false (Cpdftweak.remove_clipping pdf range) | Some CreateMetadata -> let pdf = get_single_pdf args.op false in write_pdf false (Cpdfmetadata.create_metadata pdf) @@ -3863,16 +3863,16 @@ let go () = | Some ShowBoxes -> let pdf = get_single_pdf args.op false in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in - write_pdf false (Cpdf.show_boxes pdf range) + write_pdf false (Cpdfpage.show_boxes pdf range) | Some TrimMarks -> let pdf = get_single_pdf args.op false in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in - write_pdf false (Cpdf.trim_marks pdf range) + write_pdf false (Cpdfpage.trim_marks pdf range) | Some (Postpend s | Prepend s as x) -> let pdf = get_single_pdf args.op false in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in let before = match x with Prepend _ -> true | _ -> false in - write_pdf false (Cpdf.append_page_content s before args.fast range pdf) + write_pdf false (Cpdftweak.append_page_content s before args.fast range pdf) | Some OutputJSON -> let pdf = get_single_pdf args.op false in write_json args.out pdf diff --git a/cpdfpage.ml b/cpdfpage.ml index 29f8707..4efa53e 100644 --- a/cpdfpage.ml +++ b/cpdfpage.ml @@ -385,3 +385,393 @@ let scale_to_fit_pdf ?(fast=false) position input_scale xylist op pdf range = in process_pages scale_page_to_fit pdf range +(* Add stack operators to a content stream to ensure it is composeable. On +-fast, we don't check for Q deficit, assuming PDF is ISO. *) +let protect fast pdf resources content = + let deficit = + if fast then 0 else + let ops = Pdfops.parse_operators pdf resources content in + let qs = length (keep (eq Pdfops.Op_q) ops) in + let bigqs = length (keep (eq Pdfops.Op_Q) ops) in + let deficit = if qs > bigqs then qs - bigqs else 0 in + if deficit <> 0 then Printf.eprintf "Q Deficit was nonzero. Fixing. %i\n%!" deficit; + deficit + in + let addstream ops = Pdf.addobj pdf (Pdfops.stream_of_ops ops) in + let q = addstream [Pdfops.Op_q] in + let qs = addstream (many Pdfops.Op_Q deficit @ [Pdfops.Op_Q]) in + [Pdf.Indirect q] @ content @ [Pdf.Indirect qs] + +(* Does the page have a defined box e.g "/CropBox" *) +let hasbox pdf page boxname = + let pages = Pdfpage.pages_of_pagetree pdf in + if page > length pages || page < 1 then raise (Failure "hasbox: bad page") else + let p = select page pages in + match Pdf.lookup_direct pdf boxname p.Pdfpage.rest with + | Some _ -> true + | _ -> false + + + +(* \section{Flip pages} *) +let flip_page ?(fast=false) transform_op pdf pnum page = + let minx, miny, maxx, maxy = + Pdf.parse_rectangle page.Pdfpage.mediabox + in + let tr = transform_op minx miny maxx maxy in + let page = Cpdfutil.change_pattern_matrices_page pdf tr page in + Cpdfutil.transform_annotations pdf tr page.Pdfpage.rest; + (Pdfpage.prepend_operators pdf [Pdfops.Op_cm tr] ~fast page, pnum, tr) + +let vflip_pdf ?(fast=false) pdf range = + let transform_op _ miny _ maxy = + Pdftransform.matrix_of_op + (Pdftransform.Scale ((0., ((miny +. maxy) /. 2.)), 1., -.1.)) + in + process_pages (flip_page ~fast transform_op pdf) pdf range + +let hflip_pdf ?(fast=false) pdf range = + let transform_op minx _ maxx _ = + Pdftransform.matrix_of_op + (Pdftransform.Scale (((minx +. maxx) /. 2., 0.), -.1., 1.)) + in + process_pages (flip_page ~fast transform_op pdf) pdf range + +let stamp_shift_of_position topline midline sw sh w h p = + let half x = x /. 2. + and dy = + if midline then sh /. 2. + else if topline then sh + else 0. + in + let open Cpdfposition in + match p with + | PosCentre (ox, oy) -> ox -. half sw, oy -. dy + | PosLeft (ox, oy) -> ox, oy -. dy + | PosRight (ox, oy) -> ox -. sw, oy -. dy + | Top o -> half w -. half sw, h -. o -. sh -. dy + | TopLeft o -> o, h -. sh -. o -. dy + | TopRight o -> w -. sw -. o, h -. sh -. o -. dy + | Left o -> o, half h -. half sh -. dy + | BottomLeft o -> o, o -. dy + | Bottom o -> half w -. half sw, o -. dy + | BottomRight o -> w -. sw -. o, o -. dy + | Right o -> w -. sw -. o, half h -. half sh -. dy + | Diagonal | ReverseDiagonal | Centre -> + half w -. half sw, half h -. half sh -. dy + +(* Combine Pdfpage.rest items for two PDFs. For now, we combine /Annots, and + * copy everything else from adict. What else should we combine? *) +let combine_page_items pdf adict bdict = + let getannots dict = + begin match dict with + Pdf.Dictionary d -> + begin match lookup "/Annots" d with + Some (Pdf.Array items) -> items + | _ -> [] + end + | _ -> [] + end + in + let a_annots = getannots adict in + let b_annots = getannots bdict in + match a_annots @ b_annots with + [] -> adict + | annots -> Pdf.add_dict_entry adict "/Annots" (Pdf.Array annots) + +let do_stamp relative_to_cropbox fast position topline midline scale_to_fit isover pdf o u opdf = + (* Scale page stamp o to fit page u *) + let sxmin, symin, sxmax, symax = + Pdf.parse_rectangle + (match Pdf.lookup_direct pdf "/CropBox" o.Pdfpage.rest with | Some r -> r | None -> o.Pdfpage.mediabox) + in let txmin, tymin, txmax, tymax = + Pdf.parse_rectangle + (match Pdf.lookup_direct pdf "/CropBox" u.Pdfpage.rest with | Some r -> r | None -> u.Pdfpage.mediabox) + in + let o = + if scale_to_fit then + let xmag = (txmax -. txmin) /. (sxmax -. sxmin) in + let ymag = (tymax -. tymin) /. (symax -. symin) in + let scale = + if xmag < 0.999 && ymag < 0.999 then + if xmag > ymag then xmag else ymag + else if xmag >= 1.001 && ymag >= 1.001 then + if xmag > ymag then ymag else xmag + else if xmag >= 1.001 then ymag + else xmag + in + let dx = txmin +. ((txmax -. txmin) -. (sxmax -. sxmin) *. scale) /. 2. in + let dy = tymin +. ((tymax -. tymin) -. (symax -. symin) *. scale) /. 2. in + let matrix = + (Pdftransform.matrix_of_transform + ([Pdftransform.Translate (dx, dy)] @ + (if relative_to_cropbox then [Pdftransform.Translate (txmin, tymin)] else []) @ + [Pdftransform.Scale ((sxmin, symin), scale, scale)])) + in + Cpdfutil.transform_annotations pdf matrix o.Pdfpage.rest; + let r = Pdfpage.prepend_operators pdf [Pdfops.Op_cm matrix] ~fast o in + Cpdfutil.change_pattern_matrices_page pdf matrix r + else + let sw = sxmax -. sxmin and sh = symax -. symin + and w = txmax -. txmin and h = tymax -. tymin in + let dx, dy = stamp_shift_of_position topline midline sw sh w h position in + let matrix = + (Pdftransform.matrix_of_transform + ((if relative_to_cropbox then [Pdftransform.Translate (txmin, tymin)] else []) @ + [Pdftransform.Translate (dx, dy)])) + in + Cpdfutil.transform_annotations pdf matrix o.Pdfpage.rest; + let r = Pdfpage.prepend_operators pdf [Pdfops.Op_cm matrix] ~fast o in + Cpdfutil.change_pattern_matrices_page pdf matrix r + in + {u with + Pdfpage.content = + (if isover then ( @ ) else ( @@ )) + (protect fast pdf u.Pdfpage.resources u.Pdfpage.content) + (protect fast pdf o.Pdfpage.resources o.Pdfpage.content); + Pdfpage.rest = + combine_page_items pdf u.Pdfpage.rest o.Pdfpage.rest; + Pdfpage.resources = + Cpdfutil.combine_pdf_resources pdf u.Pdfpage.resources o.Pdfpage.resources} + +let stamp relative_to_cropbox position topline midline fast scale_to_fit isover range over pdf = + let prefix = Pdfpage.shortest_unused_prefix pdf in + Pdfpage.add_prefix over prefix; + let marks = Pdfmarks.read_bookmarks pdf in + let marks_refnumbers = Pdf.page_reference_numbers pdf in + let pdf = Pdfmarks.remove_bookmarks pdf in + let over = Pdfmarks.remove_bookmarks over in + let pageseqs = ilist 1 (Pdfpage.endpage pdf) in + let over_firstpage_pdf = + match Pdfpage.pages_of_pagetree over with + | [] -> error "empty PDF" + | h::_ -> Pdfpage.change_pages ~changes:[(1, 1)] true over [h] + in + let merged = + Pdfmerge.merge_pdfs + false false ["a"; "b"] [pdf; over_firstpage_pdf] [pageseqs; [1]] + in + let merged = + {merged with Pdf.saved_encryption = pdf.Pdf.saved_encryption} + in + let merged = Cpdfmetadata.copy_id true pdf merged in + let merged_pages = Pdfpage.pages_of_pagetree merged in + let under_pages, over_page = + all_but_last merged_pages, last merged_pages + in + let new_pages = + map2 + (fun pageseq under_page -> + do_stamp relative_to_cropbox fast position topline midline scale_to_fit isover merged + (if mem pageseq range then over_page else + Pdfpage.blankpage Pdfpaper.a4) + under_page over) + pageseqs + under_pages + in + let changed = + let changes = + map (fun x -> (x, x)) (ilist 1 (length new_pages)) + in + Pdfpage.change_pages ~changes true merged new_pages + in + let new_refnumbers = Pdf.page_reference_numbers changed in + let changetable = hashtable_of_dictionary (combine marks_refnumbers new_refnumbers) in + let new_marks = map (Cpdfbookmarks.change_bookmark changetable) marks in + Pdfmarks.add_bookmarks new_marks changed + +(* Combine pages from two PDFs. For now, assume equal length. *) + +(* If [over] has more pages than [under], chop the excess. If the converse, pad +[over] to the same length *) +let equalize_pages under over = + let length_under = Pdfpage.endpage under in + let length_over = Pdfpage.endpage over in + if length_over > length_under then + let changes = + map (fun x -> (x, x)) (ilist 1 length_under) + in + (under, + (Pdfpage.change_pages + ~changes true over (take (Pdfpage.pages_of_pagetree over) length_under))) + else if length_under > length_over then + let changes = + map (fun x -> (x, x)) (ilist 1 length_over) + in + (under, + Pdfpage.change_pages + ~changes true over + (Pdfpage.pages_of_pagetree over @ + (many (Pdfpage.blankpage Pdfpaper.a4) (length_under - length_over)))) + else + under, over + +let combine_pages fast under over scaletofit swap equalize = + let debug_combine_pages = false in + let debug_pdf pdf n = + if debug_combine_pages then + begin Pdf.remove_unreferenced pdf; Pdfwrite.pdf_to_file pdf n end + in + Pdfpage.add_prefix over (Pdfpage.shortest_unused_prefix under); + let marks_under, marks_over = Pdfmarks.read_bookmarks under, Pdfmarks.read_bookmarks over in + let under, over = if equalize then equalize_pages under over else under, over in + let under_length, over_length = Pdfpage.endpage under, Pdfpage.endpage over in + if under_length <> over_length then + raise (Pdf.PDFError "combine_pages: not of equal length") + else + let pageseqs_under = ilist 1 (Pdfpage.endpage under) in + let pageseqs_over = ilist 1 (Pdfpage.endpage over) in + let merged = + Pdfmerge.merge_pdfs + false false ["a"; "b"] [under; over] [pageseqs_under; pageseqs_over] + in + debug_pdf merged "merged.pdf"; + let under_pages, over_pages = + cleave (Pdfpage.pages_of_pagetree merged) under_length + in + let new_pages = + map2 + (fun o u -> + do_stamp + false fast (BottomLeft 0.) false false scaletofit (not swap) merged o u over) + over_pages under_pages + in + (* Build the changes. 123456 -> 123123 *) + let changes = + let len = length new_pages in + combine (ilist 1 (len * 2)) (let x = ilist 1 len in x @ x) + in + let changed = Pdfpage.change_pages ~changes true merged new_pages in + let r = Pdfmarks.add_bookmarks (marks_under @ marks_over) changed in + debug_pdf r "final.pdf"; + r + + +(* Just used by cpdflib for historical reasons *) +let setBox box minx maxx miny maxy pdf range = + let set_box_page _ page = + {page with + Pdfpage.rest = + Pdf.add_dict_entry + page.Pdfpage.rest box + (Pdf.Array [Pdf.Real minx; Pdf.Real miny; Pdf.Real maxx; Pdf.Real maxy])} + in + process_pages (Cpdfutil.ppstub set_box_page) pdf range + +(* \section{Cropping} *) +let crop_pdf ?(box="/CropBox") xywhlist pdf range = + let crop_page pagenum page = + {page with + Pdfpage.rest = + (Pdf.add_dict_entry + page.Pdfpage.rest + box + (let x, y, w, h = List.nth xywhlist (pagenum - 1) in + (Pdf.Array + [Pdf.Real x; Pdf.Real y; + Pdf.Real (x +. w); Pdf.Real (y +. h)])))} + in + process_pages (Cpdfutil.ppstub crop_page) pdf range + +(* Add rectangles on top of pages to show Media, Crop, Art, Trim, Bleed boxes. + * + * We use different dash lengths and colours to help distinguish coincident + * boxes The sequence of operators is postpended to the page content, + * appropriately protected to prevent pollution of matrices. + * + * /MediaBox: Solid red line + * /CropBox: Dashed 7 on 7 off green line + * /ArtBox: Dashed 5 on 5 off blue line + * /TrimBox: Dashed 3 on 3 off orange line + * /BleedBox: Dashed 2 on 2 off pink line *) +let get_rectangle pdf page box = + if box = "/MediaBox" then + match page.Pdfpage.mediabox with + Pdf.Array [a; b; c; d] as r -> Some (Pdf.parse_rectangle r) + | _ -> None + else + match Pdf.lookup_direct pdf box page.Pdfpage.rest with + Some (Pdf.Array [a; b; c; d] as r) -> Some (Pdf.parse_rectangle r) + | _ -> None + +let show_boxes_page fast pdf _ page = + let make_ops (r, g, b) on off boxname = + match get_rectangle pdf page boxname with + Some (r1, r2, r3, r4) -> + [Pdfops.Op_q; + Pdfops.Op_RG (r /. 255., g /. 255., b /. 255.); + Pdfops.Op_w 1.; + Pdfops.Op_d ((if on = 0. && off = 0. then [] else [on; off]), 0.); + Pdfops.Op_re (r1, r2, r3 -. r1, r4 -. r2); + Pdfops.Op_S; + Pdfops.Op_Q] + | None -> [] + in + let ops = + make_ops (255., 0., 0.) 0. 0. "/MediaBox" + @ make_ops (0., 255., 0.) 7. 7. "/CropBox" + @ make_ops (0., 0., 255.) 5. 5. "/ArtBox" + @ make_ops (255.,150.,0.) 3. 3. "/TrimBox" + @ make_ops (255.,9.,147.) 2. 2. "/BleedBox" + in + Pdfpage.postpend_operators pdf ops ~fast page + +let show_boxes ?(fast=false) pdf range = + process_pages (Cpdfutil.ppstub (show_boxes_page fast pdf)) pdf range + +let allowance = 9. + +let line (x0, y0, x1, y1) = + [Pdfops.Op_m (x0, y0); + Pdfops.Op_l (x1, y1); + Pdfops.Op_s] + +let trim_marks_page fast pdf n page = + match get_rectangle pdf page "/TrimBox", get_rectangle pdf page "/MediaBox" with + | Some (tminx, tminy, tmaxx, tmaxy), Some (minx, miny, maxx, maxy) -> + let ops = + [Pdfops.Op_q; + Pdfops.Op_K (1., 1., 1., 1.); + Pdfops.Op_w 1.] + @ line (minx, tmaxy, tminy -. allowance, tmaxy) (* top left *) + @ line (tminx, tmaxy +. allowance, tminx, maxy) + @ line (tmaxx +. allowance, tmaxy, maxx, tmaxy) (* top right *) + @ line (tmaxx, tmaxy +. allowance, tmaxx, maxy) + @ line (tmaxx +. allowance, tminy, maxx, tminy) (* bottom right *) + @ line (tmaxx, tminy -. allowance, tmaxx, miny) + @ line (tminx -. allowance, tminy, minx, tminy) (* bottom left *) + @ line (tminx, tminy -. allowance, tminx, miny) + @ [Pdfops.Op_Q] + in + Pdfpage.postpend_operators pdf ops ~fast page + | _, _ -> + (*Printf.eprintf "warning: no /TrimBox found on page %i\n%!" n;*) + page + +let trim_marks ?(fast=false) pdf range = + process_pages (Cpdfutil.ppstub (trim_marks_page fast pdf)) pdf range + +(* copy the contents of the box f to the box t. If mediabox_if_missing is set, +the contents of the mediabox will be used if the from fox is not available. If +mediabox_is_missing is false, the page is unaltered. *) +let copy_box f t mediabox_if_missing pdf range = + process_pages + (Cpdfutil.ppstub (fun _ page -> + if f = "/MediaBox" then + {page with Pdfpage.rest = + (Pdf.add_dict_entry page.Pdfpage.rest t (page.Pdfpage.mediabox))} + else + match Pdf.lookup_direct pdf f page.Pdfpage.rest with + | Some pdfobject -> + if t = "/MediaBox" + then {page with + Pdfpage.mediabox = Pdf.direct pdf pdfobject} + else {page with Pdfpage.rest = + (Pdf.add_dict_entry page.Pdfpage.rest t (Pdf.direct pdf pdfobject))} + | None -> + if mediabox_if_missing + then {page with Pdfpage.rest = Pdf.add_dict_entry page.Pdfpage.rest t page.Pdfpage.mediabox} + else page)) + pdf + range + diff --git a/cpdfpage.mli b/cpdfpage.mli index ba0512d..c7dc93d 100644 --- a/cpdfpage.mli +++ b/cpdfpage.mli @@ -66,3 +66,40 @@ page size given by (x, y) and by the [input_scale] (e.g 1.0 = scale to fit, 0.9 = scale to fit leaving a border etc.). [op] is unused. *) val scale_to_fit_pdf : ?fast:bool -> Cpdfposition.position -> float -> (float * float) list -> 'a -> Pdf.t -> int list -> Pdf.t +(** {2 Stamping} *) + +(** [combine_pages fast under over scaletofit swap equalize] combines the page +content of two PDFs, page-by-page. If [equalize] is true the output will have +the same number of pages as the shorter file. If [scaletofit] is true, the +[over] file will be scaled to fit. If [swap] is true, [over] and [under] are +swapped. If [fast] is true, the PDFs are assumed to be well-formed and no +fixes are done. *) +val combine_pages : bool -> Pdf.t -> Pdf.t -> bool -> bool -> bool -> Pdf.t + +(** [stamp relative_to_cropbox position topline midline fast scale_to_fit isover range over pdf] stamps the first page of +[over] over each page of the PDF. The arguments have the same meaning as in +[combine_pages]. *) +val stamp : bool -> Cpdfposition.position -> bool -> bool -> bool -> bool -> bool -> int list -> Pdf.t -> Pdf.t -> Pdf.t + +(** {2 Page geometry} *) + +(** True if a given page in a PDF has a given box *) +val hasbox : Pdf.t -> int -> string -> bool + +(** [crop_pdf xywhlist pdf range] sets the cropbox on the given pages. *) +val crop_pdf : ?box:string -> (float * float * float * float) list -> Pdf.t -> int list -> Pdf.t + +(** [setBox boxname x y w h pdf range] sets the given box on the given pages. *) +val setBox : string -> float -> float -> float -> float -> Pdf.t -> int list -> Pdf.t + +(** Flip the given pages vertically *) +val vflip_pdf : ?fast:bool -> Pdf.t -> int list -> Pdf.t + +(** Flip the given pages horizontally *) +val hflip_pdf : ?fast:bool -> Pdf.t -> int list -> Pdf.t + +val trim_marks : ?fast:bool -> Pdf.t -> int list -> Pdf.t + +val show_boxes : ?fast:bool -> Pdf.t -> int list -> Pdf.t + +val copy_box : string -> string -> bool -> Pdf.t -> int list -> Pdf.t diff --git a/cpdftweak.ml b/cpdftweak.ml new file mode 100644 index 0000000..9187e86 --- /dev/null +++ b/cpdftweak.ml @@ -0,0 +1,319 @@ +open Pdfutil +open Pdfio +open Cpdferror + +(* \section{Blacken text} *) + +(* + \begin{verbatim} + Algorithm: Change + BT + + ET + + ...to... + + BT + Op_g 0. + + ET + + \end{verbatim} +*) +let blacktext_ops colour pdf resources content = + let not_text = function + | Pdfops.Op_Tj _ | Pdfops.Op_TJ _ + | Pdfops.Op_' _ | Pdfops.Op_'' (_, _, _) + | Pdfops.Op_Td (_, _) | Pdfops.Op_TD (_, _) + | Pdfops.Op_Tm _ | Pdfops.Op_T' + | Pdfops.Op_Tc _ + | Pdfops.Op_Tw _ + | Pdfops.Op_Tz _ + | Pdfops.Op_TL _ + | Pdfops.Op_Tf (_, _) + | Pdfops.Op_Tr _ + | Pdfops.Op_Ts _ -> false + | _ -> true + in let textlevel = ref 0 + in let removed = ref [] + in let operators = + Pdfops.parse_operators pdf resources content + in + let rec remove_colourops prev = function + | [] -> rev prev + | Pdfops.Op_BT::more -> + incr textlevel; + remove_colourops + (Cpdfaddtext.colour_op colour::Pdfops.Op_BT::prev) + more + | Pdfops.Op_ET::more -> + decr textlevel; + let prev' = !removed @ Pdfops.Op_ET::prev in + removed := []; + remove_colourops prev' more + | (Pdfops.Op_G _ + | Pdfops.Op_g _ + | Pdfops.Op_RG (_, _, _) + | Pdfops.Op_rg (_, _, _) + | Pdfops.Op_k (_, _, _, _) + | Pdfops.Op_K (_, _, _, _) + | Pdfops.Op_SCN _ + | Pdfops.Op_SC _ + | Pdfops.Op_scn _ + | Pdfops.Op_sc _ + | Pdfops.Op_SCNName (_, _) + | Pdfops.Op_scnName (_, _) + | Pdfops.Op_CS _ + | Pdfops.Op_cs _ + | Pdfops.Op_sh _ + | Pdfops.Op_gs _) + as op::more -> + if !textlevel > 0 + then + begin + removed =| op; + remove_colourops prev more + end + else remove_colourops (op::prev) more + | op::more -> + if !textlevel > 0 && not_text op then removed =| op; + remove_colourops (op::prev) more + in + let operators' = remove_colourops [] operators in + [Pdfops.stream_of_ops operators'] + +(* Blacken a form xobject, writing it to the same object. *) + +let blacktext c range pdf = + let blacktext_page _ page = + let content' = + blacktext_ops c pdf page.Pdfpage.resources page.Pdfpage.content + in + Cpdfutil.process_xobjects pdf page (blacktext_ops c); + {page with Pdfpage.content = content'} + in + Cpdfpage.process_pages (Cpdfutil.ppstub blacktext_page) pdf range + +(* \section{Blacken lines} *) +let blacklines_ops c pdf resources content = + let rec blacken_strokeops prev = function + | [] -> rev prev + | Pdfops.Op_CS _::t -> + blacken_strokeops (Pdfops.Op_CS "/DeviceRGB"::prev) t + | (Pdfops.Op_SC _ | Pdfops.Op_SCN _ | Pdfops.Op_SCNName _ | Pdfops.Op_G _ + | Pdfops.Op_RG _ | Pdfops.Op_K _)::t -> + blacken_strokeops (Cpdfaddtext.colour_op_stroke c::prev) t + | h::t -> blacken_strokeops (h::prev) t + and operators = + Pdfops.parse_operators pdf resources content + in + let operators' = blacken_strokeops [] operators in + [Pdfops.stream_of_ops operators'] + +let blacklines c range pdf = + let blacklines_page _ page = + let content' = + blacklines_ops c pdf page.Pdfpage.resources page.Pdfpage.content + in + Cpdfutil.process_xobjects pdf page (blacklines_ops c); + {page with Pdfpage.content = content'} + in + Cpdfpage.process_pages (Cpdfutil.ppstub blacklines_page) pdf range + +(* \section{Blacken Fills} *) +let blackfills_ops c pdf resources content = + let rec blacken_fillops prev = function + | [] -> rev prev + | Pdfops.Op_cs _::t -> + blacken_fillops (Pdfops.Op_cs "/DeviceRGB"::prev) t + | (Pdfops.Op_sc _ | Pdfops.Op_scn _ | Pdfops.Op_scnName _ | Pdfops.Op_g _ + | Pdfops.Op_rg _ | Pdfops.Op_k _)::t -> + blacken_fillops (Cpdfaddtext.colour_op c::prev) t + | h::t -> blacken_fillops (h::prev) t + and operators = + Pdfops.parse_operators pdf resources content + in + let operators' = blacken_fillops [] operators in + [Pdfops.stream_of_ops operators'] + +let blackfills c range pdf = + let blackfills_page _ page = + let content' = + blackfills_ops c pdf page.Pdfpage.resources page.Pdfpage.content + in + Cpdfutil.process_xobjects pdf page (blackfills_ops c); + {page with Pdfpage.content = content'} + in + Cpdfpage.process_pages (Cpdfutil.ppstub blackfills_page) pdf range + +(* \section{Set a minimum line width to avoid dropout} *) +let thinlines range width pdf = + let thinpage _ page = + let operators = + Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content + in + let ctmstack = ref [ref Pdftransform.i_matrix] in + let scaleof_ctm () = + try + match Pdftransform.decompose (!(hd !ctmstack)) with + (scale, _, _, _, _, _) -> + scale + with + Failure _ (*"hd"*) -> 1. + in + let rec replace_operators prev = function + | [] -> rev prev + | (Pdfops.Op_w w)::more -> + (* Alter width. *) + let width' = width /. scaleof_ctm () in + let w' = + if w >= width' then Pdfops.Op_w w else Pdfops.Op_w width' + in + replace_operators (w'::prev) more + | (Pdfops.Op_cm m)::more -> + (* Update CTM *) + begin try + let top = hd !ctmstack in + top := Pdftransform.matrix_compose !top m + with + Failure _ (*"hd"*) -> error "Malformed file." + end; + replace_operators ((Pdfops.Op_cm m)::prev) more + | Pdfops.Op_q::more -> + (* Push stack *) + begin try + ctmstack =| ref (!(hd !ctmstack)) + with + Failure _ (*"hd"*) -> error "Malformed file" + end; + replace_operators (Pdfops.Op_q::prev) more + | Pdfops.Op_Q::more -> + (* Pop stack *) + begin try + ctmstack := tl !ctmstack + with + Failure _ (*"tl"*) -> error "Malformed file" + end; + replace_operators (Pdfops.Op_Q::prev) more + | (Pdfops.Op_gs gsname)::more -> + (* Perhaps insert [Op_w]. *) + let opw = + match Pdf.lookup_direct pdf "/ExtGState" page.Pdfpage.resources with + | None -> [] + | Some ext_state_dict -> + match Pdf.lookup_direct pdf gsname ext_state_dict with + | None -> [] + | Some gdict -> + match Pdf.lookup_direct pdf "/LW" gdict with + | Some s -> (try [Pdfops.Op_w (Pdf.getnum s)] with _ -> []) + | None -> [] + in + replace_operators (opw @ ((Pdfops.Op_gs gsname)::prev)) more + | x::more -> replace_operators (x::prev) more + in + let operators = replace_operators [] operators in + (* 2. Add an initial 'w' if width more than default width *) + let operators = + if width > 1. then (Pdfops.Op_w width)::operators else operators + in + let content' = [Pdfops.stream_of_ops operators] in + {page with Pdfpage.content = content'} + in + Cpdfpage.process_pages (Cpdfutil.ppstub thinpage) pdf range + +(* Parse the new content to make sure syntactically ok, append + * as required. Rewrite the content *) +let append_page_content_page fast s before pdf n page = + let ops = + Pdfops.parse_stream pdf page.Pdfpage.resources [bytes_of_string s] + in + (if before then Pdfpage.prepend_operators else Pdfpage.postpend_operators) + pdf ops ~fast page + +let append_page_content s before fast range pdf = + Cpdfpage.process_pages (Cpdfutil.ppstub (append_page_content_page fast s before pdf)) pdf range + + +(* 1. Extend remove_dict_entry with search term + 2. Implement replace_dict_entry by analogy to remove_dict_entry *) +let rec dict_entry_single_object f pdf = function + | (Pdf.Dictionary d) -> f (Pdf.recurse_dict (dict_entry_single_object f pdf) d) + | (Pdf.Stream {contents = (Pdf.Dictionary dict, data)}) -> + f (Pdf.Stream {contents = (Pdf.recurse_dict (dict_entry_single_object f pdf) dict, data)}) + | Pdf.Array a -> Pdf.recurse_array (dict_entry_single_object f pdf) a + | x -> x + +(* FIXME are we sure that functional values can never appear in the equality here? *) +let remove_dict_entry pdf key search = + let f d = + match search with + | None -> Pdf.remove_dict_entry d key + | Some s -> + match Pdf.lookup_direct pdf key d with + | Some v when v = s -> Pdf.remove_dict_entry d key + | _ -> d + in + Pdf.objselfmap (dict_entry_single_object f pdf) pdf; + pdf.Pdf.trailerdict <- dict_entry_single_object f pdf pdf.Pdf.trailerdict + +let replace_dict_entry pdf key value search = + let f d = + match search with + | None -> Pdf.replace_dict_entry d key value + | Some s -> + match Pdf.lookup_direct pdf key d with + | Some v when v = s -> Pdf.replace_dict_entry d key value + | _ -> d + in + Pdf.objselfmap (dict_entry_single_object f pdf) pdf; + pdf.Pdf.trailerdict <- dict_entry_single_object f pdf pdf.Pdf.trailerdict + +(* FIXME no need to self map here, since nothing changes *) +let print_dict_entry pdf key = + let f d = + match Pdf.lookup_direct pdf key d with + | Some v -> + (* We use a double newline as a separator. *) + Printf.printf "%s\n\n" (Cpdfyojson.Safe.to_string (Cpdfjson.json_of_object pdf (fun _ -> ()) false false v)); + d + | None -> d + in + Pdf.objselfmap (dict_entry_single_object f pdf) pdf; + pdf.Pdf.trailerdict <- dict_entry_single_object f pdf pdf.Pdf.trailerdict + +let remove_clipping_ops pdf resources content = + let ops = Pdfops.parse_operators pdf resources content in + let rec process a = function + Pdfops.Op_W::Pdfops.Op_n::t -> process (Pdfops.Op_n::a) t + | h::t -> process (h::a) t + | [] -> rev a + in + [Pdfops.stream_of_ops (process [] ops)] + +let remove_clipping pdf range = + let remove_clipping_page _ page = + let content' = + remove_clipping_ops pdf page.Pdfpage.resources page.Pdfpage.content + in + Cpdfutil.process_xobjects pdf page remove_clipping_ops; + {page with Pdfpage.content = content'} + in + Cpdfpage.process_pages (Cpdfutil.ppstub remove_clipping_page) pdf range + +let remove_unused_resources_page pdf n page = + let xobjects, all_names = + match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with + | Some (Pdf.Dictionary d) -> Pdf.Dictionary d, map fst d + | _ -> Pdf.Dictionary [], [] + in + let names_to_keep = + option_map + (function Pdfops.Op_Do n -> Some n | _ -> None) + (Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content) + in + let names_to_remove = lose (mem' names_to_keep) all_names in + let xobjdict = fold_left (Pdf.remove_dict_entry) xobjects names_to_remove in + {page with Pdfpage.resources = Pdf.add_dict_entry page.Pdfpage.resources "/XObject" xobjdict} + +let remove_unused_resources pdf = + Cpdfpage.process_pages (Cpdfutil.ppstub (remove_unused_resources_page pdf)) pdf (ilist 1 (Pdfpage.endpage pdf)) diff --git a/cpdftweak.mli b/cpdftweak.mli new file mode 100644 index 0000000..c002aec --- /dev/null +++ b/cpdftweak.mli @@ -0,0 +1,25 @@ +(** {2 Miscellany} *) + +(** Make all lines in the PDF at least a certain thickness. *) +val thinlines : int list -> float -> Pdf.t -> Pdf.t + +(** Make all text on certain pages black. *) +val blacktext : Cpdfaddtext.color -> int list -> Pdf.t -> Pdf.t + +(** Make all lines on certain pages black. *) +val blacklines : Cpdfaddtext.color -> int list -> Pdf.t -> Pdf.t + +(** Make all fills on certain pages black. *) +val blackfills : Cpdfaddtext.color -> int list -> Pdf.t -> Pdf.t + +val append_page_content : string -> bool -> bool -> int list -> Pdf.t -> Pdf.t + +val remove_dict_entry : Pdf.t -> string -> Pdf.pdfobject option -> unit + +val replace_dict_entry : Pdf.t -> string -> Pdf.pdfobject -> Pdf.pdfobject option -> unit + +val print_dict_entry : Pdf.t -> string -> unit + +val remove_clipping : Pdf.t -> int list -> Pdf.t + +val remove_unused_resources : Pdf.t -> Pdf.t