diff --git a/cpdf.ml b/cpdf.ml deleted file mode 100644 index 8c8ea13..0000000 --- a/cpdf.ml +++ /dev/null @@ -1,712 +0,0 @@ -(* CPDF Core routines *) -open Pdfutil -open Pdfio -open Cpdferror - -(* Add stack operators to a content stream to ensure it is composeable. On --fast, we don't check for Q deficit, assuming PDF is ISO. *) -let protect fast pdf resources content = - let deficit = - if fast then 0 else - let ops = Pdfops.parse_operators pdf resources content in - let qs = length (keep (eq Pdfops.Op_q) ops) in - let bigqs = length (keep (eq Pdfops.Op_Q) ops) in - let deficit = if qs > bigqs then qs - bigqs else 0 in - if deficit <> 0 then Printf.eprintf "Q Deficit was nonzero. Fixing. %i\n%!" deficit; - deficit - in - let addstream ops = Pdf.addobj pdf (Pdfops.stream_of_ops ops) in - let q = addstream [Pdfops.Op_q] in - let qs = addstream (many Pdfops.Op_Q deficit @ [Pdfops.Op_Q]) in - [Pdf.Indirect q] @ content @ [Pdf.Indirect qs] - -(* Does the page have a defined box e.g "/CropBox" *) -let hasbox pdf page boxname = - let pages = Pdfpage.pages_of_pagetree pdf in - if page > length pages || page < 1 then raise (Failure "hasbox: bad page") else - let p = select page pages in - match Pdf.lookup_direct pdf boxname p.Pdfpage.rest with - | Some _ -> true - | _ -> false - - - -(* \section{Flip pages} *) -let flip_page ?(fast=false) transform_op pdf pnum page = - let minx, miny, maxx, maxy = - Pdf.parse_rectangle page.Pdfpage.mediabox - in - let tr = transform_op minx miny maxx maxy in - let page = Cpdfutil.change_pattern_matrices_page pdf tr page in - Cpdfutil.transform_annotations pdf tr page.Pdfpage.rest; - (Pdfpage.prepend_operators pdf [Pdfops.Op_cm tr] ~fast page, pnum, tr) - -let vflip_pdf ?(fast=false) pdf range = - let transform_op _ miny _ maxy = - Pdftransform.matrix_of_op - (Pdftransform.Scale ((0., ((miny +. maxy) /. 2.)), 1., -.1.)) - in - Cpdfpage.process_pages (flip_page ~fast transform_op pdf) pdf range - -let hflip_pdf ?(fast=false) pdf range = - let transform_op minx _ maxx _ = - Pdftransform.matrix_of_op - (Pdftransform.Scale (((minx +. maxx) /. 2., 0.), -.1., 1.)) - in - Cpdfpage.process_pages (flip_page ~fast transform_op pdf) pdf range - -let stamp_shift_of_position topline midline sw sh w h p = - let half x = x /. 2. - and dy = - if midline then sh /. 2. - else if topline then sh - else 0. - in - let open Cpdfposition in - match p with - | PosCentre (ox, oy) -> ox -. half sw, oy -. dy - | PosLeft (ox, oy) -> ox, oy -. dy - | PosRight (ox, oy) -> ox -. sw, oy -. dy - | Top o -> half w -. half sw, h -. o -. sh -. dy - | TopLeft o -> o, h -. sh -. o -. dy - | TopRight o -> w -. sw -. o, h -. sh -. o -. dy - | Left o -> o, half h -. half sh -. dy - | BottomLeft o -> o, o -. dy - | Bottom o -> half w -. half sw, o -. dy - | BottomRight o -> w -. sw -. o, o -. dy - | Right o -> w -. sw -. o, half h -. half sh -. dy - | Diagonal | ReverseDiagonal | Centre -> - half w -. half sw, half h -. half sh -. dy - -(* Combine Pdfpage.rest items for two PDFs. For now, we combine /Annots, and - * copy everything else from adict. What else should we combine? *) -let combine_page_items pdf adict bdict = - let getannots dict = - begin match dict with - Pdf.Dictionary d -> - begin match lookup "/Annots" d with - Some (Pdf.Array items) -> items - | _ -> [] - end - | _ -> [] - end - in - let a_annots = getannots adict in - let b_annots = getannots bdict in - match a_annots @ b_annots with - [] -> adict - | annots -> Pdf.add_dict_entry adict "/Annots" (Pdf.Array annots) - -let do_stamp relative_to_cropbox fast position topline midline scale_to_fit isover pdf o u opdf = - (* Scale page stamp o to fit page u *) - let sxmin, symin, sxmax, symax = - Pdf.parse_rectangle - (match Pdf.lookup_direct pdf "/CropBox" o.Pdfpage.rest with | Some r -> r | None -> o.Pdfpage.mediabox) - in let txmin, tymin, txmax, tymax = - Pdf.parse_rectangle - (match Pdf.lookup_direct pdf "/CropBox" u.Pdfpage.rest with | Some r -> r | None -> u.Pdfpage.mediabox) - in - let o = - if scale_to_fit then - let xmag = (txmax -. txmin) /. (sxmax -. sxmin) in - let ymag = (tymax -. tymin) /. (symax -. symin) in - let scale = - if xmag < 0.999 && ymag < 0.999 then - if xmag > ymag then xmag else ymag - else if xmag >= 1.001 && ymag >= 1.001 then - if xmag > ymag then ymag else xmag - else if xmag >= 1.001 then ymag - else xmag - in - let dx = txmin +. ((txmax -. txmin) -. (sxmax -. sxmin) *. scale) /. 2. in - let dy = tymin +. ((tymax -. tymin) -. (symax -. symin) *. scale) /. 2. in - let matrix = - (Pdftransform.matrix_of_transform - ([Pdftransform.Translate (dx, dy)] @ - (if relative_to_cropbox then [Pdftransform.Translate (txmin, tymin)] else []) @ - [Pdftransform.Scale ((sxmin, symin), scale, scale)])) - in - Cpdfutil.transform_annotations pdf matrix o.Pdfpage.rest; - let r = Pdfpage.prepend_operators pdf [Pdfops.Op_cm matrix] ~fast o in - Cpdfutil.change_pattern_matrices_page pdf matrix r - else - let sw = sxmax -. sxmin and sh = symax -. symin - and w = txmax -. txmin and h = tymax -. tymin in - let dx, dy = stamp_shift_of_position topline midline sw sh w h position in - let matrix = - (Pdftransform.matrix_of_transform - ((if relative_to_cropbox then [Pdftransform.Translate (txmin, tymin)] else []) @ - [Pdftransform.Translate (dx, dy)])) - in - Cpdfutil.transform_annotations pdf matrix o.Pdfpage.rest; - let r = Pdfpage.prepend_operators pdf [Pdfops.Op_cm matrix] ~fast o in - Cpdfutil.change_pattern_matrices_page pdf matrix r - in - {u with - Pdfpage.content = - (if isover then ( @ ) else ( @@ )) - (protect fast pdf u.Pdfpage.resources u.Pdfpage.content) - (protect fast pdf o.Pdfpage.resources o.Pdfpage.content); - Pdfpage.rest = - combine_page_items pdf u.Pdfpage.rest o.Pdfpage.rest; - Pdfpage.resources = - Cpdfutil.combine_pdf_resources pdf u.Pdfpage.resources o.Pdfpage.resources} - -let stamp relative_to_cropbox position topline midline fast scale_to_fit isover range over pdf = - let prefix = Pdfpage.shortest_unused_prefix pdf in - Pdfpage.add_prefix over prefix; - let marks = Pdfmarks.read_bookmarks pdf in - let marks_refnumbers = Pdf.page_reference_numbers pdf in - let pdf = Pdfmarks.remove_bookmarks pdf in - let over = Pdfmarks.remove_bookmarks over in - let pageseqs = ilist 1 (Pdfpage.endpage pdf) in - let over_firstpage_pdf = - match Pdfpage.pages_of_pagetree over with - | [] -> error "empty PDF" - | h::_ -> Pdfpage.change_pages ~changes:[(1, 1)] true over [h] - in - let merged = - Pdfmerge.merge_pdfs - false false ["a"; "b"] [pdf; over_firstpage_pdf] [pageseqs; [1]] - in - let merged = - {merged with Pdf.saved_encryption = pdf.Pdf.saved_encryption} - in - let merged = Cpdfmetadata.copy_id true pdf merged in - let merged_pages = Pdfpage.pages_of_pagetree merged in - let under_pages, over_page = - all_but_last merged_pages, last merged_pages - in - let new_pages = - map2 - (fun pageseq under_page -> - do_stamp relative_to_cropbox fast position topline midline scale_to_fit isover merged - (if mem pageseq range then over_page else - Pdfpage.blankpage Pdfpaper.a4) - under_page over) - pageseqs - under_pages - in - let changed = - let changes = - map (fun x -> (x, x)) (ilist 1 (length new_pages)) - in - Pdfpage.change_pages ~changes true merged new_pages - in - let new_refnumbers = Pdf.page_reference_numbers changed in - let changetable = hashtable_of_dictionary (combine marks_refnumbers new_refnumbers) in - let new_marks = map (Cpdfbookmarks.change_bookmark changetable) marks in - Pdfmarks.add_bookmarks new_marks changed - -(* Combine pages from two PDFs. For now, assume equal length. *) - -(* If [over] has more pages than [under], chop the excess. If the converse, pad -[over] to the same length *) -let equalize_pages under over = - let length_under = Pdfpage.endpage under in - let length_over = Pdfpage.endpage over in - if length_over > length_under then - let changes = - map (fun x -> (x, x)) (ilist 1 length_under) - in - (under, - (Pdfpage.change_pages - ~changes true over (take (Pdfpage.pages_of_pagetree over) length_under))) - else if length_under > length_over then - let changes = - map (fun x -> (x, x)) (ilist 1 length_over) - in - (under, - Pdfpage.change_pages - ~changes true over - (Pdfpage.pages_of_pagetree over @ - (many (Pdfpage.blankpage Pdfpaper.a4) (length_under - length_over)))) - else - under, over - -let combine_pages fast under over scaletofit swap equalize = - let debug_combine_pages = false in - let debug_pdf pdf n = - if debug_combine_pages then - begin Pdf.remove_unreferenced pdf; Pdfwrite.pdf_to_file pdf n end - in - Pdfpage.add_prefix over (Pdfpage.shortest_unused_prefix under); - let marks_under, marks_over = Pdfmarks.read_bookmarks under, Pdfmarks.read_bookmarks over in - let under, over = if equalize then equalize_pages under over else under, over in - let under_length, over_length = Pdfpage.endpage under, Pdfpage.endpage over in - if under_length <> over_length then - raise (Pdf.PDFError "combine_pages: not of equal length") - else - let pageseqs_under = ilist 1 (Pdfpage.endpage under) in - let pageseqs_over = ilist 1 (Pdfpage.endpage over) in - let merged = - Pdfmerge.merge_pdfs - false false ["a"; "b"] [under; over] [pageseqs_under; pageseqs_over] - in - debug_pdf merged "merged.pdf"; - let under_pages, over_pages = - cleave (Pdfpage.pages_of_pagetree merged) under_length - in - let new_pages = - map2 - (fun o u -> - do_stamp - false fast (BottomLeft 0.) false false scaletofit (not swap) merged o u over) - over_pages under_pages - in - (* Build the changes. 123456 -> 123123 *) - let changes = - let len = length new_pages in - combine (ilist 1 (len * 2)) (let x = ilist 1 len in x @ x) - in - let changed = Pdfpage.change_pages ~changes true merged new_pages in - let r = Pdfmarks.add_bookmarks (marks_under @ marks_over) changed in - debug_pdf r "final.pdf"; - r - - -(* Just used by cpdflib for historical reasons *) -let setBox box minx maxx miny maxy pdf range = - let set_box_page _ page = - {page with - Pdfpage.rest = - Pdf.add_dict_entry - page.Pdfpage.rest box - (Pdf.Array [Pdf.Real minx; Pdf.Real miny; Pdf.Real maxx; Pdf.Real maxy])} - in - Cpdfpage.process_pages (Cpdfutil.ppstub set_box_page) pdf range - -(* \section{Cropping} *) -let crop_pdf ?(box="/CropBox") xywhlist pdf range = - let crop_page pagenum page = - {page with - Pdfpage.rest = - (Pdf.add_dict_entry - page.Pdfpage.rest - box - (let x, y, w, h = List.nth xywhlist (pagenum - 1) in - (Pdf.Array - [Pdf.Real x; Pdf.Real y; - Pdf.Real (x +. w); Pdf.Real (y +. h)])))} - in - Cpdfpage.process_pages (Cpdfutil.ppstub crop_page) pdf range - - -(* \section{Blacken text} *) - -(* - \begin{verbatim} - Algorithm: Change - BT - - ET - - ...to... - - BT - Op_g 0. - - ET - - \end{verbatim} -*) -let blacktext_ops colour pdf resources content = - let not_text = function - | Pdfops.Op_Tj _ | Pdfops.Op_TJ _ - | Pdfops.Op_' _ | Pdfops.Op_'' (_, _, _) - | Pdfops.Op_Td (_, _) | Pdfops.Op_TD (_, _) - | Pdfops.Op_Tm _ | Pdfops.Op_T' - | Pdfops.Op_Tc _ - | Pdfops.Op_Tw _ - | Pdfops.Op_Tz _ - | Pdfops.Op_TL _ - | Pdfops.Op_Tf (_, _) - | Pdfops.Op_Tr _ - | Pdfops.Op_Ts _ -> false - | _ -> true - in let textlevel = ref 0 - in let removed = ref [] - in let operators = - Pdfops.parse_operators pdf resources content - in - let rec remove_colourops prev = function - | [] -> rev prev - | Pdfops.Op_BT::more -> - incr textlevel; - remove_colourops - (Cpdfaddtext.colour_op colour::Pdfops.Op_BT::prev) - more - | Pdfops.Op_ET::more -> - decr textlevel; - let prev' = !removed @ Pdfops.Op_ET::prev in - removed := []; - remove_colourops prev' more - | (Pdfops.Op_G _ - | Pdfops.Op_g _ - | Pdfops.Op_RG (_, _, _) - | Pdfops.Op_rg (_, _, _) - | Pdfops.Op_k (_, _, _, _) - | Pdfops.Op_K (_, _, _, _) - | Pdfops.Op_SCN _ - | Pdfops.Op_SC _ - | Pdfops.Op_scn _ - | Pdfops.Op_sc _ - | Pdfops.Op_SCNName (_, _) - | Pdfops.Op_scnName (_, _) - | Pdfops.Op_CS _ - | Pdfops.Op_cs _ - | Pdfops.Op_sh _ - | Pdfops.Op_gs _) - as op::more -> - if !textlevel > 0 - then - begin - removed =| op; - remove_colourops prev more - end - else remove_colourops (op::prev) more - | op::more -> - if !textlevel > 0 && not_text op then removed =| op; - remove_colourops (op::prev) more - in - let operators' = remove_colourops [] operators in - [Pdfops.stream_of_ops operators'] - -(* Blacken a form xobject, writing it to the same object. *) - -let blacktext c range pdf = - let blacktext_page _ page = - let content' = - blacktext_ops c pdf page.Pdfpage.resources page.Pdfpage.content - in - Cpdfutil.process_xobjects pdf page (blacktext_ops c); - {page with Pdfpage.content = content'} - in - Cpdfpage.process_pages (Cpdfutil.ppstub blacktext_page) pdf range - -(* \section{Blacken lines} *) -let blacklines_ops c pdf resources content = - let rec blacken_strokeops prev = function - | [] -> rev prev - | Pdfops.Op_CS _::t -> - blacken_strokeops (Pdfops.Op_CS "/DeviceRGB"::prev) t - | (Pdfops.Op_SC _ | Pdfops.Op_SCN _ | Pdfops.Op_SCNName _ | Pdfops.Op_G _ - | Pdfops.Op_RG _ | Pdfops.Op_K _)::t -> - blacken_strokeops (Cpdfaddtext.colour_op_stroke c::prev) t - | h::t -> blacken_strokeops (h::prev) t - and operators = - Pdfops.parse_operators pdf resources content - in - let operators' = blacken_strokeops [] operators in - [Pdfops.stream_of_ops operators'] - -let blacklines c range pdf = - let blacklines_page _ page = - let content' = - blacklines_ops c pdf page.Pdfpage.resources page.Pdfpage.content - in - Cpdfutil.process_xobjects pdf page (blacklines_ops c); - {page with Pdfpage.content = content'} - in - Cpdfpage.process_pages (Cpdfutil.ppstub blacklines_page) pdf range - -(* \section{Blacken Fills} *) -let blackfills_ops c pdf resources content = - let rec blacken_fillops prev = function - | [] -> rev prev - | Pdfops.Op_cs _::t -> - blacken_fillops (Pdfops.Op_cs "/DeviceRGB"::prev) t - | (Pdfops.Op_sc _ | Pdfops.Op_scn _ | Pdfops.Op_scnName _ | Pdfops.Op_g _ - | Pdfops.Op_rg _ | Pdfops.Op_k _)::t -> - blacken_fillops (Cpdfaddtext.colour_op c::prev) t - | h::t -> blacken_fillops (h::prev) t - and operators = - Pdfops.parse_operators pdf resources content - in - let operators' = blacken_fillops [] operators in - [Pdfops.stream_of_ops operators'] - -let blackfills c range pdf = - let blackfills_page _ page = - let content' = - blackfills_ops c pdf page.Pdfpage.resources page.Pdfpage.content - in - Cpdfutil.process_xobjects pdf page (blackfills_ops c); - {page with Pdfpage.content = content'} - in - Cpdfpage.process_pages (Cpdfutil.ppstub blackfills_page) pdf range - -(* \section{Set a minimum line width to avoid dropout} *) -let thinlines range width pdf = - let thinpage _ page = - let operators = - Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content - in - let ctmstack = ref [ref Pdftransform.i_matrix] in - let scaleof_ctm () = - try - match Pdftransform.decompose (!(hd !ctmstack)) with - (scale, _, _, _, _, _) -> - scale - with - Failure _ (*"hd"*) -> 1. - in - let rec replace_operators prev = function - | [] -> rev prev - | (Pdfops.Op_w w)::more -> - (* Alter width. *) - let width' = width /. scaleof_ctm () in - let w' = - if w >= width' then Pdfops.Op_w w else Pdfops.Op_w width' - in - replace_operators (w'::prev) more - | (Pdfops.Op_cm m)::more -> - (* Update CTM *) - begin try - let top = hd !ctmstack in - top := Pdftransform.matrix_compose !top m - with - Failure _ (*"hd"*) -> error "Malformed file." - end; - replace_operators ((Pdfops.Op_cm m)::prev) more - | Pdfops.Op_q::more -> - (* Push stack *) - begin try - ctmstack =| ref (!(hd !ctmstack)) - with - Failure _ (*"hd"*) -> error "Malformed file" - end; - replace_operators (Pdfops.Op_q::prev) more - | Pdfops.Op_Q::more -> - (* Pop stack *) - begin try - ctmstack := tl !ctmstack - with - Failure _ (*"tl"*) -> error "Malformed file" - end; - replace_operators (Pdfops.Op_Q::prev) more - | (Pdfops.Op_gs gsname)::more -> - (* Perhaps insert [Op_w]. *) - let opw = - match Pdf.lookup_direct pdf "/ExtGState" page.Pdfpage.resources with - | None -> [] - | Some ext_state_dict -> - match Pdf.lookup_direct pdf gsname ext_state_dict with - | None -> [] - | Some gdict -> - match Pdf.lookup_direct pdf "/LW" gdict with - | Some s -> (try [Pdfops.Op_w (Pdf.getnum s)] with _ -> []) - | None -> [] - in - replace_operators (opw @ ((Pdfops.Op_gs gsname)::prev)) more - | x::more -> replace_operators (x::prev) more - in - let operators = replace_operators [] operators in - (* 2. Add an initial 'w' if width more than default width *) - let operators = - if width > 1. then (Pdfops.Op_w width)::operators else operators - in - let content' = [Pdfops.stream_of_ops operators] in - {page with Pdfpage.content = content'} - in - Cpdfpage.process_pages (Cpdfutil.ppstub thinpage) pdf range - -(* Parse the new content to make sure syntactically ok, append - * as required. Rewrite the content *) -let append_page_content_page fast s before pdf n page = - let ops = - Pdfops.parse_stream pdf page.Pdfpage.resources [bytes_of_string s] - in - (if before then Pdfpage.prepend_operators else Pdfpage.postpend_operators) - pdf ops ~fast page - -let append_page_content s before fast range pdf = - Cpdfpage.process_pages (Cpdfutil.ppstub (append_page_content_page fast s before pdf)) pdf range - -(* Add rectangles on top of pages to show Media, Crop, Art, Trim, Bleed boxes. - * - * We use different dash lengths and colours to help distinguish coincident - * boxes The sequence of operators is postpended to the page content, - * appropriately protected to prevent pollution of matrices. - * - * /MediaBox: Solid red line - * /CropBox: Dashed 7 on 7 off green line - * /ArtBox: Dashed 5 on 5 off blue line - * /TrimBox: Dashed 3 on 3 off orange line - * /BleedBox: Dashed 2 on 2 off pink line *) -let get_rectangle pdf page box = - if box = "/MediaBox" then - match page.Pdfpage.mediabox with - Pdf.Array [a; b; c; d] as r -> Some (Pdf.parse_rectangle r) - | _ -> None - else - match Pdf.lookup_direct pdf box page.Pdfpage.rest with - Some (Pdf.Array [a; b; c; d] as r) -> Some (Pdf.parse_rectangle r) - | _ -> None - -let show_boxes_page fast pdf _ page = - let make_ops (r, g, b) on off boxname = - match get_rectangle pdf page boxname with - Some (r1, r2, r3, r4) -> - [Pdfops.Op_q; - Pdfops.Op_RG (r /. 255., g /. 255., b /. 255.); - Pdfops.Op_w 1.; - Pdfops.Op_d ((if on = 0. && off = 0. then [] else [on; off]), 0.); - Pdfops.Op_re (r1, r2, r3 -. r1, r4 -. r2); - Pdfops.Op_S; - Pdfops.Op_Q] - | None -> [] - in - let ops = - make_ops (255., 0., 0.) 0. 0. "/MediaBox" - @ make_ops (0., 255., 0.) 7. 7. "/CropBox" - @ make_ops (0., 0., 255.) 5. 5. "/ArtBox" - @ make_ops (255.,150.,0.) 3. 3. "/TrimBox" - @ make_ops (255.,9.,147.) 2. 2. "/BleedBox" - in - Pdfpage.postpend_operators pdf ops ~fast page - -let show_boxes ?(fast=false) pdf range = - Cpdfpage.process_pages (Cpdfutil.ppstub (show_boxes_page fast pdf)) pdf range - - - -let allowance = 9. - -let line (x0, y0, x1, y1) = - [Pdfops.Op_m (x0, y0); - Pdfops.Op_l (x1, y1); - Pdfops.Op_s] - -let trim_marks_page fast pdf n page = - match get_rectangle pdf page "/TrimBox", get_rectangle pdf page "/MediaBox" with - | Some (tminx, tminy, tmaxx, tmaxy), Some (minx, miny, maxx, maxy) -> - let ops = - [Pdfops.Op_q; - Pdfops.Op_K (1., 1., 1., 1.); - Pdfops.Op_w 1.] - @ line (minx, tmaxy, tminy -. allowance, tmaxy) (* top left *) - @ line (tminx, tmaxy +. allowance, tminx, maxy) - @ line (tmaxx +. allowance, tmaxy, maxx, tmaxy) (* top right *) - @ line (tmaxx, tmaxy +. allowance, tmaxx, maxy) - @ line (tmaxx +. allowance, tminy, maxx, tminy) (* bottom right *) - @ line (tmaxx, tminy -. allowance, tmaxx, miny) - @ line (tminx -. allowance, tminy, minx, tminy) (* bottom left *) - @ line (tminx, tminy -. allowance, tminx, miny) - @ [Pdfops.Op_Q] - in - Pdfpage.postpend_operators pdf ops ~fast page - | _, _ -> - (*Printf.eprintf "warning: no /TrimBox found on page %i\n%!" n;*) - page - -let trim_marks ?(fast=false) pdf range = - Cpdfpage.process_pages (Cpdfutil.ppstub (trim_marks_page fast pdf)) pdf range - -(* 1. Extend remove_dict_entry with search term - 2. Implement replace_dict_entry by analogy to remove_dict_entry *) -let rec dict_entry_single_object f pdf = function - | (Pdf.Dictionary d) -> f (Pdf.recurse_dict (dict_entry_single_object f pdf) d) - | (Pdf.Stream {contents = (Pdf.Dictionary dict, data)}) -> - f (Pdf.Stream {contents = (Pdf.recurse_dict (dict_entry_single_object f pdf) dict, data)}) - | Pdf.Array a -> Pdf.recurse_array (dict_entry_single_object f pdf) a - | x -> x - -(* FIXME are we sure that functional values can never appear in the equality here? *) -let remove_dict_entry pdf key search = - let f d = - match search with - | None -> Pdf.remove_dict_entry d key - | Some s -> - match Pdf.lookup_direct pdf key d with - | Some v when v = s -> Pdf.remove_dict_entry d key - | _ -> d - in - Pdf.objselfmap (dict_entry_single_object f pdf) pdf; - pdf.Pdf.trailerdict <- dict_entry_single_object f pdf pdf.Pdf.trailerdict - -let replace_dict_entry pdf key value search = - let f d = - match search with - | None -> Pdf.replace_dict_entry d key value - | Some s -> - match Pdf.lookup_direct pdf key d with - | Some v when v = s -> Pdf.replace_dict_entry d key value - | _ -> d - in - Pdf.objselfmap (dict_entry_single_object f pdf) pdf; - pdf.Pdf.trailerdict <- dict_entry_single_object f pdf pdf.Pdf.trailerdict - -(* FIXME no need to self map here, since nothing changes *) -let print_dict_entry pdf key = - let f d = - match Pdf.lookup_direct pdf key d with - | Some v -> - (* We use a double newline as a separator. *) - Printf.printf "%s\n\n" (Cpdfyojson.Safe.to_string (Cpdfjson.json_of_object pdf (fun _ -> ()) false false v)); - d - | None -> d - in - Pdf.objselfmap (dict_entry_single_object f pdf) pdf; - pdf.Pdf.trailerdict <- dict_entry_single_object f pdf pdf.Pdf.trailerdict - -let remove_clipping_ops pdf resources content = - let ops = Pdfops.parse_operators pdf resources content in - let rec process a = function - Pdfops.Op_W::Pdfops.Op_n::t -> process (Pdfops.Op_n::a) t - | h::t -> process (h::a) t - | [] -> rev a - in - [Pdfops.stream_of_ops (process [] ops)] - -let remove_clipping pdf range = - let remove_clipping_page _ page = - let content' = - remove_clipping_ops pdf page.Pdfpage.resources page.Pdfpage.content - in - Cpdfutil.process_xobjects pdf page remove_clipping_ops; - {page with Pdfpage.content = content'} - in - Cpdfpage.process_pages (Cpdfutil.ppstub remove_clipping_page) pdf range - -(* copy the contents of the box f to the box t. If mediabox_if_missing is set, -the contents of the mediabox will be used if the from fox is not available. If -mediabox_is_missing is false, the page is unaltered. *) -let copy_box f t mediabox_if_missing pdf range = - Cpdfpage.process_pages - (Cpdfutil.ppstub (fun _ page -> - if f = "/MediaBox" then - {page with Pdfpage.rest = - (Pdf.add_dict_entry page.Pdfpage.rest t (page.Pdfpage.mediabox))} - else - match Pdf.lookup_direct pdf f page.Pdfpage.rest with - | Some pdfobject -> - if t = "/MediaBox" - then {page with - Pdfpage.mediabox = Pdf.direct pdf pdfobject} - else {page with Pdfpage.rest = - (Pdf.add_dict_entry page.Pdfpage.rest t (Pdf.direct pdf pdfobject))} - | None -> - if mediabox_if_missing - then {page with Pdfpage.rest = Pdf.add_dict_entry page.Pdfpage.rest t page.Pdfpage.mediabox} - else page)) - pdf - range - -let remove_unused_resources_page pdf n page = - let xobjects, all_names = - match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with - | Some (Pdf.Dictionary d) -> Pdf.Dictionary d, map fst d - | _ -> Pdf.Dictionary [], [] - in - let names_to_keep = - option_map - (function Pdfops.Op_Do n -> Some n | _ -> None) - (Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content) - in - let names_to_remove = lose (mem' names_to_keep) all_names in - let xobjdict = fold_left (Pdf.remove_dict_entry) xobjects names_to_remove in - {page with Pdfpage.resources = Pdf.add_dict_entry page.Pdfpage.resources "/XObject" xobjdict} - -let remove_unused_resources pdf = - Cpdfpage.process_pages (Cpdfutil.ppstub (remove_unused_resources_page pdf)) pdf (ilist 1 (Pdfpage.endpage pdf)) diff --git a/cpdf.mli b/cpdf.mli deleted file mode 100644 index ac8fb86..0000000 --- a/cpdf.mli +++ /dev/null @@ -1,68 +0,0 @@ -(** Coherent PDF Tools Core Routines *) -open Pdfutil - -(** {2 Stamping} *) - -(** [combine_pages fast under over scaletofit swap equalize] combines the page -content of two PDFs, page-by-page. If [equalize] is true the output will have -the same number of pages as the shorter file. If [scaletofit] is true, the -[over] file will be scaled to fit. If [swap] is true, [over] and [under] are -swapped. If [fast] is true, the PDFs are assumed to be well-formed and no -fixes are done. *) -val combine_pages : bool -> Pdf.t -> Pdf.t -> bool -> bool -> bool -> Pdf.t - -(** [stamp relative_to_cropbox position topline midline fast scale_to_fit isover range over pdf] stamps the first page of -[over] over each page of the PDF. The arguments have the same meaning as in -[combine_pages]. *) -val stamp : bool -> Cpdfposition.position -> bool -> bool -> bool -> bool -> bool -> int list -> Pdf.t -> Pdf.t -> Pdf.t - -(** {2 Page geometry} *) - -(** True if a given page in a PDF has a given box *) -val hasbox : Pdf.t -> int -> string -> bool - -(** [crop_pdf xywhlist pdf range] sets the cropbox on the given pages. *) -val crop_pdf : ?box:string -> (float * float * float * float) list -> Pdf.t -> int list -> Pdf.t - -(** [setBox boxname x y w h pdf range] sets the given box on the given pages. *) -val setBox : string -> float -> float -> float -> float -> Pdf.t -> int list -> Pdf.t - -(** Flip the given pages vertically *) -val vflip_pdf : ?fast:bool -> Pdf.t -> int list -> Pdf.t - -(** Flip the given pages horizontally *) -val hflip_pdf : ?fast:bool -> Pdf.t -> int list -> Pdf.t - -val trim_marks : ?fast:bool -> Pdf.t -> int list -> Pdf.t - -val show_boxes : ?fast:bool -> Pdf.t -> int list -> Pdf.t - -(** {2 Miscellany} *) - -(** Make all lines in the PDF at least a certain thickness. *) -val thinlines : int list -> float -> Pdf.t -> Pdf.t - -(** Make all text on certain pages black. *) -val blacktext : Cpdfaddtext.color -> int list -> Pdf.t -> Pdf.t - -(** Make all lines on certain pages black. *) -val blacklines : Cpdfaddtext.color -> int list -> Pdf.t -> Pdf.t - -(** Make all fills on certain pages black. *) -val blackfills : Cpdfaddtext.color -> int list -> Pdf.t -> Pdf.t - -(**/**) - -val append_page_content : string -> bool -> bool -> int list -> Pdf.t -> Pdf.t - -val remove_dict_entry : Pdf.t -> string -> Pdf.pdfobject option -> unit - -val replace_dict_entry : Pdf.t -> string -> Pdf.pdfobject -> Pdf.pdfobject option -> unit - -val print_dict_entry : Pdf.t -> string -> unit - -val remove_clipping : Pdf.t -> int list -> Pdf.t - -val copy_box : string -> string -> bool -> Pdf.t -> int list -> Pdf.t - -val remove_unused_resources : Pdf.t -> Pdf.t diff --git a/cpdfmanual.pdf b/cpdfmanual.pdf index 435023a..2e03219 100644 Binary files a/cpdfmanual.pdf and b/cpdfmanual.pdf differ diff --git a/cpdfmanual.tex b/cpdfmanual.tex index f2a5ff5..e47e00d 100644 --- a/cpdfmanual.tex +++ b/cpdfmanual.tex @@ -1,17 +1,3 @@ -%Document the new JSON format, -j and new -output-json options including UTF8 -%Document -decrypt-force -%Document -collate -%Document -impose and friends (inc. 0-w, 0-h for long ones, how lines scale etc., undefined if pages different sizes) -%Document -bookmarks-json including mentioning UTF8 -%Document -list-annotations-json -%Document -replace-dict-entry and search extension to -remove-dict-entry, and -print-dict-entry -%Document new text lookup for -add-text and new -raw mode -%Document -print-font-table -%Document NOT and DUP page ranges -%Document new colourspaces for -add-text -add-rectangle -%Document -table-of-contents, -table-of-contents-title (inc. newlines), -font. Mention -upright. -%Document -typeset -%Document -idir-only-pdfs must precede -idir \documentclass{book} % Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf etc. \usepackage{comment}\excludecomment{cpdflib}\excludecomment{pycpdflib} @@ -58,7 +44,7 @@ Command Line Tools} \vspace{12mm} {\Huge User Manual}\\ -Version 2.4 (June 2021) +Version 2.5 (January 2022) \vspace{25mm} @@ -82,7 +68,7 @@ Version 2.4 (June 2021) \noindent For bug reports, feature requests and comments, email\\ \texttt{contact@coherentgraphics.co.uk} \vspace*{\fill} -\noindent\copyright 2021 Coherent Graphics Limited. All rights reserved. ISBN 978-0957671140 +\noindent\copyright 2022 Coherent Graphics Limited. All rights reserved. ISBN 978-0957671140 \smallgap \noindent Adobe, Acrobat, and Adobe PDF are