diff --git a/cpdf.ml b/cpdf.ml index 7218ae8..e3931f9 100644 --- a/cpdf.ml +++ b/cpdf.ml @@ -4488,3 +4488,164 @@ let image_resolution pdf range dpi = image_results := []; image_resolution pdf range dpi; rev !image_results + +(* copy the contents of the box f to the box t. If mediabox_if_missing is set, +the contents of the mediabox will be used if the from fox is not available. If +mediabox_is_missing is false, the page is unaltered. *) +let copy_box f t mediabox_if_missing pdf range = + process_pages + (ppstub (fun _ page -> + if f = "/MediaBox" then + {page with Pdfpage.rest = + (Pdf.add_dict_entry page.Pdfpage.rest t (page.Pdfpage.mediabox))} + else + match Pdf.lookup_direct pdf f page.Pdfpage.rest with + | Some pdfobject -> + if t = "/MediaBox" + then {page with + Pdfpage.mediabox = Pdf.direct pdf pdfobject} + else {page with Pdfpage.rest = + (Pdf.add_dict_entry page.Pdfpage.rest t (Pdf.direct pdf pdfobject))} + | None -> + if mediabox_if_missing + then {page with Pdfpage.rest = Pdf.add_dict_entry page.Pdfpage.rest t page.Pdfpage.mediabox} + else page)) + pdf + range + +let dump_attachment out pdf (_, embeddedfile) = + match Pdf.lookup_direct pdf "/F" embeddedfile with + | Some (Pdf.String s) -> + let efdata = + begin match Pdf.lookup_direct pdf "/EF" embeddedfile with + | Some d -> + let stream = + match Pdf.lookup_direct pdf "/F" d with + | Some s -> s + | None -> error "Bad embedded file stream" + in + Pdfcodec.decode_pdfstream_until_unknown pdf stream; + begin match stream with Pdf.Stream {contents = (_, Pdf.Got b)} -> b | _ -> error "Bad embedded file stream" end + | _ -> error "Bad embedded file stream" + end + in + let s = remove_unsafe_characters s in + let filename = if out = "" then s else out ^ Filename.dir_sep ^ s in + begin try + let fh = open_out_bin filename in + for x = 0 to bytes_size efdata - 1 do output_byte fh (bget efdata x) done; + close_out fh + with + e -> Printf.eprintf "Failed to write attachment to %s\n%!" filename; + end + | _ -> () + +let dump_attached_document pdf out = + let root = Pdf.lookup_obj pdf pdf.Pdf.root in + let names = + match Pdf.lookup_direct pdf "/Names" root with Some n -> n | _ -> Pdf.Dictionary [] + in + match Pdf.lookup_direct pdf "/EmbeddedFiles" names with + | Some x -> + iter (dump_attachment out pdf) (Pdf.contents_of_nametree pdf x) + | None -> () + +let dump_attached_page pdf out page = + let annots = + match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with + | Some (Pdf.Array l) -> l + | _ -> [] + in + let efannots = + keep + (fun annot -> + match Pdf.lookup_direct pdf "/Subtype" annot with + | Some (Pdf.Name "/FileAttachment") -> true + | _ -> false) + annots + in + let fsannots = option_map (Pdf.lookup_direct pdf "/FS") efannots in + iter (dump_attachment out pdf) (map (fun x -> 0, x) fsannots) + +(* Dump both document-level and page-level attached files to file, using their file names *) +let dump_attached_files pdf out = + try + dump_attached_document pdf out; + iter (dump_attached_page pdf out) (Pdfpage.pages_of_pagetree pdf) + with + e -> error (Printf.sprintf "Couldn't dump attached files: %s\n" (Printexc.to_string e)) + +let remove_unused_resources_page pdf n page = + let xobjects, all_names = + match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with + | Some (Pdf.Dictionary d) -> Pdf.Dictionary d, map fst d + | _ -> Pdf.Dictionary [], [] + in + let names_to_keep = + option_map + (function Pdfops.Op_Do n -> Some n | _ -> None) + (Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content) + in + let names_to_remove = lose (mem' names_to_keep) all_names in + let xobjdict = fold_left (Pdf.remove_dict_entry) xobjects names_to_remove in + {page with Pdfpage.resources = Pdf.add_dict_entry page.Pdfpage.resources "/XObject" xobjdict} + +let remove_unused_resources pdf = + process_pages (ppstub (remove_unused_resources_page pdf)) pdf (ilist 1 (Pdfpage.endpage pdf)) + +let print_spot_colour n s = + Printf.printf "%i %s\n" n s + +let list_spot_colours pdf = + Pdf.objiter + (fun _ obj -> + match obj with + Pdf.Array (Pdf.Name "/Separation"::x::_) -> + begin match Pdf.direct pdf x with + Pdf.Name col -> Printf.printf "%s\n" col + | _ -> () + end + | _ -> ()) + pdf + +(* Indent bookmarks in each file by one and add a title bookmark pointing to the first page. *) +let add_bookmark_title filename use_title pdf = + let title = + if use_title then + match get_info_utf8 pdf "/Title", get_xmp_info pdf "/Title" with + "", x | x, "" | _, x -> x + else + Filename.basename filename + in + let marks = Pdfmarks.read_bookmarks pdf in + let page1objnum = + match Pdfpage.page_object_number pdf 1 with + None -> error "add_bookmark_title: page not found" + | Some x -> x + in + let newmarks = + {Pdfmarks.level = 0; + Pdfmarks.text = title; + Pdfmarks.target = Pdfdest.XYZ (Pdfdest.PageObject page1objnum, None, None, None); + Pdfmarks.isopen = false} + ::map (function m -> {m with Pdfmarks.level = m.Pdfmarks.level + 1}) marks + in + Pdfmarks.add_bookmarks newmarks pdf + +let bookmarks_open_to_level n pdf = + let marks = Pdfmarks.read_bookmarks pdf in + let newmarks = + map + (fun m -> {m with Pdfmarks.isopen = m.Pdfmarks.level < n}) + marks + in + Pdfmarks.add_bookmarks newmarks pdf + +let create_pdf pages pagesize = + let page = + {(Pdfpage.blankpage pagesize) with + Pdfpage.content = [Pdfops.stream_of_ops []]; + Pdfpage.resources = Pdf.Dictionary []} + in + let pdf, pageroot = Pdfpage.add_pagetree (many page pages) (Pdf.empty ()) in + Pdfpage.add_root pageroot [] pdf diff --git a/cpdf.mli b/cpdf.mli index e73beae..636d555 100644 --- a/cpdf.mli +++ b/cpdf.mli @@ -397,3 +397,18 @@ val print_dict_entry : Pdf.t -> string -> unit val remove_clipping : Pdf.t -> int list -> Pdf.t val image_resolution : Pdf.t -> int list -> float -> (int * string * int * int * float * float) list + +val copy_box : string -> string -> bool -> Pdf.t -> int list -> Pdf.t + +val dump_attached_files : Pdf.t -> string -> unit + +val add_bookmark_title : string -> bool -> Pdf.t -> Pdf.t + +val remove_unused_resources : Pdf.t -> Pdf.t + +val list_spot_colours : Pdf.t -> unit + +val bookmarks_open_to_level : int -> Pdf.t -> Pdf.t + +val create_pdf : int -> Pdfpaper.t -> Pdf.t + diff --git a/cpdfcommand.ml b/cpdfcommand.ml index c92d1fb..01c0982 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -915,9 +915,6 @@ let anon_fun s = (* If a password begins with a dash, we allow -pw= too *) let setdashpassword = anon_fun -(*let firstpage pdf = - List.hd (Pdfpage.pages_of_pagetree pdf)*) - (* Setting operations *) let setcrop s = setop Crop (); @@ -3056,93 +3053,6 @@ let getencryption pdf = | Some (Pdfwrite.AES256bitISO true) -> "256bit AES ISO, Metadata encrypted" | Some (Pdfwrite.AES256bitISO false) -> "256bit AES ISO, Metadata not encrypted" - -(* copy the contents of the box f to the box t. If mediabox_if_missing is set, -the contents of the mediabox will be used if the from fox is not available. If -mediabox_is_missing is false, the page is unaltered. *) -let copy_box f t mediabox_if_missing pdf range = - Cpdf.process_pages - (ppstub (fun _ page -> - if f = "/MediaBox" then - {page with Pdfpage.rest = - (Pdf.add_dict_entry page.Pdfpage.rest t (page.Pdfpage.mediabox))} - else - match Pdf.lookup_direct pdf f page.Pdfpage.rest with - | Some pdfobject -> - if t = "/MediaBox" - then {page with - Pdfpage.mediabox = Pdf.direct pdf pdfobject} - else {page with Pdfpage.rest = - (Pdf.add_dict_entry page.Pdfpage.rest t (Pdf.direct pdf pdfobject))} - | None -> - if mediabox_if_missing - then {page with Pdfpage.rest = Pdf.add_dict_entry page.Pdfpage.rest t page.Pdfpage.mediabox} - else page)) - pdf - range - -let dump_attachment out pdf (_, embeddedfile) = - match Pdf.lookup_direct pdf "/F" embeddedfile with - | Some (Pdf.String s) -> - let efdata = - begin match Pdf.lookup_direct pdf "/EF" embeddedfile with - | Some d -> - let stream = - match Pdf.lookup_direct pdf "/F" d with - | Some s -> s - | None -> error "Bad embedded file stream" - in - Pdfcodec.decode_pdfstream_until_unknown pdf stream; - begin match stream with Pdf.Stream {contents = (_, Pdf.Got b)} -> b | _ -> error "Bad embedded file stream" end - | _ -> error "Bad embedded file stream" - end - in - let s = remove_unsafe_characters s in - let filename = if out = "" then s else out ^ Filename.dir_sep ^ s in - begin try - let fh = open_out_bin filename in - for x = 0 to bytes_size efdata - 1 do output_byte fh (bget efdata x) done; - close_out fh - with - e -> Printf.eprintf "Failed to write attachment to %s\n%!" filename; - end - | _ -> () - -let dump_attached_document pdf out = - let root = Pdf.lookup_obj pdf pdf.Pdf.root in - let names = - match Pdf.lookup_direct pdf "/Names" root with Some n -> n | _ -> Pdf.Dictionary [] - in - match Pdf.lookup_direct pdf "/EmbeddedFiles" names with - | Some x -> - iter (dump_attachment out pdf) (Pdf.contents_of_nametree pdf x) - | None -> () - -let dump_attached_page pdf out page = - let annots = - match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with - | Some (Pdf.Array l) -> l - | _ -> [] - in - let efannots = - keep - (fun annot -> - match Pdf.lookup_direct pdf "/Subtype" annot with - | Some (Pdf.Name "/FileAttachment") -> true - | _ -> false) - annots - in - let fsannots = option_map (Pdf.lookup_direct pdf "/FS") efannots in - iter (dump_attachment out pdf) (map (fun x -> 0, x) fsannots) - -(* Dump both document-level and page-level attached files to file, using their file names *) -let dump_attached_files pdf out = - try - dump_attached_document pdf out; - iter (dump_attached_page pdf out) (Pdfpage.pages_of_pagetree pdf) - with - e -> error (Printf.sprintf "Couldn't dump attached files: %s\n" (Printexc.to_string e)) - (* If pages in stamp < pages in main, extend stamp by repeating its last page. If pages in stamp more, chop stamp *) let equalize_pages_extend main stamp = let length_stamp = Pdfpage.endpage stamp @@ -3156,81 +3066,6 @@ let equalize_pages_extend main stamp = then chop stamp length_main else extend_lastpage (last (Pdfpage.pages_of_pagetree stamp)) stamp (length_main - length_stamp) -let remove_unused_resources_page pdf n page = - let xobjects, all_names = - match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with - | Some (Pdf.Dictionary d) -> Pdf.Dictionary d, map fst d - | _ -> Pdf.Dictionary [], [] - in - let names_to_keep = - option_map - (function Pdfops.Op_Do n -> Some n | _ -> None) - (Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content) - in - let names_to_remove = lose (mem' names_to_keep) all_names in - let xobjdict = fold_left (Pdf.remove_dict_entry) xobjects names_to_remove in - {page with Pdfpage.resources = Pdf.add_dict_entry page.Pdfpage.resources "/XObject" xobjdict} - -let remove_unused_resources pdf = - Cpdf.process_pages (ppstub (remove_unused_resources_page pdf)) pdf (ilist 1 (Pdfpage.endpage pdf)) - -let print_spot_colour n s = - Printf.printf "%i %s\n" n s - -let list_spot_colours pdf = - Pdf.objiter - (fun _ obj -> - match obj with - Pdf.Array (Pdf.Name "/Separation"::x::_) -> - begin match Pdf.direct pdf x with - Pdf.Name col -> Printf.printf "%s\n" col - | _ -> () - end - | _ -> ()) - pdf - -(* Indent bookmarks in each file by one and add a title bookmark pointing to the first page. *) -let add_bookmark_title filename use_title pdf = - let title = - if use_title then - match Cpdf.get_info_utf8 pdf "/Title", Cpdf.get_xmp_info pdf "/Title" with - "", x | x, "" | _, x -> x - else - Filename.basename filename - in - let marks = Pdfmarks.read_bookmarks pdf in - let page1objnum = - match Pdfpage.page_object_number pdf 1 with - None -> error "add_bookmark_title: page not found" - | Some x -> x - in - let newmarks = - {Pdfmarks.level = 0; - Pdfmarks.text = title; - Pdfmarks.target = Pdfdest.XYZ (Pdfdest.PageObject page1objnum, None, None, None); - Pdfmarks.isopen = false} - ::map (function m -> {m with Pdfmarks.level = m.Pdfmarks.level + 1}) marks - in - Pdfmarks.add_bookmarks newmarks pdf - -let bookmarks_open_to_level n pdf = - let marks = Pdfmarks.read_bookmarks pdf in - let newmarks = - map - (fun m -> {m with Pdfmarks.isopen = m.Pdfmarks.level < n}) - marks - in - Pdfmarks.add_bookmarks newmarks pdf - -let create_pdf pages pagesize = - let page = - {(Pdfpage.blankpage args.createpdf_pagesize) with - Pdfpage.content = [Pdfops.stream_of_ops []]; - Pdfpage.resources = Pdf.Dictionary []} - in - let pdf, pageroot = Pdfpage.add_pagetree (many page args.createpdf_pages) (Pdf.empty ()) in - Pdfpage.add_root pageroot [] pdf - let write_json output pdf = match output with | NoOutputSpecified -> @@ -3317,7 +3152,7 @@ let go () = let pdfs = if args.merge_add_bookmarks then map2 - (fun filename pdf -> add_bookmark_title filename args.merge_add_bookmarks_use_titles pdf) + (fun filename pdf -> Cpdf.add_bookmark_title filename args.merge_add_bookmarks_use_titles pdf) (map (function InFile s -> s | StdIn -> "" | AlreadyInMemory _ -> "") names) pdfs else @@ -3348,7 +3183,7 @@ let go () = begin match args.inputs, args.out with | _::_, _ -> let pdf = get_single_pdf (Some RemoveUnusedResources) false in - let outpdf = remove_unused_resources pdf in + let outpdf = Cpdf.remove_unused_resources pdf in write_pdf true outpdf | _ -> error "RemoveUnusedResources: bad command line" end @@ -3523,7 +3358,7 @@ let go () = | _ -> error "Copy box: no tobox or no frombox specified" end in - let pdf = copy_box f t args.mediabox_if_missing pdf range in + let pdf = Cpdf.copy_box f t args.mediabox_if_missing pdf range in write_pdf false pdf | _ -> error "Copy Box: bad command line" end @@ -3866,8 +3701,8 @@ let go () = | Some DumpAttachedFiles -> let pdf = get_single_pdf args.op false in begin match args.out with - | NoOutputSpecified -> dump_attached_files pdf "" - | File n -> dump_attached_files pdf n + | NoOutputSpecified -> Cpdf.dump_attached_files pdf "" + | File n -> Cpdf.dump_attached_files pdf n | Stdout -> error "Can't dump attachments to stdout" end | Some RemoveAttachedFiles -> @@ -4109,7 +3944,7 @@ let go () = Cpdf.print_dict_entry pdf key | Some ListSpotColours -> let pdf = get_single_pdf args.op false in - list_spot_colours pdf + Cpdf.list_spot_colours pdf | Some RemoveClipping -> let pdf = get_single_pdf args.op false in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in @@ -4131,9 +3966,9 @@ let go () = Cpdffont.embed_missing_fonts args.path_to_ghostscript args.gs_quiet fi fo | Some (BookmarksOpenToLevel n) -> let pdf = get_single_pdf args.op false in - write_pdf false (bookmarks_open_to_level n pdf) + write_pdf false (Cpdf.bookmarks_open_to_level n pdf) | Some CreatePDF -> - let pdf = create_pdf args.createpdf_pages args.createpdf_pagesize in + let pdf = Cpdf.create_pdf args.createpdf_pages args.createpdf_pagesize in write_pdf false pdf | Some RemoveAllText -> let pdf = get_single_pdf args.op false in