More splitting up of source files
This commit is contained in:
parent
f6631aa101
commit
4e12e01848
161
cpdf.ml
161
cpdf.ml
|
@ -4488,3 +4488,164 @@ let image_resolution pdf range dpi =
|
|||
image_results := [];
|
||||
image_resolution pdf range dpi;
|
||||
rev !image_results
|
||||
|
||||
(* copy the contents of the box f to the box t. If mediabox_if_missing is set,
|
||||
the contents of the mediabox will be used if the from fox is not available. If
|
||||
mediabox_is_missing is false, the page is unaltered. *)
|
||||
let copy_box f t mediabox_if_missing pdf range =
|
||||
process_pages
|
||||
(ppstub (fun _ page ->
|
||||
if f = "/MediaBox" then
|
||||
{page with Pdfpage.rest =
|
||||
(Pdf.add_dict_entry page.Pdfpage.rest t (page.Pdfpage.mediabox))}
|
||||
else
|
||||
match Pdf.lookup_direct pdf f page.Pdfpage.rest with
|
||||
| Some pdfobject ->
|
||||
if t = "/MediaBox"
|
||||
then {page with
|
||||
Pdfpage.mediabox = Pdf.direct pdf pdfobject}
|
||||
else {page with Pdfpage.rest =
|
||||
(Pdf.add_dict_entry page.Pdfpage.rest t (Pdf.direct pdf pdfobject))}
|
||||
| None ->
|
||||
if mediabox_if_missing
|
||||
then {page with Pdfpage.rest = Pdf.add_dict_entry page.Pdfpage.rest t page.Pdfpage.mediabox}
|
||||
else page))
|
||||
pdf
|
||||
range
|
||||
|
||||
let dump_attachment out pdf (_, embeddedfile) =
|
||||
match Pdf.lookup_direct pdf "/F" embeddedfile with
|
||||
| Some (Pdf.String s) ->
|
||||
let efdata =
|
||||
begin match Pdf.lookup_direct pdf "/EF" embeddedfile with
|
||||
| Some d ->
|
||||
let stream =
|
||||
match Pdf.lookup_direct pdf "/F" d with
|
||||
| Some s -> s
|
||||
| None -> error "Bad embedded file stream"
|
||||
in
|
||||
Pdfcodec.decode_pdfstream_until_unknown pdf stream;
|
||||
begin match stream with Pdf.Stream {contents = (_, Pdf.Got b)} -> b | _ -> error "Bad embedded file stream" end
|
||||
| _ -> error "Bad embedded file stream"
|
||||
end
|
||||
in
|
||||
let s = remove_unsafe_characters s in
|
||||
let filename = if out = "" then s else out ^ Filename.dir_sep ^ s in
|
||||
begin try
|
||||
let fh = open_out_bin filename in
|
||||
for x = 0 to bytes_size efdata - 1 do output_byte fh (bget efdata x) done;
|
||||
close_out fh
|
||||
with
|
||||
e -> Printf.eprintf "Failed to write attachment to %s\n%!" filename;
|
||||
end
|
||||
| _ -> ()
|
||||
|
||||
let dump_attached_document pdf out =
|
||||
let root = Pdf.lookup_obj pdf pdf.Pdf.root in
|
||||
let names =
|
||||
match Pdf.lookup_direct pdf "/Names" root with Some n -> n | _ -> Pdf.Dictionary []
|
||||
in
|
||||
match Pdf.lookup_direct pdf "/EmbeddedFiles" names with
|
||||
| Some x ->
|
||||
iter (dump_attachment out pdf) (Pdf.contents_of_nametree pdf x)
|
||||
| None -> ()
|
||||
|
||||
let dump_attached_page pdf out page =
|
||||
let annots =
|
||||
match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with
|
||||
| Some (Pdf.Array l) -> l
|
||||
| _ -> []
|
||||
in
|
||||
let efannots =
|
||||
keep
|
||||
(fun annot ->
|
||||
match Pdf.lookup_direct pdf "/Subtype" annot with
|
||||
| Some (Pdf.Name "/FileAttachment") -> true
|
||||
| _ -> false)
|
||||
annots
|
||||
in
|
||||
let fsannots = option_map (Pdf.lookup_direct pdf "/FS") efannots in
|
||||
iter (dump_attachment out pdf) (map (fun x -> 0, x) fsannots)
|
||||
|
||||
(* Dump both document-level and page-level attached files to file, using their file names *)
|
||||
let dump_attached_files pdf out =
|
||||
try
|
||||
dump_attached_document pdf out;
|
||||
iter (dump_attached_page pdf out) (Pdfpage.pages_of_pagetree pdf)
|
||||
with
|
||||
e -> error (Printf.sprintf "Couldn't dump attached files: %s\n" (Printexc.to_string e))
|
||||
|
||||
let remove_unused_resources_page pdf n page =
|
||||
let xobjects, all_names =
|
||||
match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with
|
||||
| Some (Pdf.Dictionary d) -> Pdf.Dictionary d, map fst d
|
||||
| _ -> Pdf.Dictionary [], []
|
||||
in
|
||||
let names_to_keep =
|
||||
option_map
|
||||
(function Pdfops.Op_Do n -> Some n | _ -> None)
|
||||
(Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content)
|
||||
in
|
||||
let names_to_remove = lose (mem' names_to_keep) all_names in
|
||||
let xobjdict = fold_left (Pdf.remove_dict_entry) xobjects names_to_remove in
|
||||
{page with Pdfpage.resources = Pdf.add_dict_entry page.Pdfpage.resources "/XObject" xobjdict}
|
||||
|
||||
let remove_unused_resources pdf =
|
||||
process_pages (ppstub (remove_unused_resources_page pdf)) pdf (ilist 1 (Pdfpage.endpage pdf))
|
||||
|
||||
let print_spot_colour n s =
|
||||
Printf.printf "%i %s\n" n s
|
||||
|
||||
let list_spot_colours pdf =
|
||||
Pdf.objiter
|
||||
(fun _ obj ->
|
||||
match obj with
|
||||
Pdf.Array (Pdf.Name "/Separation"::x::_) ->
|
||||
begin match Pdf.direct pdf x with
|
||||
Pdf.Name col -> Printf.printf "%s\n" col
|
||||
| _ -> ()
|
||||
end
|
||||
| _ -> ())
|
||||
pdf
|
||||
|
||||
(* Indent bookmarks in each file by one and add a title bookmark pointing to the first page. *)
|
||||
let add_bookmark_title filename use_title pdf =
|
||||
let title =
|
||||
if use_title then
|
||||
match get_info_utf8 pdf "/Title", get_xmp_info pdf "/Title" with
|
||||
"", x | x, "" | _, x -> x
|
||||
else
|
||||
Filename.basename filename
|
||||
in
|
||||
let marks = Pdfmarks.read_bookmarks pdf in
|
||||
let page1objnum =
|
||||
match Pdfpage.page_object_number pdf 1 with
|
||||
None -> error "add_bookmark_title: page not found"
|
||||
| Some x -> x
|
||||
in
|
||||
let newmarks =
|
||||
{Pdfmarks.level = 0;
|
||||
Pdfmarks.text = title;
|
||||
Pdfmarks.target = Pdfdest.XYZ (Pdfdest.PageObject page1objnum, None, None, None);
|
||||
Pdfmarks.isopen = false}
|
||||
::map (function m -> {m with Pdfmarks.level = m.Pdfmarks.level + 1}) marks
|
||||
in
|
||||
Pdfmarks.add_bookmarks newmarks pdf
|
||||
|
||||
let bookmarks_open_to_level n pdf =
|
||||
let marks = Pdfmarks.read_bookmarks pdf in
|
||||
let newmarks =
|
||||
map
|
||||
(fun m -> {m with Pdfmarks.isopen = m.Pdfmarks.level < n})
|
||||
marks
|
||||
in
|
||||
Pdfmarks.add_bookmarks newmarks pdf
|
||||
|
||||
let create_pdf pages pagesize =
|
||||
let page =
|
||||
{(Pdfpage.blankpage pagesize) with
|
||||
Pdfpage.content = [Pdfops.stream_of_ops []];
|
||||
Pdfpage.resources = Pdf.Dictionary []}
|
||||
in
|
||||
let pdf, pageroot = Pdfpage.add_pagetree (many page pages) (Pdf.empty ()) in
|
||||
Pdfpage.add_root pageroot [] pdf
|
||||
|
|
15
cpdf.mli
15
cpdf.mli
|
@ -397,3 +397,18 @@ val print_dict_entry : Pdf.t -> string -> unit
|
|||
val remove_clipping : Pdf.t -> int list -> Pdf.t
|
||||
|
||||
val image_resolution : Pdf.t -> int list -> float -> (int * string * int * int * float * float) list
|
||||
|
||||
val copy_box : string -> string -> bool -> Pdf.t -> int list -> Pdf.t
|
||||
|
||||
val dump_attached_files : Pdf.t -> string -> unit
|
||||
|
||||
val add_bookmark_title : string -> bool -> Pdf.t -> Pdf.t
|
||||
|
||||
val remove_unused_resources : Pdf.t -> Pdf.t
|
||||
|
||||
val list_spot_colours : Pdf.t -> unit
|
||||
|
||||
val bookmarks_open_to_level : int -> Pdf.t -> Pdf.t
|
||||
|
||||
val create_pdf : int -> Pdfpaper.t -> Pdf.t
|
||||
|
||||
|
|
181
cpdfcommand.ml
181
cpdfcommand.ml
|
@ -915,9 +915,6 @@ let anon_fun s =
|
|||
(* If a password begins with a dash, we allow -pw=<password> too *)
|
||||
let setdashpassword = anon_fun
|
||||
|
||||
(*let firstpage pdf =
|
||||
List.hd (Pdfpage.pages_of_pagetree pdf)*)
|
||||
|
||||
(* Setting operations *)
|
||||
let setcrop s =
|
||||
setop Crop ();
|
||||
|
@ -3056,93 +3053,6 @@ let getencryption pdf =
|
|||
| Some (Pdfwrite.AES256bitISO true) -> "256bit AES ISO, Metadata encrypted"
|
||||
| Some (Pdfwrite.AES256bitISO false) -> "256bit AES ISO, Metadata not encrypted"
|
||||
|
||||
|
||||
(* copy the contents of the box f to the box t. If mediabox_if_missing is set,
|
||||
the contents of the mediabox will be used if the from fox is not available. If
|
||||
mediabox_is_missing is false, the page is unaltered. *)
|
||||
let copy_box f t mediabox_if_missing pdf range =
|
||||
Cpdf.process_pages
|
||||
(ppstub (fun _ page ->
|
||||
if f = "/MediaBox" then
|
||||
{page with Pdfpage.rest =
|
||||
(Pdf.add_dict_entry page.Pdfpage.rest t (page.Pdfpage.mediabox))}
|
||||
else
|
||||
match Pdf.lookup_direct pdf f page.Pdfpage.rest with
|
||||
| Some pdfobject ->
|
||||
if t = "/MediaBox"
|
||||
then {page with
|
||||
Pdfpage.mediabox = Pdf.direct pdf pdfobject}
|
||||
else {page with Pdfpage.rest =
|
||||
(Pdf.add_dict_entry page.Pdfpage.rest t (Pdf.direct pdf pdfobject))}
|
||||
| None ->
|
||||
if mediabox_if_missing
|
||||
then {page with Pdfpage.rest = Pdf.add_dict_entry page.Pdfpage.rest t page.Pdfpage.mediabox}
|
||||
else page))
|
||||
pdf
|
||||
range
|
||||
|
||||
let dump_attachment out pdf (_, embeddedfile) =
|
||||
match Pdf.lookup_direct pdf "/F" embeddedfile with
|
||||
| Some (Pdf.String s) ->
|
||||
let efdata =
|
||||
begin match Pdf.lookup_direct pdf "/EF" embeddedfile with
|
||||
| Some d ->
|
||||
let stream =
|
||||
match Pdf.lookup_direct pdf "/F" d with
|
||||
| Some s -> s
|
||||
| None -> error "Bad embedded file stream"
|
||||
in
|
||||
Pdfcodec.decode_pdfstream_until_unknown pdf stream;
|
||||
begin match stream with Pdf.Stream {contents = (_, Pdf.Got b)} -> b | _ -> error "Bad embedded file stream" end
|
||||
| _ -> error "Bad embedded file stream"
|
||||
end
|
||||
in
|
||||
let s = remove_unsafe_characters s in
|
||||
let filename = if out = "" then s else out ^ Filename.dir_sep ^ s in
|
||||
begin try
|
||||
let fh = open_out_bin filename in
|
||||
for x = 0 to bytes_size efdata - 1 do output_byte fh (bget efdata x) done;
|
||||
close_out fh
|
||||
with
|
||||
e -> Printf.eprintf "Failed to write attachment to %s\n%!" filename;
|
||||
end
|
||||
| _ -> ()
|
||||
|
||||
let dump_attached_document pdf out =
|
||||
let root = Pdf.lookup_obj pdf pdf.Pdf.root in
|
||||
let names =
|
||||
match Pdf.lookup_direct pdf "/Names" root with Some n -> n | _ -> Pdf.Dictionary []
|
||||
in
|
||||
match Pdf.lookup_direct pdf "/EmbeddedFiles" names with
|
||||
| Some x ->
|
||||
iter (dump_attachment out pdf) (Pdf.contents_of_nametree pdf x)
|
||||
| None -> ()
|
||||
|
||||
let dump_attached_page pdf out page =
|
||||
let annots =
|
||||
match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with
|
||||
| Some (Pdf.Array l) -> l
|
||||
| _ -> []
|
||||
in
|
||||
let efannots =
|
||||
keep
|
||||
(fun annot ->
|
||||
match Pdf.lookup_direct pdf "/Subtype" annot with
|
||||
| Some (Pdf.Name "/FileAttachment") -> true
|
||||
| _ -> false)
|
||||
annots
|
||||
in
|
||||
let fsannots = option_map (Pdf.lookup_direct pdf "/FS") efannots in
|
||||
iter (dump_attachment out pdf) (map (fun x -> 0, x) fsannots)
|
||||
|
||||
(* Dump both document-level and page-level attached files to file, using their file names *)
|
||||
let dump_attached_files pdf out =
|
||||
try
|
||||
dump_attached_document pdf out;
|
||||
iter (dump_attached_page pdf out) (Pdfpage.pages_of_pagetree pdf)
|
||||
with
|
||||
e -> error (Printf.sprintf "Couldn't dump attached files: %s\n" (Printexc.to_string e))
|
||||
|
||||
(* If pages in stamp < pages in main, extend stamp by repeating its last page. If pages in stamp more, chop stamp *)
|
||||
let equalize_pages_extend main stamp =
|
||||
let length_stamp = Pdfpage.endpage stamp
|
||||
|
@ -3156,81 +3066,6 @@ let equalize_pages_extend main stamp =
|
|||
then chop stamp length_main
|
||||
else extend_lastpage (last (Pdfpage.pages_of_pagetree stamp)) stamp (length_main - length_stamp)
|
||||
|
||||
let remove_unused_resources_page pdf n page =
|
||||
let xobjects, all_names =
|
||||
match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with
|
||||
| Some (Pdf.Dictionary d) -> Pdf.Dictionary d, map fst d
|
||||
| _ -> Pdf.Dictionary [], []
|
||||
in
|
||||
let names_to_keep =
|
||||
option_map
|
||||
(function Pdfops.Op_Do n -> Some n | _ -> None)
|
||||
(Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content)
|
||||
in
|
||||
let names_to_remove = lose (mem' names_to_keep) all_names in
|
||||
let xobjdict = fold_left (Pdf.remove_dict_entry) xobjects names_to_remove in
|
||||
{page with Pdfpage.resources = Pdf.add_dict_entry page.Pdfpage.resources "/XObject" xobjdict}
|
||||
|
||||
let remove_unused_resources pdf =
|
||||
Cpdf.process_pages (ppstub (remove_unused_resources_page pdf)) pdf (ilist 1 (Pdfpage.endpage pdf))
|
||||
|
||||
let print_spot_colour n s =
|
||||
Printf.printf "%i %s\n" n s
|
||||
|
||||
let list_spot_colours pdf =
|
||||
Pdf.objiter
|
||||
(fun _ obj ->
|
||||
match obj with
|
||||
Pdf.Array (Pdf.Name "/Separation"::x::_) ->
|
||||
begin match Pdf.direct pdf x with
|
||||
Pdf.Name col -> Printf.printf "%s\n" col
|
||||
| _ -> ()
|
||||
end
|
||||
| _ -> ())
|
||||
pdf
|
||||
|
||||
(* Indent bookmarks in each file by one and add a title bookmark pointing to the first page. *)
|
||||
let add_bookmark_title filename use_title pdf =
|
||||
let title =
|
||||
if use_title then
|
||||
match Cpdf.get_info_utf8 pdf "/Title", Cpdf.get_xmp_info pdf "/Title" with
|
||||
"", x | x, "" | _, x -> x
|
||||
else
|
||||
Filename.basename filename
|
||||
in
|
||||
let marks = Pdfmarks.read_bookmarks pdf in
|
||||
let page1objnum =
|
||||
match Pdfpage.page_object_number pdf 1 with
|
||||
None -> error "add_bookmark_title: page not found"
|
||||
| Some x -> x
|
||||
in
|
||||
let newmarks =
|
||||
{Pdfmarks.level = 0;
|
||||
Pdfmarks.text = title;
|
||||
Pdfmarks.target = Pdfdest.XYZ (Pdfdest.PageObject page1objnum, None, None, None);
|
||||
Pdfmarks.isopen = false}
|
||||
::map (function m -> {m with Pdfmarks.level = m.Pdfmarks.level + 1}) marks
|
||||
in
|
||||
Pdfmarks.add_bookmarks newmarks pdf
|
||||
|
||||
let bookmarks_open_to_level n pdf =
|
||||
let marks = Pdfmarks.read_bookmarks pdf in
|
||||
let newmarks =
|
||||
map
|
||||
(fun m -> {m with Pdfmarks.isopen = m.Pdfmarks.level < n})
|
||||
marks
|
||||
in
|
||||
Pdfmarks.add_bookmarks newmarks pdf
|
||||
|
||||
let create_pdf pages pagesize =
|
||||
let page =
|
||||
{(Pdfpage.blankpage args.createpdf_pagesize) with
|
||||
Pdfpage.content = [Pdfops.stream_of_ops []];
|
||||
Pdfpage.resources = Pdf.Dictionary []}
|
||||
in
|
||||
let pdf, pageroot = Pdfpage.add_pagetree (many page args.createpdf_pages) (Pdf.empty ()) in
|
||||
Pdfpage.add_root pageroot [] pdf
|
||||
|
||||
let write_json output pdf =
|
||||
match output with
|
||||
| NoOutputSpecified ->
|
||||
|
@ -3317,7 +3152,7 @@ let go () =
|
|||
let pdfs =
|
||||
if args.merge_add_bookmarks then
|
||||
map2
|
||||
(fun filename pdf -> add_bookmark_title filename args.merge_add_bookmarks_use_titles pdf)
|
||||
(fun filename pdf -> Cpdf.add_bookmark_title filename args.merge_add_bookmarks_use_titles pdf)
|
||||
(map (function InFile s -> s | StdIn -> "" | AlreadyInMemory _ -> "") names)
|
||||
pdfs
|
||||
else
|
||||
|
@ -3348,7 +3183,7 @@ let go () =
|
|||
begin match args.inputs, args.out with
|
||||
| _::_, _ ->
|
||||
let pdf = get_single_pdf (Some RemoveUnusedResources) false in
|
||||
let outpdf = remove_unused_resources pdf in
|
||||
let outpdf = Cpdf.remove_unused_resources pdf in
|
||||
write_pdf true outpdf
|
||||
| _ -> error "RemoveUnusedResources: bad command line"
|
||||
end
|
||||
|
@ -3523,7 +3358,7 @@ let go () =
|
|||
| _ -> error "Copy box: no tobox or no frombox specified"
|
||||
end
|
||||
in
|
||||
let pdf = copy_box f t args.mediabox_if_missing pdf range in
|
||||
let pdf = Cpdf.copy_box f t args.mediabox_if_missing pdf range in
|
||||
write_pdf false pdf
|
||||
| _ -> error "Copy Box: bad command line"
|
||||
end
|
||||
|
@ -3866,8 +3701,8 @@ let go () =
|
|||
| Some DumpAttachedFiles ->
|
||||
let pdf = get_single_pdf args.op false in
|
||||
begin match args.out with
|
||||
| NoOutputSpecified -> dump_attached_files pdf ""
|
||||
| File n -> dump_attached_files pdf n
|
||||
| NoOutputSpecified -> Cpdf.dump_attached_files pdf ""
|
||||
| File n -> Cpdf.dump_attached_files pdf n
|
||||
| Stdout -> error "Can't dump attachments to stdout"
|
||||
end
|
||||
| Some RemoveAttachedFiles ->
|
||||
|
@ -4109,7 +3944,7 @@ let go () =
|
|||
Cpdf.print_dict_entry pdf key
|
||||
| Some ListSpotColours ->
|
||||
let pdf = get_single_pdf args.op false in
|
||||
list_spot_colours pdf
|
||||
Cpdf.list_spot_colours pdf
|
||||
| Some RemoveClipping ->
|
||||
let pdf = get_single_pdf args.op false in
|
||||
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
||||
|
@ -4131,9 +3966,9 @@ let go () =
|
|||
Cpdffont.embed_missing_fonts args.path_to_ghostscript args.gs_quiet fi fo
|
||||
| Some (BookmarksOpenToLevel n) ->
|
||||
let pdf = get_single_pdf args.op false in
|
||||
write_pdf false (bookmarks_open_to_level n pdf)
|
||||
write_pdf false (Cpdf.bookmarks_open_to_level n pdf)
|
||||
| Some CreatePDF ->
|
||||
let pdf = create_pdf args.createpdf_pages args.createpdf_pagesize in
|
||||
let pdf = Cpdf.create_pdf args.createpdf_pages args.createpdf_pagesize in
|
||||
write_pdf false pdf
|
||||
| Some RemoveAllText ->
|
||||
let pdf = get_single_pdf args.op false in
|
||||
|
|
Loading…
Reference in New Issue