Fixed -split to preserve/create object streams, leading to smaller splits
This commit is contained in:
John Whitington 2014-09-25 14:20:23 +01:00
parent 2aab644fea
commit 5fb0a3b7cc
3 changed files with 18 additions and 8 deletions

11
cpdf.ml
View File

@@ -1266,7 +1266,7 @@ let name_of_spec printf marks (pdf : Pdf.t) splitlevel spec n filename startpage
let stem s = let stem s =
implode (rev (tail_no_fail (dropwhile (neq '.') (rev (explode (Filename.basename s)))))) implode (rev (tail_no_fail (dropwhile (neq '.') (rev (explode (Filename.basename s))))))
let fast_write_split_pdfs enc printf splitlevel original_filename linearize nobble spec main_pdf pagenums pdf_pages = let fast_write_split_pdfs enc printf splitlevel original_filename linearize preserve_objstm create_objstm nobble spec main_pdf pagenums pdf_pages =
let marks = Pdfmarks.read_bookmarks main_pdf in let marks = Pdfmarks.read_bookmarks main_pdf in
iter2 iter2
(fun number pagenums -> (fun number pagenums ->
@@ -1274,13 +1274,14 @@ let fast_write_split_pdfs enc printf splitlevel original_filename linearize nobb
let startpage, endpage = extremes pagenums in let startpage, endpage = extremes pagenums in
let name = name_of_spec printf marks main_pdf splitlevel spec number (stem original_filename) startpage endpage in let name = name_of_spec printf marks main_pdf splitlevel spec number (stem original_filename) startpage endpage in
Pdf.remove_unreferenced pdf; Pdf.remove_unreferenced pdf;
Pdfwrite.pdf_to_file_options linearize enc (not (enc = None)) pdf name) Pdfwrite.pdf_to_file_options ~preserve_objstm ~generate_objstm:create_objstm linearize enc (not (enc = None)) pdf name)
(indx pagenums) (indx pagenums)
pagenums pagenums
let split_pdf enc printf original_filename chunksize linearize nobble spec pdf = let split_pdf enc printf original_filename chunksize linearize ~preserve_objstm ~create_objstm nobble spec pdf =
let pdf_pages = Pdfpage.pages_of_pagetree pdf in let pdf_pages = Pdfpage.pages_of_pagetree pdf in
fast_write_split_pdfs enc printf 0 original_filename linearize nobble spec pdf (splitinto chunksize (indx pdf_pages)) pdf_pages fast_write_split_pdfs enc printf 0 original_filename linearize preserve_objstm
create_objstm nobble spec pdf (splitinto chunksize (indx pdf_pages)) pdf_pages
(* Return list, in order, a *set* of page numbers of bookmarks at a given level *) (* Return list, in order, a *set* of page numbers of bookmarks at a given level *)
let bookmark_pages level pdf = let bookmark_pages level pdf =
@@ -1302,7 +1303,7 @@ let split_at_bookmarks original_filename linearize nobble level spec pdf =
let pts = splitat points (indx pdf_pages) in let pts = splitat points (indx pdf_pages) in
(*flprint "Calling fast_write_split_pdfs\n";*) (*flprint "Calling fast_write_split_pdfs\n";*)
fast_write_split_pdfs None false level fast_write_split_pdfs None false level
original_filename linearize nobble spec pdf pts pdf_pages original_filename linearize false false nobble spec pdf pts pdf_pages
(* Called from cpdflib.ml - different from above *) (* Called from cpdflib.ml - different from above *)
let split_on_bookmarks pdf level = let split_on_bookmarks pdf level =

View File

@@ -194,8 +194,15 @@ val split_at_bookmarks : string -> bool -> (Pdf.t -> Pdf.t) -> int -> string ->
(** Split a PDF on bookmarks of a given level or below. Level 0 is top level. *) (** Split a PDF on bookmarks of a given level or below. Level 0 is top level. *)
val split_on_bookmarks : Pdf.t -> int -> Pdf.t list val split_on_bookmarks : Pdf.t -> int -> Pdf.t list
(** [split_pdf printf chunksize linearize nobble spec pdf] splits a PDF to files given a chunk size (number of pages in each split), a file specification (see cpdfmanual.pdf). If linearize is true each output is linearized. [printf] and [nobble] are undocumented and should be false. *) (** [split_pdf printf chunksize linearize preserve_objstm create_objstm nobble
val split_pdf : Pdfwrite.encryption option -> bool -> string -> int -> bool -> (Pdf.t -> Pdf.t) -> string -> Pdf.t -> unit spec pdf] splits a PDF to files given a chunk size (number of pages in each
split), a file specification (see cpdfmanual.pdf). If [linearize] is true
each output is linearized. If [preserve_objstm] is true, object streams will
be used if the input file had them. If [create_objstm] is true, object
streams will be created in any event. [printf] and [nobble] are
undocumented and should be false. *)
val split_pdf : Pdfwrite.encryption option -> bool -> string -> int -> bool ->
preserve_objstm:bool -> create_objstm:bool -> (Pdf.t -> Pdf.t) -> string -> Pdf.t -> unit
(** {2 Listing fonts} *) (** {2 Listing fonts} *)

View File

@@ -3172,7 +3172,9 @@ let go () =
Pdfwrite.user_password = args.user; Pdfwrite.user_password = args.user;
Pdfwrite.permissions = banlist_of_args ()} Pdfwrite.permissions = banlist_of_args ()}
in in
Cpdf.split_pdf enc args.printf_format filename args.chunksize args.linearize nobble output_spec pdf Cpdf.split_pdf
enc args.printf_format filename args.chunksize args.linearize
args.preserve_objstm args.preserve_objstm (*yes--always create if preserving *) nobble output_spec pdf
| _, Stdout -> error "Can't split to standard output" | _, Stdout -> error "Can't split to standard output"
| _, NoOutputSpecified -> error "Split: No output format specified" | _, NoOutputSpecified -> error "Split: No output format specified"
| _ -> error "Split: bad parameters" | _ -> error "Split: bad parameters"