From 5fb0a3b7cc00e3ef13c6dd6330c374ad6a020748 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Thu, 25 Sep 2014 14:20:23 +0100 Subject: [PATCH] Fixed -split to preserve/create object streams, leading to smaller splits --- cpdf.ml | 11 ++++++----- cpdf.mli | 11 +++++++++-- cpdfcommand.ml | 4 +++- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/cpdf.ml b/cpdf.ml index a6eed2a..9b713cf 100644 --- a/cpdf.ml +++ b/cpdf.ml @@ -1266,7 +1266,7 @@ let name_of_spec printf marks (pdf : Pdf.t) splitlevel spec n filename startpage let stem s = implode (rev (tail_no_fail (dropwhile (neq '.') (rev (explode (Filename.basename s)))))) -let fast_write_split_pdfs enc printf splitlevel original_filename linearize nobble spec main_pdf pagenums pdf_pages = +let fast_write_split_pdfs enc printf splitlevel original_filename linearize preserve_objstm create_objstm nobble spec main_pdf pagenums pdf_pages = let marks = Pdfmarks.read_bookmarks main_pdf in iter2 (fun number pagenums -> @@ -1274,13 +1274,14 @@ let fast_write_split_pdfs enc printf splitlevel original_filename linearize nobb let startpage, endpage = extremes pagenums in let name = name_of_spec printf marks main_pdf splitlevel spec number (stem original_filename) startpage endpage in Pdf.remove_unreferenced pdf; - Pdfwrite.pdf_to_file_options linearize enc (not (enc = None)) pdf name) + Pdfwrite.pdf_to_file_options ~preserve_objstm ~generate_objstm:create_objstm linearize enc (not (enc = None)) pdf name) (indx pagenums) pagenums -let split_pdf enc printf original_filename chunksize linearize nobble spec pdf = +let split_pdf enc printf original_filename chunksize linearize ~preserve_objstm ~create_objstm nobble spec pdf = let pdf_pages = Pdfpage.pages_of_pagetree pdf in - fast_write_split_pdfs enc printf 0 original_filename linearize nobble spec pdf (splitinto chunksize (indx pdf_pages)) pdf_pages + fast_write_split_pdfs enc printf 0 original_filename linearize preserve_objstm + create_objstm nobble spec pdf 
(splitinto chunksize (indx pdf_pages)) pdf_pages (* Return list, in order, a *set* of page numbers of bookmarks at a given level *) let bookmark_pages level pdf = @@ -1302,7 +1303,7 @@ let split_at_bookmarks original_filename linearize nobble level spec pdf = let pts = splitat points (indx pdf_pages) in (*flprint "Calling fast_write_split_pdfs\n";*) fast_write_split_pdfs None false level - original_filename linearize nobble spec pdf pts pdf_pages + original_filename linearize false false nobble spec pdf pts pdf_pages (* Called from cpdflib.ml - different from above *) let split_on_bookmarks pdf level = diff --git a/cpdf.mli b/cpdf.mli index a6f8f4b..908d88d 100644 --- a/cpdf.mli +++ b/cpdf.mli @@ -194,8 +194,15 @@ val split_at_bookmarks : string -> bool -> (Pdf.t -> Pdf.t) -> int -> string -> (** Split a PDF on bookmarks of a given level or below. Level 0 is top level. *) val split_on_bookmarks : Pdf.t -> int -> Pdf.t list -(** [split_pdf printf chunksize linearize nobble spec pdf] splits a PDF to files given a chunk size (number of pages in each split), a file specification (see cpdfmanual.pdf). If linearize is true each output is linearized. [printf] and [nobble] are undocumented and should be false. *) -val split_pdf : Pdfwrite.encryption option -> bool -> string -> int -> bool -> (Pdf.t -> Pdf.t) -> string -> Pdf.t -> unit +(** [split_pdf enc printf original_filename chunksize linearize ~preserve_objstm ~create_objstm nobble +spec pdf] splits a PDF to files given a chunk size (number of pages in each +split), a file specification (see cpdfmanual.pdf). If [linearize] is true +each output is linearized. If [preserve_objstm] is true, object streams will +be used if the input file had them. If [create_objstm] is true, object +streams will be created in any event. [printf] and [nobble] are +undocumented; [printf] should be false. 
*) +val split_pdf : Pdfwrite.encryption option -> bool -> string -> int -> bool -> + preserve_objstm:bool -> create_objstm:bool -> (Pdf.t -> Pdf.t) -> string -> Pdf.t -> unit (** {2 Listing fonts} *) diff --git a/cpdfcommand.ml b/cpdfcommand.ml index 0211655..814fd05 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -3172,7 +3172,9 @@ let go () = Pdfwrite.user_password = args.user; Pdfwrite.permissions = banlist_of_args ()} in - Cpdf.split_pdf enc args.printf_format filename args.chunksize args.linearize nobble output_spec pdf + Cpdf.split_pdf + enc args.printf_format filename args.chunksize args.linearize + args.preserve_objstm args.preserve_objstm (*yes--always create if preserving *) nobble output_spec pdf | _, Stdout -> error "Can't split to standard output" | _, NoOutputSpecified -> error "Split: No output format specified" | _ -> error "Split: bad parameters"