Moved split writing into cpdfcommand.ml, where it belongs.

This commit is contained in:
John Whitington 2014-10-28 12:56:45 +00:00
parent 15f57035fb
commit e782298197
3 changed files with 117 additions and 14 deletions

View File

@ -1192,7 +1192,7 @@ let really_write_pdf ~preserve_objstm ~create_objstm ?(encryption = None) ?(cpdf
raise (Pdf.PDFError "linearizer failed") raise (Pdf.PDFError "linearizer failed")
end end
let fast_write_split_pdfs (*let fast_write_split_pdfs
recrypt was_encrypted was_decrypted_with_owner enc splitlevel recrypt was_encrypted was_decrypted_with_owner enc splitlevel
original_filename linearize ?(cpdflin = None) preserve_objstm original_filename linearize ?(cpdflin = None) preserve_objstm
create_objstm sq nobble spec main_pdf pagenums pdf_pages create_objstm sq nobble spec main_pdf pagenums pdf_pages
@ -1218,7 +1218,7 @@ let split_pdf
fast_write_split_pdfs fast_write_split_pdfs
recrypt was_encrypted was_decrypted_with_owner recrypt was_encrypted was_decrypted_with_owner
enc 0 original_filename linearize preserve_objstm create_objstm enc 0 original_filename linearize preserve_objstm create_objstm
squeeze nobble spec pdf (splitinto chunksize (indx pdf_pages)) pdf_pages squeeze nobble spec pdf (splitinto chunksize (indx pdf_pages)) pdf_pages*)
(* Return list, in order, a *set* of page numbers of bookmarks at a given level *) (* Return list, in order, a *set* of page numbers of bookmarks at a given level *)
let bookmark_pages level pdf = let bookmark_pages level pdf =
@ -1227,7 +1227,7 @@ let bookmark_pages level pdf =
(function l when l.Pdfmarks.level = level -> Some (Pdfpage.pagenumber_of_target pdf l.Pdfmarks.target) | _ -> None) (function l when l.Pdfmarks.level = level -> Some (Pdfpage.pagenumber_of_target pdf l.Pdfmarks.target) | _ -> None)
(Pdfmarks.read_bookmarks pdf)) (Pdfmarks.read_bookmarks pdf))
let split_at_bookmarks (*let split_at_bookmarks
recrypt was_encrypted was_decrypted_with_owner original_filename linearize recrypt was_encrypted was_decrypted_with_owner original_filename linearize
~cpdflin ~preserve_objstm ~create_objstm ~squeeze nobble level spec pdf ~cpdflin ~preserve_objstm ~create_objstm ~squeeze nobble level spec pdf
= =
@ -1240,7 +1240,7 @@ let split_at_bookmarks
fast_write_split_pdfs fast_write_split_pdfs
recrypt was_encrypted was_decrypted_with_owner None level recrypt was_encrypted was_decrypted_with_owner None level
original_filename linearize preserve_objstm create_objstm original_filename linearize preserve_objstm create_objstm
squeeze nobble spec pdf pts pdf_pages squeeze nobble spec pdf pts pdf_pages*)
(* Called from cpdflib.ml - different from above *) (* Called from cpdflib.ml - different from above *)
let split_on_bookmarks pdf level = let split_on_bookmarks pdf level =

View File

@ -208,7 +208,7 @@ val stamp : position -> bool -> bool -> bool -> int list -> Pdf.t -> Pdf.t -> Pd
the given level, writing to files with names given by [spec] (see the given level, writing to files with names given by [spec] (see
cpdfmanual.pdf). [nobble] is undocumented and should be false. If [linearize] cpdfmanual.pdf). [nobble] is undocumented and should be false. If [linearize]
is true, the files will be linearized. *) is true, the files will be linearized. *)
val split_at_bookmarks : (*val split_at_bookmarks :
bool -> bool ->
bool -> bool ->
bool -> bool ->
@ -222,7 +222,7 @@ val split_at_bookmarks :
int -> int ->
string -> string ->
Pdf.t -> Pdf.t ->
unit unit*)
(** Split a PDF on bookmarks of a given level or below. Level 0 is top level. *) (** Split a PDF on bookmarks of a given level or below. Level 0 is top level. *)
val split_on_bookmarks : Pdf.t -> int -> Pdf.t list val split_on_bookmarks : Pdf.t -> int -> Pdf.t list
@ -234,7 +234,7 @@ each output is linearized. If [preserve_objstm] is true, object streams will
be used if the input file had them. If [create_objstm] is true, object be used if the input file had them. If [create_objstm] is true, object
streams will be created in any event. [printf] and [nobble] are streams will be created in any event. [printf] and [nobble] are
undocumented and should be false. *) undocumented and should be false. *)
val split_pdf : (*val split_pdf :
bool -> bool ->
bool -> bool ->
bool -> bool ->
@ -249,7 +249,7 @@ val split_pdf :
(Pdf.t -> Pdf.t) -> (Pdf.t -> Pdf.t) ->
string -> string ->
Pdf.t -> Pdf.t ->
unit unit*)
(** {2 Listing fonts} *) (** {2 Listing fonts} *)

View File

@ -2026,14 +2026,119 @@ let write_pdf ?(encryption = None) ?(is_decompress=false) mk_id pdf =
with with
End_of_file -> flush stdout (*r For Windows *) End_of_file -> flush stdout (*r For Windows *)
(* Strip from [s] every character that is unsafe in a file name: the
   punctuation listed in [is_unsafe], plus any character whose code is below
   32 or above 126. If the first surviving character is '.', that one
   character is dropped as well. The result can be the empty string, which is
   the failure case; this is only meant to be used on bookmark titles by the
   split-at-bookmarks code, where by definition that should not happen. *)
let remove_unsafe_characters s =
  let is_unsafe c =
    let code = int_of_char c in
    code < 32 || code > 126 ||
    (match c with
     | '/' | '?' | '<' | '>' | '\\' | ':' | '*' | '|' | '\"' | '^' | '+' | '=' -> true
     | _ -> false)
  in
  match lose is_unsafe (explode s) with
  | '.' :: rest -> implode rest
  | safe -> implode safe
(* [get_bookmark_name pdf marks splitlevel n _] is the sanitised title of the
   first bookmark in [marks] whose target is page [n] and whose level is at
   most [splitlevel], or "" when there is no such bookmark. The final argument
   is ignored. *)
let get_bookmark_name pdf marks splitlevel n _ =
  let starts_here mark =
    n = Pdfpage.pagenumber_of_target pdf mark.Pdfmarks.target
    && mark.Pdfmarks.level <= splitlevel
  in
  match keep starts_here marks with
  | [] -> ""
  | first :: _ -> remove_unsafe_characters first.Pdfmarks.text
(* Expand the '@' codes in the output specification [s]:
     @F  filename without extension (the [filename] argument)
     @N  sequence number with no padding
     @S  start page of this section
     @E  end page of this section
     @B  bookmark name at the start page
   The scan is left to right; expanded text is not scanned again, and any '@'
   not followed by one of the letters above is copied through unchanged. *)
let process_others marks pdf splitlevel filename sequence startpage endpage s =
  let len = String.length s in
  let out = Buffer.create (len + 16) in
  let rec scan i =
    if i < len then begin
      let expansion =
        if i + 1 < len && s.[i] = '@' then
          (match s.[i + 1] with
           | 'F' -> Some filename
           | 'N' -> Some (string_of_int sequence)
           | 'S' -> Some (string_of_int startpage)
           | 'E' -> Some (string_of_int endpage)
           | 'B' -> Some (get_bookmark_name pdf marks splitlevel startpage pdf)
           | _ -> None)
        else None
      in
      match expansion with
      | Some text -> Buffer.add_string out text; scan (i + 2)
      | None -> Buffer.add_char out s.[i]; scan (i + 1)
    end
  in
  scan 0;
  Buffer.contents out
(* [name_of_spec marks pdf splitlevel spec n filename startpage endpage] builds
   the output name for chunk number [n] from the template [spec].

   The first run of '%' characters in [spec] is replaced by [n], padded on the
   left with '0' to at least as many characters as there are '%' signs. The
   result (or [spec] itself when it contains no '%') is then handed to
   [process_others] for the '@' expansions.

   The run length is a minimum width only. A sequence number that needs more
   digits is written in full: cutting it down to its low-order digits would
   give, for example, chunks 23 and 123 the same name under "%%". *)
let name_of_spec marks (pdf : Pdf.t) splitlevel spec n filename startpage endpage =
  let fill width n =
    let digits = string_of_int n in
    let missing = width - String.length digits in
    if missing > 0 then String.make missing '0' ^ digits else digits
  in
  let before, including = cleavewhile (neq '%') (explode spec) in
  let percents, after = cleavewhile (eq '%') including in
  let expanded =
    match percents with
    | [] -> spec
    | _ -> implode before ^ fill (length percents) n ^ implode after
  in
  process_others marks pdf splitlevel filename n startpage endpage expanded
(* [stem s] is the base name of [s] (directory part removed) with its last
   extension removed: everything from the final '.' onwards is dropped.
   A base name containing no '.' is returned unchanged; the earlier list-based
   version returned "" for such names, so that @F expanded to nothing for an
   input file without an extension. *)
let stem s =
  let base = Filename.basename s in
  try String.sub base 0 (String.rindex base '.') with
  | Not_found -> base
(* Write one output file per element of [pagenums], each element being the
   list of page numbers making up one chunk of [main_pdf].

   For chunk number k (numbered by [indx]) the pages are extracted with
   [Pdfpage.pdf_of_pages], passed through [nobble], named with [name_of_spec]
   from [spec], stripped of unreferenced objects, squeezed when [sq] is true,
   and written with [really_write_pdf] using encryption [enc]. The positional
   boolean given to [really_write_pdf] is true exactly when [enc] is not
   [None].

   [linearize], [cpdflin], [preserve_objstm], [create_objstm] and [pdf_pages]
   are accepted but not used in this body. *)
let fast_write_split_pdfs
  enc splitlevel
  original_filename linearize ?(cpdflin = None) preserve_objstm
  create_objstm sq nobble spec main_pdf pagenums pdf_pages
=
  let marks = Pdfmarks.read_bookmarks main_pdf in
  let write_chunk sequence chunk_pages =
    let chunk = nobble (Pdfpage.pdf_of_pages main_pdf chunk_pages) in
    let first, last = extremes chunk_pages in
    let name =
      name_of_spec
        marks main_pdf splitlevel spec sequence
        (stem original_filename) first last
    in
    Pdf.remove_unreferenced chunk;
    if sq then Cpdf.squeeze chunk;
    really_write_pdf ~encryption:enc (enc <> None) chunk name
  in
  iter2 write_chunk (indx pagenums) pagenums
(* The page numbers targeted by the bookmarks of [pdf] at exactly [level], in
   bookmark order, with duplicates removed by [setify_preserving_order]. *)
let bookmark_pages level pdf =
  let page_if_at_level mark =
    if mark.Pdfmarks.level = level
    then Some (Pdfpage.pagenumber_of_target pdf mark.Pdfmarks.target)
    else None
  in
  setify_preserving_order
    (option_map page_if_at_level (Pdfmarks.read_bookmarks pdf))
(* Split [pdf] at the bookmarks of level [level] and write each part to a file
   named from [spec].

   Each bookmark page number is decremented by one to form a split point.
   Points that are then not positive, or lie beyond [Pdfpage.endpage pdf], are
   discarded. The page indices are cut at the remaining points with [splitat],
   and the chunks are handed to [fast_write_split_pdfs] with no encryption
   ([None]) and [level] as the split level.

   [cpdflin] is accepted but not forwarded. *)
let split_at_bookmarks
  original_filename linearize
  ~cpdflin ~preserve_objstm ~create_objstm ~squeeze nobble level spec pdf
=
  let pdf_pages = Pdfpage.pages_of_pagetree pdf in
  let out_of_range point = point <= 0 || point > Pdfpage.endpage pdf in
  let split_points = lose out_of_range (map pred (bookmark_pages level pdf)) in
  let chunks = splitat split_points (indx pdf_pages) in
  fast_write_split_pdfs
    None level
    original_filename linearize preserve_objstm create_objstm
    squeeze nobble spec pdf chunks pdf_pages
(* Split [pdf] into consecutive groups of pages and write each group to a file
   named from [spec]. The page indices are grouped with
   [splitinto chunksize], and the groups are handed to
   [fast_write_split_pdfs] with encryption [enc] and split level 0.

   [cpdflin] is accepted but not forwarded. *)
let split_pdf
  enc original_filename
  chunksize linearize ~cpdflin ~preserve_objstm ~create_objstm ~squeeze
  nobble spec pdf
=
  let pdf_pages = Pdfpage.pages_of_pagetree pdf in
  let chunks = splitinto chunksize (indx pdf_pages) in
  fast_write_split_pdfs
    enc 0 original_filename linearize preserve_objstm create_objstm
    squeeze nobble spec pdf chunks pdf_pages
let get_pagespec () = let get_pagespec () =
match args.inputs with match args.inputs with
| (_, ps, _, _, _)::_ -> ps | (_, ps, _, _, _)::_ -> ps
| _ -> error "get_pagespec" | _ -> error "get_pagespec"
(* Copy a font from [frompdf] with name [fontname] on page [fontpage] to [pdf] on all pages in [range] *) (* Copy a font from [frompdf] with name [fontname] on page [fontpage] to [pdf] on all pages in [range] *)
let copy_font frompdf fontname fontpage range pdf = let copy_font frompdf fontname fontpage range pdf =
match Pdf.renumber_pdfs [frompdf; pdf] with match Pdf.renumber_pdfs [frompdf; pdf] with
@ -3065,8 +3170,7 @@ let go () =
Pdfwrite.user_password = args.user; Pdfwrite.user_password = args.user;
Pdfwrite.permissions = banlist_of_args ()} Pdfwrite.permissions = banlist_of_args ()}
in in
Cpdf.split_pdf split_pdf
args.recrypt args.was_encrypted args.was_decrypted_with_owner
enc args.original_filename args.chunksize args.linearize args.cpdflin enc args.original_filename args.chunksize args.linearize args.cpdflin
args.preserve_objstm args.preserve_objstm (*yes--always create if preserving *) args.preserve_objstm args.preserve_objstm (*yes--always create if preserving *)
args.squeeze nobble output_spec pdf args.squeeze nobble output_spec pdf
@ -3210,8 +3314,7 @@ let go () =
| [(InFile f, _, _, _, _)] -> f | [(InFile f, _, _, _, _)] -> f
| _ -> "" | _ -> ""
in in
Cpdf.split_at_bookmarks split_at_bookmarks
args.recrypt args.was_encrypted args.was_decrypted_with_owner
filename args.linearize args.cpdflin args.preserve_objstm filename args.linearize args.cpdflin args.preserve_objstm
(* Yes *)args.preserve_objstm args.squeeze nobble level output_spec pdf (* Yes *)args.preserve_objstm args.squeeze nobble level output_spec pdf
| Stdout -> error "Can't split to standard output" | Stdout -> error "Can't split to standard output"