Moved split writing into cpdfcommand.ml, where it belongs.
This commit is contained in:
parent
15f57035fb
commit
e782298197
8
cpdf.ml
8
cpdf.ml
|
@ -1192,7 +1192,7 @@ let really_write_pdf ~preserve_objstm ~create_objstm ?(encryption = None) ?(cpdf
|
||||||
raise (Pdf.PDFError "linearizer failed")
|
raise (Pdf.PDFError "linearizer failed")
|
||||||
end
|
end
|
||||||
|
|
||||||
let fast_write_split_pdfs
|
(*let fast_write_split_pdfs
|
||||||
recrypt was_encrypted was_decrypted_with_owner enc splitlevel
|
recrypt was_encrypted was_decrypted_with_owner enc splitlevel
|
||||||
original_filename linearize ?(cpdflin = None) preserve_objstm
|
original_filename linearize ?(cpdflin = None) preserve_objstm
|
||||||
create_objstm sq nobble spec main_pdf pagenums pdf_pages
|
create_objstm sq nobble spec main_pdf pagenums pdf_pages
|
||||||
|
@ -1218,7 +1218,7 @@ let split_pdf
|
||||||
fast_write_split_pdfs
|
fast_write_split_pdfs
|
||||||
recrypt was_encrypted was_decrypted_with_owner
|
recrypt was_encrypted was_decrypted_with_owner
|
||||||
enc 0 original_filename linearize preserve_objstm create_objstm
|
enc 0 original_filename linearize preserve_objstm create_objstm
|
||||||
squeeze nobble spec pdf (splitinto chunksize (indx pdf_pages)) pdf_pages
|
squeeze nobble spec pdf (splitinto chunksize (indx pdf_pages)) pdf_pages*)
|
||||||
|
|
||||||
(* Return list, in order, a *set* of page numbers of bookmarks at a given level *)
|
(* Return list, in order, a *set* of page numbers of bookmarks at a given level *)
|
||||||
let bookmark_pages level pdf =
|
let bookmark_pages level pdf =
|
||||||
|
@ -1227,7 +1227,7 @@ let bookmark_pages level pdf =
|
||||||
(function l when l.Pdfmarks.level = level -> Some (Pdfpage.pagenumber_of_target pdf l.Pdfmarks.target) | _ -> None)
|
(function l when l.Pdfmarks.level = level -> Some (Pdfpage.pagenumber_of_target pdf l.Pdfmarks.target) | _ -> None)
|
||||||
(Pdfmarks.read_bookmarks pdf))
|
(Pdfmarks.read_bookmarks pdf))
|
||||||
|
|
||||||
let split_at_bookmarks
|
(*let split_at_bookmarks
|
||||||
recrypt was_encrypted was_decrypted_with_owner original_filename linearize
|
recrypt was_encrypted was_decrypted_with_owner original_filename linearize
|
||||||
~cpdflin ~preserve_objstm ~create_objstm ~squeeze nobble level spec pdf
|
~cpdflin ~preserve_objstm ~create_objstm ~squeeze nobble level spec pdf
|
||||||
=
|
=
|
||||||
|
@ -1240,7 +1240,7 @@ let split_at_bookmarks
|
||||||
fast_write_split_pdfs
|
fast_write_split_pdfs
|
||||||
recrypt was_encrypted was_decrypted_with_owner None level
|
recrypt was_encrypted was_decrypted_with_owner None level
|
||||||
original_filename linearize preserve_objstm create_objstm
|
original_filename linearize preserve_objstm create_objstm
|
||||||
squeeze nobble spec pdf pts pdf_pages
|
squeeze nobble spec pdf pts pdf_pages*)
|
||||||
|
|
||||||
(* Called from cpdflib.ml - different from above *)
|
(* Called from cpdflib.ml - different from above *)
|
||||||
let split_on_bookmarks pdf level =
|
let split_on_bookmarks pdf level =
|
||||||
|
|
8
cpdf.mli
8
cpdf.mli
|
@ -208,7 +208,7 @@ val stamp : position -> bool -> bool -> bool -> int list -> Pdf.t -> Pdf.t -> Pd
|
||||||
the given level, writing to files with names given by [spec] (see
|
the given level, writing to files with names given by [spec] (see
|
||||||
cpdfmanual.pdf). [nobble] is undocumented and should be false. If [linearize]
|
cpdfmanual.pdf). [nobble] is undocumented and should be false. If [linearize]
|
||||||
is true, the files will be linearized. *)
|
is true, the files will be linearized. *)
|
||||||
val split_at_bookmarks :
|
(*val split_at_bookmarks :
|
||||||
bool ->
|
bool ->
|
||||||
bool ->
|
bool ->
|
||||||
bool ->
|
bool ->
|
||||||
|
@ -222,7 +222,7 @@ val split_at_bookmarks :
|
||||||
int ->
|
int ->
|
||||||
string ->
|
string ->
|
||||||
Pdf.t ->
|
Pdf.t ->
|
||||||
unit
|
unit*)
|
||||||
|
|
||||||
(** Split a PDF on bookmarks of a given level or below. Level 0 is top level. *)
|
(** Split a PDF on bookmarks of a given level or below. Level 0 is top level. *)
|
||||||
val split_on_bookmarks : Pdf.t -> int -> Pdf.t list
|
val split_on_bookmarks : Pdf.t -> int -> Pdf.t list
|
||||||
|
@ -234,7 +234,7 @@ each output is linearized. If [preserve_objstm] is true, object streams will
|
||||||
be used if the input file had them. If [create_objstm] is true, object
|
be used if the input file had them. If [create_objstm] is true, object
|
||||||
streams will be created in any event. [printf] and [nobble] are
|
streams will be created in any event. [printf] and [nobble] are
|
||||||
undocumented and should be false. *)
|
undocumented and should be false. *)
|
||||||
val split_pdf :
|
(*val split_pdf :
|
||||||
bool ->
|
bool ->
|
||||||
bool ->
|
bool ->
|
||||||
bool ->
|
bool ->
|
||||||
|
@ -249,7 +249,7 @@ val split_pdf :
|
||||||
(Pdf.t -> Pdf.t) ->
|
(Pdf.t -> Pdf.t) ->
|
||||||
string ->
|
string ->
|
||||||
Pdf.t ->
|
Pdf.t ->
|
||||||
unit
|
unit*)
|
||||||
|
|
||||||
(** {2 Listing fonts} *)
|
(** {2 Listing fonts} *)
|
||||||
|
|
||||||
|
|
115
cpdfcommand.ml
115
cpdfcommand.ml
|
@ -2026,14 +2026,119 @@ let write_pdf ?(encryption = None) ?(is_decompress=false) mk_id pdf =
|
||||||
with
|
with
|
||||||
End_of_file -> flush stdout (*r For Windows *)
|
End_of_file -> flush stdout (*r For Windows *)
|
||||||
|
|
||||||
|
(* Strip from [s] every character that is outside the range 32..126 or is one
   of the punctuation characters matched below, then drop a single leading
   full stop if one remains. The result may be the empty string when nothing
   survives. Intended for use by the split-at-bookmarks code when turning a
   bookmark title into part of a file name. *)
let remove_unsafe_characters s =
  let is_unsafe c =
    let code = int_of_char c in
      code < 32 || code > 126 ||
      (match c with
       | '/' | '?' | '<' | '>' | '\\' | ':' | '*' | '|' | '\"' | '^' | '+' | '=' -> true
       | _ -> false)
  in
    match lose is_unsafe (explode s) with
    | '.'::rest -> implode rest
    | kept -> implode kept
|
||||||
|
|
||||||
|
(* Title of the first bookmark in [marks] whose target is page [n] of [pdf]
   and whose level is at most [splitlevel], passed through
   [remove_unsafe_characters]. Returns the empty string when no bookmark
   matches. The final argument is ignored. *)
let get_bookmark_name pdf marks splitlevel n _ =
  let starts_section mark =
    n = Pdfpage.pagenumber_of_target pdf mark.Pdfmarks.target
    && mark.Pdfmarks.level <= splitlevel
  in
    match keep starts_section marks with
    | [] -> ""
    | first::_ -> remove_unsafe_characters first.Pdfmarks.text
|
||||||
|
|
||||||
|
(* Expand the two-character @ codes in the string [s]:
     @F  [filename] (the caller passes the name without its extension)
     @N  [sequence], the sequence number with no padding
     @S  [startpage], the start page of this section
     @E  [endpage], the end page of this section
     @B  the bookmark name at [startpage], via [get_bookmark_name]
   Every other character, including an '@' not followed by one of these
   letters, is copied through unchanged. *)
let process_others marks pdf splitlevel filename sequence startpage endpage s =
  (* The accumulator is kept reversed, so a replacement is reversed before
     being put on the front of it. *)
  let emit str acc = rev (explode str) @ acc in
  let rec expand acc = function
    | '@'::('F' | 'N' | 'S' | 'E' | 'B' as code)::rest ->
        let replacement =
          match code with
          | 'F' -> filename
          | 'N' -> string_of_int sequence
          | 'S' -> string_of_int startpage
          | 'E' -> string_of_int endpage
          | _ -> get_bookmark_name pdf marks splitlevel startpage pdf
        in
          expand (emit replacement acc) rest
    | c::rest -> expand (c::acc) rest
    | [] -> rev acc
  in
    implode (expand [] (explode s))
|
||||||
|
|
||||||
|
(* Build an output file name from [spec] for section number [n].

   The first run of '%' characters in [spec] is replaced by [n], padded on
   the left with zeroes to the length of that run; later runs are left as
   they are. The result is then given to [process_others], which expands the
   @ codes using [filename], [startpage], [endpage] and the bookmarks in
   [marks].

   When [n] has more digits than there are '%' characters, all of its digits
   are kept. Cutting it down to its low-order digits would give distinct
   sequence numbers the same name (1 and 11 under a single '%'). *)
let name_of_spec marks (pdf : Pdf.t) splitlevel spec n filename startpage endpage =
  let fill width n =
    let digits = explode (string_of_int n) in
    let missing = width - length digits in
      if missing > 0
        then implode (many '0' missing @ digits)
        else implode digits
  in
  let before, including = cleavewhile (neq '%') (explode spec) in
  let percents, after = cleavewhile (eq '%') including in
  let expanded =
    match percents with
    | [] -> spec
    | _ -> implode before ^ fill (length percents) n ^ implode after
  in
    process_others marks pdf splitlevel filename n startpage endpage expanded
|
||||||
|
|
||||||
|
(* Find the stem of a filename: the base name of [s] with everything from its
   last full stop onwards removed. A base name containing no full stop is
   returned whole, rather than as the empty string, so that names without an
   extension still give a usable stem. *)
let stem s =
  let name = Filename.basename s in
    (* String.rindex raises Not_found when there is no '.' in the name. *)
    try String.sub name 0 (String.rindex name '.') with
      Not_found -> name
|
||||||
|
|
||||||
|
(* Write one output file for each page-number list in [pagenums].

   For each list, numbered from [indx pagenums], the pages are extracted from
   [main_pdf] and passed through [nobble]; the output name is computed by
   [name_of_spec] from [spec], the stem of [original_filename] and the
   extremes of the page list; unreferenced objects are removed; the document
   is squeezed when [sq] is true; and the result is written with
   [really_write_pdf] using encryption [enc].

   [linearize], [cpdflin], [preserve_objstm], [create_objstm] and [pdf_pages]
   are accepted but not used in this body. *)
let fast_write_split_pdfs
  enc splitlevel
  original_filename linearize ?(cpdflin = None) preserve_objstm
  create_objstm sq nobble spec main_pdf pagenums pdf_pages
=
  let marks = Pdfmarks.read_bookmarks main_pdf in
  let write_section number section =
    let pdf = nobble (Pdfpage.pdf_of_pages main_pdf section) in
    let startpage, endpage = extremes section in
    let name =
      name_of_spec
        marks main_pdf splitlevel spec number
        (stem original_filename) startpage endpage
    in
      Pdf.remove_unreferenced pdf;
      if sq then Cpdf.squeeze pdf;
      really_write_pdf ~encryption:enc (enc <> None) pdf name
  in
    iter2 write_section (indx pagenums) pagenums
|
||||||
|
|
||||||
|
(* Page numbers targeted by the bookmarks of [pdf] whose level is exactly
   [level], in bookmark order, with duplicates removed by
   [setify_preserving_order]. *)
let bookmark_pages level pdf =
  let page_if_at_level mark =
    if mark.Pdfmarks.level = level
      then Some (Pdfpage.pagenumber_of_target pdf mark.Pdfmarks.target)
      else None
  in
    setify_preserving_order
      (option_map page_if_at_level (Pdfmarks.read_bookmarks pdf))
|
||||||
|
|
||||||
|
(* Split [pdf] at the pages targeted by bookmarks of level [level] and write
   each section to a file named from [spec].

   The bookmark pages are each reduced by one, and any resulting value that
   is not in 1..[Pdfpage.endpage pdf] is discarded, before being handed to
   [splitat] together with the page indices. The sections are written by
   [fast_write_split_pdfs] with no encryption. [cpdflin] is accepted but not
   used in this body. *)
let split_at_bookmarks
  original_filename linearize
  ~cpdflin ~preserve_objstm ~create_objstm ~squeeze nobble level spec pdf
=
  let pdf_pages = Pdfpage.pages_of_pagetree pdf in
  let out_of_range x = x <= 0 || x > Pdfpage.endpage pdf in
  let split_points = lose out_of_range (map pred (bookmark_pages level pdf)) in
  let sections = splitat split_points (indx pdf_pages) in
    fast_write_split_pdfs
      None level
      original_filename linearize preserve_objstm create_objstm
      squeeze nobble spec pdf sections pdf_pages
|
||||||
|
|
||||||
|
(* Split [pdf] into consecutive groups of page indices produced by
   [splitinto chunksize], and write each group to a file named from [spec]
   via [fast_write_split_pdfs], using encryption [enc] and split level 0.
   [cpdflin] is accepted but not used in this body. *)
let split_pdf
  enc original_filename
  chunksize linearize ~cpdflin ~preserve_objstm ~create_objstm ~squeeze
  nobble spec pdf
=
  let pdf_pages = Pdfpage.pages_of_pagetree pdf in
  let chunks = splitinto chunksize (indx pdf_pages) in
    fast_write_split_pdfs
      enc 0 original_filename linearize preserve_objstm create_objstm
      squeeze nobble spec pdf chunks pdf_pages
|
||||||
|
|
||||||
(* The second component of the first entry of [args.inputs]. Calls [error]
   with the message below when there are no inputs. *)
let get_pagespec () =
  match args.inputs with
  | [] -> error "get_pagespec"
  | (_, pagespec, _, _, _)::_ -> pagespec
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(* Copy a font from [frompdf] with name [fontname] on page [fontpage] to [pdf] on all pages in [range] *)
|
(* Copy a font from [frompdf] with name [fontname] on page [fontpage] to [pdf] on all pages in [range] *)
|
||||||
let copy_font frompdf fontname fontpage range pdf =
|
let copy_font frompdf fontname fontpage range pdf =
|
||||||
match Pdf.renumber_pdfs [frompdf; pdf] with
|
match Pdf.renumber_pdfs [frompdf; pdf] with
|
||||||
|
@ -3065,8 +3170,7 @@ let go () =
|
||||||
Pdfwrite.user_password = args.user;
|
Pdfwrite.user_password = args.user;
|
||||||
Pdfwrite.permissions = banlist_of_args ()}
|
Pdfwrite.permissions = banlist_of_args ()}
|
||||||
in
|
in
|
||||||
Cpdf.split_pdf
|
split_pdf
|
||||||
args.recrypt args.was_encrypted args.was_decrypted_with_owner
|
|
||||||
enc args.original_filename args.chunksize args.linearize args.cpdflin
|
enc args.original_filename args.chunksize args.linearize args.cpdflin
|
||||||
args.preserve_objstm args.preserve_objstm (*yes--always create if preserving *)
|
args.preserve_objstm args.preserve_objstm (*yes--always create if preserving *)
|
||||||
args.squeeze nobble output_spec pdf
|
args.squeeze nobble output_spec pdf
|
||||||
|
@ -3210,8 +3314,7 @@ let go () =
|
||||||
| [(InFile f, _, _, _, _)] -> f
|
| [(InFile f, _, _, _, _)] -> f
|
||||||
| _ -> ""
|
| _ -> ""
|
||||||
in
|
in
|
||||||
Cpdf.split_at_bookmarks
|
split_at_bookmarks
|
||||||
args.recrypt args.was_encrypted args.was_decrypted_with_owner
|
|
||||||
filename args.linearize args.cpdflin args.preserve_objstm
|
filename args.linearize args.cpdflin args.preserve_objstm
|
||||||
(* Yes *)args.preserve_objstm args.squeeze nobble level output_spec pdf
|
(* Yes *)args.preserve_objstm args.squeeze nobble level output_spec pdf
|
||||||
| Stdout -> error "Can't split to standard output"
|
| Stdout -> error "Can't split to standard output"
|
||||||
|
|
Loading…
Reference in New Issue