Began to excise cpdftk support
This commit is contained in:
parent
f42cb56ea0
commit
ded4c5f22d
212
cpdf.ml
212
cpdf.ml
|
@ -251,196 +251,6 @@ let squeeze pdf =
|
||||||
with
|
with
|
||||||
e -> raise (Pdf.PDFError "Squeeze failed. No output written")
|
e -> raise (Pdf.PDFError "Squeeze failed. No output written")
|
||||||
|
|
||||||
(* Printf implementation *)
|
|
||||||
exception PrintfFailure of string
|
|
||||||
|
|
||||||
type convop = Ca | CA | Cc | Cd | Ce | CE | Cf | Cg | CG | Ci | Cn | Co | Cp | Cs | Cu | Cx | CX | CPercent
|
|
||||||
|
|
||||||
type sizespec = Sll | Sl | SL | Sh | Shh | Sj | Sz | St
|
|
||||||
|
|
||||||
type pformat =
|
|
||||||
{leftjustify : bool;
|
|
||||||
padzero : bool;
|
|
||||||
signalways : bool;
|
|
||||||
space : bool;
|
|
||||||
variant : bool;
|
|
||||||
minwidth : int option;
|
|
||||||
precision : int option;
|
|
||||||
sizespec : sizespec option;
|
|
||||||
convop : convop}
|
|
||||||
|
|
||||||
let string_of_options f =
|
|
||||||
(if f.leftjustify then "-" else "") ^
|
|
||||||
(if f.padzero then "0" else "") ^
|
|
||||||
(if f.signalways then "+" else "") ^
|
|
||||||
(if f.space then " " else "") ^
|
|
||||||
(if f.variant then "#" else "")
|
|
||||||
|
|
||||||
let string_of_minwidth = function
|
|
||||||
| None -> ""
|
|
||||||
| Some x -> string_of_int x
|
|
||||||
|
|
||||||
let string_of_precision = function
|
|
||||||
| None -> ""
|
|
||||||
| Some x -> "." ^ string_of_int x
|
|
||||||
|
|
||||||
let string_of_sizespec = function
|
|
||||||
| None -> ""
|
|
||||||
| Some s->
|
|
||||||
match s with
|
|
||||||
| Sll -> "ll" | Sl -> "l" | SL -> "L" | Sh -> "h"
|
|
||||||
| Shh -> "hh" | Sj -> "j" | Sz -> "z" | St -> "t"
|
|
||||||
|
|
||||||
let string_of_convop = function
|
|
||||||
| Ca -> "a" | CA -> "A" | Cc -> "c" | Cd -> "d"
|
|
||||||
| Ce -> "e" | CE -> "E" | Cf -> "f" | Cg -> "g"
|
|
||||||
| CG -> "G" | Ci -> "i" | Cn -> "n" | Co -> "o"
|
|
||||||
| Cp -> "p" | Cs -> "s" | Cu -> "u" | Cx -> "x"
|
|
||||||
| CX -> "X" | CPercent -> "%"
|
|
||||||
|
|
||||||
let string_of_format f =
|
|
||||||
"%" ^
|
|
||||||
string_of_options f ^
|
|
||||||
string_of_minwidth f.minwidth ^
|
|
||||||
string_of_precision f.precision ^
|
|
||||||
string_of_sizespec f.sizespec ^
|
|
||||||
string_of_convop f.convop
|
|
||||||
|
|
||||||
type section =
|
|
||||||
| Format of pformat
|
|
||||||
| String of string
|
|
||||||
|
|
||||||
let sec_of_format cs =
|
|
||||||
(* 1. Read zero or more flags -, +, 0, #, <space> *)
|
|
||||||
let cs = ref cs in
|
|
||||||
let lj, pz, sa, sp, va, fin =
|
|
||||||
ref false, ref false, ref false, ref false, ref false, ref false
|
|
||||||
in
|
|
||||||
while not !fin do
|
|
||||||
match !cs with
|
|
||||||
| '-'::_ -> set lj; cs := tl !cs
|
|
||||||
| '+'::_ -> set sa; cs := tl !cs
|
|
||||||
| '0'::_ -> set pz; cs := tl !cs
|
|
||||||
| ' '::_ -> set sp; cs := tl !cs
|
|
||||||
| '#'::_ -> set va; cs := tl !cs
|
|
||||||
| _ -> set fin
|
|
||||||
done;
|
|
||||||
(* 2. Read a possible minimum field width *)
|
|
||||||
let minwidth =
|
|
||||||
let fwchars, rest = cleavewhile isdigit !cs in
|
|
||||||
cs := rest;
|
|
||||||
if fwchars = [] then None else Some (int_of_string (implode fwchars))
|
|
||||||
in
|
|
||||||
(* 3. Read an optional precision specification *)
|
|
||||||
let precision =
|
|
||||||
match !cs with
|
|
||||||
| '.'::more ->
|
|
||||||
cs := more;
|
|
||||||
let pchars, rest = cleavewhile isdigit !cs in
|
|
||||||
cs := rest;
|
|
||||||
if pchars = [] then None else Some (int_of_string (implode pchars))
|
|
||||||
| _ -> None
|
|
||||||
in
|
|
||||||
(* 4. Read an optional size specification *)
|
|
||||||
let sizespec =
|
|
||||||
match !cs with
|
|
||||||
| 'l'::'l'::r -> cs := r; Some Sll
|
|
||||||
| 'l'::r -> cs := r; Some Sl
|
|
||||||
| 'L'::r -> cs := r; Some SL
|
|
||||||
| 'h'::'h'::r -> cs := r; Some Shh
|
|
||||||
| 'h'::r -> cs := r; Some Sh
|
|
||||||
| 'j'::r -> cs := r; Some Sj
|
|
||||||
| 'z'::r -> cs := r; Some Sz
|
|
||||||
| 't'::r -> cs := r; Some St
|
|
||||||
| _ -> None
|
|
||||||
in
|
|
||||||
(* 5. Read the conversion operation *)
|
|
||||||
let convop =
|
|
||||||
match !cs with
|
|
||||||
| 'a'::r -> cs := r; Ca
|
|
||||||
| 'A'::r -> cs := r; CA
|
|
||||||
| 'c'::r -> cs := r; Cc
|
|
||||||
| 'd'::r -> cs := r; Cd
|
|
||||||
| 'e'::r -> cs := r; Ce
|
|
||||||
| 'E'::r -> cs := r; CE
|
|
||||||
| 'f'::r -> cs := r; Cf
|
|
||||||
| 'g'::r -> cs := r; Cg
|
|
||||||
| 'G'::r -> cs := r; CG
|
|
||||||
| 'i'::r -> cs := r; Ci
|
|
||||||
| 'n'::r -> cs := r; Cn
|
|
||||||
| 'o'::r -> cs := r; Co
|
|
||||||
| 'p'::r -> cs := r; Cp
|
|
||||||
| 's'::r -> cs := r; Cs
|
|
||||||
| 'u'::r -> cs := r; Cu
|
|
||||||
| 'x'::r -> cs := r; Cx
|
|
||||||
| 'X'::r -> cs := r; CX
|
|
||||||
| '%'::r -> cs := r; CPercent
|
|
||||||
| _ -> raise (Failure "sec_of_format")
|
|
||||||
in
|
|
||||||
{leftjustify = !lj;
|
|
||||||
padzero = !pz;
|
|
||||||
signalways = !sa;
|
|
||||||
space = !sp;
|
|
||||||
variant = !va;
|
|
||||||
minwidth = minwidth;
|
|
||||||
precision = precision;
|
|
||||||
sizespec = sizespec;
|
|
||||||
convop = convop},
|
|
||||||
!cs
|
|
||||||
|
|
||||||
let rec sections_of_string_inner secs currstr = function
|
|
||||||
| '%'::m ->
|
|
||||||
let sec, rest = sec_of_format m in
|
|
||||||
if currstr = []
|
|
||||||
then sections_of_string_inner (Format sec::secs) currstr rest
|
|
||||||
else sections_of_string_inner (Format sec::String (implode (rev currstr))::secs) [] rest
|
|
||||||
| x::xs ->
|
|
||||||
sections_of_string_inner secs (x::currstr) xs
|
|
||||||
| [] ->
|
|
||||||
if currstr = [] then rev secs else rev (String (implode (rev currstr))::secs)
|
|
||||||
|
|
||||||
(* Take a format string, and split it into sections *)
|
|
||||||
let sections_of_string s =
|
|
||||||
try
|
|
||||||
sections_of_string_inner [] [] (explode s)
|
|
||||||
with
|
|
||||||
_ -> raise (PrintfFailure "Couldn't parse Printf format")
|
|
||||||
|
|
||||||
(* Substitute an integer into a format, returning the empty string if the format is not suitable. *)
|
|
||||||
|
|
||||||
(* For now, just 'd', 'u', 'i' *)
|
|
||||||
let sub_int i f =
|
|
||||||
(*i Printf.printf "Substituting format |%s|\n" (string_of_format f); i*)
|
|
||||||
let str = string_of_int i
|
|
||||||
in let padding = if f.padzero then '0' else ' ' in
|
|
||||||
if f.minwidth <> None && String.length str < unopt f.minwidth then
|
|
||||||
let padding = many padding (unopt f.minwidth - String.length str) in
|
|
||||||
if f.leftjustify then str ^ implode padding else implode padding ^ str
|
|
||||||
else
|
|
||||||
str
|
|
||||||
|
|
||||||
(* Given a list of integers, substitute into integer formats *)
|
|
||||||
let rec substitute_inner donesections sections = function
|
|
||||||
| [] -> rev donesections @ sections
|
|
||||||
| i::is ->
|
|
||||||
match sections with
|
|
||||||
| [] -> rev donesections @ sections
|
|
||||||
| String s::more -> substitute_inner (String s::donesections) more (i::is)
|
|
||||||
| Format f::more -> substitute_inner (String (sub_int i f)::donesections) more is
|
|
||||||
|
|
||||||
let substitute x =
|
|
||||||
try substitute_inner [] x with
|
|
||||||
_ -> raise (PrintfFailure "Failed to substitute integer")
|
|
||||||
|
|
||||||
(* Flatten a set of sections to a string *)
|
|
||||||
let string_of_section = function
|
|
||||||
| String s -> s
|
|
||||||
| Format f -> string_of_format f
|
|
||||||
|
|
||||||
let string_of_sections sections =
|
|
||||||
try fold_left ( ^ ) "" (map string_of_section sections) with
|
|
||||||
_ -> raise (PrintfFailure "Failed to build string from Printf sections")
|
|
||||||
|
|
||||||
type encoding =
|
type encoding =
|
||||||
| Raw
|
| Raw
|
||||||
| UTF8
|
| UTF8
|
||||||
|
@ -1324,13 +1134,7 @@ let process_others marks pdf splitlevel filename sequence startpage endpage s =
|
||||||
in
|
in
|
||||||
implode (procss [] (explode s))
|
implode (procss [] (explode s))
|
||||||
|
|
||||||
let name_of_spec printf marks (pdf : Pdf.t) splitlevel spec n filename startpage endpage =
|
let name_of_spec marks (pdf : Pdf.t) splitlevel spec n filename startpage endpage =
|
||||||
if printf then
|
|
||||||
let spec =
|
|
||||||
string_of_sections (substitute (sections_of_string spec) [n])
|
|
||||||
in
|
|
||||||
process_others marks pdf splitlevel filename n startpage endpage spec
|
|
||||||
else
|
|
||||||
let fill l n =
|
let fill l n =
|
||||||
let chars = explode (string_of_int n) in
|
let chars = explode (string_of_int n) in
|
||||||
if length chars > l
|
if length chars > l
|
||||||
|
@ -1383,22 +1187,22 @@ let really_write_pdf ~preserve_objstm ~create_objstm ?(encryption = None) ?(cpdf
|
||||||
raise (Pdf.PDFError "linearizer failed")
|
raise (Pdf.PDFError "linearizer failed")
|
||||||
end
|
end
|
||||||
|
|
||||||
let fast_write_split_pdfs enc printf splitlevel original_filename linearize ?(cpdflin = None) preserve_objstm create_objstm sq nobble spec main_pdf pagenums pdf_pages =
|
let fast_write_split_pdfs enc splitlevel original_filename linearize ?(cpdflin = None) preserve_objstm create_objstm sq nobble spec main_pdf pagenums pdf_pages =
|
||||||
let marks = Pdfmarks.read_bookmarks main_pdf in
|
let marks = Pdfmarks.read_bookmarks main_pdf in
|
||||||
iter2
|
iter2
|
||||||
(fun number pagenums ->
|
(fun number pagenums ->
|
||||||
let pdf = nobble (Pdfpage.pdf_of_pages main_pdf pagenums) in
|
let pdf = nobble (Pdfpage.pdf_of_pages main_pdf pagenums) in
|
||||||
let startpage, endpage = extremes pagenums in
|
let startpage, endpage = extremes pagenums in
|
||||||
let name = name_of_spec printf marks main_pdf splitlevel spec number (stem original_filename) startpage endpage in
|
let name = name_of_spec marks main_pdf splitlevel spec number (stem original_filename) startpage endpage in
|
||||||
Pdf.remove_unreferenced pdf;
|
Pdf.remove_unreferenced pdf;
|
||||||
if sq then squeeze pdf;
|
if sq then squeeze pdf;
|
||||||
really_write_pdf ~preserve_objstm ~create_objstm ~encryption:enc linearize (not (enc = None)) pdf name)
|
really_write_pdf ~preserve_objstm ~create_objstm ~encryption:enc linearize (not (enc = None)) pdf name)
|
||||||
(indx pagenums)
|
(indx pagenums)
|
||||||
pagenums
|
pagenums
|
||||||
|
|
||||||
let split_pdf enc printf original_filename chunksize linearize ~cpdflin ~preserve_objstm ~create_objstm ~squeeze nobble spec pdf =
|
let split_pdf enc original_filename chunksize linearize ~cpdflin ~preserve_objstm ~create_objstm ~squeeze nobble spec pdf =
|
||||||
let pdf_pages = Pdfpage.pages_of_pagetree pdf in
|
let pdf_pages = Pdfpage.pages_of_pagetree pdf in
|
||||||
fast_write_split_pdfs enc printf 0 original_filename linearize preserve_objstm
|
fast_write_split_pdfs enc 0 original_filename linearize preserve_objstm
|
||||||
create_objstm squeeze nobble spec pdf (splitinto chunksize (indx pdf_pages)) pdf_pages
|
create_objstm squeeze nobble spec pdf (splitinto chunksize (indx pdf_pages)) pdf_pages
|
||||||
|
|
||||||
(* Return list, in order, a *set* of page numbers of bookmarks at a given level *)
|
(* Return list, in order, a *set* of page numbers of bookmarks at a given level *)
|
||||||
|
@ -1415,7 +1219,7 @@ let split_at_bookmarks original_filename linearize ~cpdflin ~preserve_objstm ~cr
|
||||||
lose (fun x -> x <= 0 || x > Pdfpage.endpage pdf) (map pred points)
|
lose (fun x -> x <= 0 || x > Pdfpage.endpage pdf) (map pred points)
|
||||||
in
|
in
|
||||||
let pts = splitat points (indx pdf_pages) in
|
let pts = splitat points (indx pdf_pages) in
|
||||||
fast_write_split_pdfs None false level
|
fast_write_split_pdfs None level
|
||||||
original_filename linearize preserve_objstm create_objstm squeeze nobble spec pdf pts pdf_pages
|
original_filename linearize preserve_objstm create_objstm squeeze nobble spec pdf pts pdf_pages
|
||||||
|
|
||||||
(* Called from cpdflib.ml - different from above *)
|
(* Called from cpdflib.ml - different from above *)
|
||||||
|
@ -3045,7 +2849,7 @@ let xmltree_of_bytes b =
|
||||||
and data d = D d in
|
and data d = D d in
|
||||||
Xmlm.input_doc_tree ~el ~data i
|
Xmlm.input_doc_tree ~el ~data i
|
||||||
|
|
||||||
let rec string_of_xmltree = function
|
(*let rec string_of_xmltree = function
|
||||||
D d ->
|
D d ->
|
||||||
Printf.sprintf "DATA **%s**" d
|
Printf.sprintf "DATA **%s**" d
|
||||||
| E (tag, trees) ->
|
| E (tag, trees) ->
|
||||||
|
@ -3067,7 +2871,7 @@ and string_of_attributes attrs =
|
||||||
|
|
||||||
and string_of_xmltrees trees =
|
and string_of_xmltrees trees =
|
||||||
fold_left
|
fold_left
|
||||||
(fun a b -> a ^ " " ^ b) "" (map string_of_xmltree trees)
|
(fun a b -> a ^ " " ^ b) "" (map string_of_xmltree trees)*)
|
||||||
|
|
||||||
let rec get_data_for namespace name = function
|
let rec get_data_for namespace name = function
|
||||||
D _ -> None
|
D _ -> None
|
||||||
|
|
4
cpdf.mli
4
cpdf.mli
|
@ -90,7 +90,7 @@ endpage] makes format substitutions in [spec] to make an output file name:
|
||||||
}
|
}
|
||||||
[printf] is undocumented and should be set to [false].
|
[printf] is undocumented and should be set to [false].
|
||||||
*)
|
*)
|
||||||
val name_of_spec : bool -> Pdfmarks.t list -> Pdf.t -> int -> string -> int -> string -> int -> int -> string
|
val name_of_spec : Pdfmarks.t list -> Pdf.t -> int -> string -> int -> string -> int -> int -> string
|
||||||
|
|
||||||
(** {2 Compress and Decompress} *)
|
(** {2 Compress and Decompress} *)
|
||||||
|
|
||||||
|
@ -220,7 +220,7 @@ each output is linearized. If [preserve_objstm] is true, object streams will
|
||||||
be used if the input file had them. If [create_objstm] is true, object
|
be used if the input file had them. If [create_objstm] is true, object
|
||||||
streams will be created in any event. [printf] and [nobble] are
|
streams will be created in any event. [printf] and [nobble] are
|
||||||
undocumented and should be false. *)
|
undocumented and should be false. *)
|
||||||
val split_pdf : Pdfwrite.encryption option -> bool -> string -> int -> bool -> cpdflin:string option ->
|
val split_pdf : Pdfwrite.encryption option -> string -> int -> bool -> cpdflin:string option ->
|
||||||
preserve_objstm:bool -> create_objstm:bool -> squeeze:bool -> (Pdf.t -> Pdf.t) -> string -> Pdf.t -> unit
|
preserve_objstm:bool -> create_objstm:bool -> squeeze:bool -> (Pdf.t -> Pdf.t) -> string -> Pdf.t -> unit
|
||||||
|
|
||||||
(** {2 Listing fonts} *)
|
(** {2 Listing fonts} *)
|
||||||
|
|
285
cpdfcommand.ml
285
cpdfcommand.ml
|
@ -8,6 +8,8 @@ let version_date = "(unreleased, 16th September 2014)"
|
||||||
open Pdfutil
|
open Pdfutil
|
||||||
open Pdfio
|
open Pdfio
|
||||||
|
|
||||||
|
let initial_file_size = ref 0
|
||||||
|
|
||||||
(* Wrap up the file reading functions to exit with code 1 when an encryption
|
(* Wrap up the file reading functions to exit with code 1 when an encryption
|
||||||
problem occurs. This happens when object streams are in an encrypted document
|
problem occurs. This happens when object streams are in an encrypted document
|
||||||
and so it can't be read without the right password... The existing error
|
and so it can't be read without the right password... The existing error
|
||||||
|
@ -144,8 +146,6 @@ type op =
|
||||||
| ExtractImages
|
| ExtractImages
|
||||||
| ImageResolution of float
|
| ImageResolution of float
|
||||||
| MissingFonts
|
| MissingFonts
|
||||||
| DumpData
|
|
||||||
| UpdateInfo of string
|
|
||||||
| RemoveUnusedResources
|
| RemoveUnusedResources
|
||||||
| ExtractFontFile
|
| ExtractFontFile
|
||||||
| ExtractText
|
| ExtractText
|
||||||
|
@ -461,25 +461,9 @@ let rec decrypt_if_necessary (a, b, c, user_pw, owner_pw) op pdf =
|
||||||
| Some pdf, permissions ->
|
| Some pdf, permissions ->
|
||||||
if operation_allowed permissions op
|
if operation_allowed permissions op
|
||||||
then pdf
|
then pdf
|
||||||
else if args.do_ask
|
|
||||||
then decrypt_if_necessary_ask (a, b, c, user_pw, owner_pw) op pdf
|
|
||||||
else soft_error "User password cannot give permission for this operation"
|
else soft_error "User password cannot give permission for this operation"
|
||||||
| _ ->
|
| _ ->
|
||||||
if args.do_ask
|
soft_error "Failed to decrypt file: wrong password?"
|
||||||
then decrypt_if_necessary_ask (a, b, c, user_pw, owner_pw) op pdf
|
|
||||||
else soft_error "Failed to decrypt file: wrong password?"
|
|
||||||
|
|
||||||
and decrypt_if_necessary_ask (a, b, c, user_pw, owner_pw) op pdf =
|
|
||||||
let name = match a with InFile x -> x | StdIn -> "Standard input" | AlreadyInMemory _ -> "PDF" in
|
|
||||||
flprint "The password supplied for input PDF:\n";
|
|
||||||
flprint (" " ^ name);
|
|
||||||
flprint "\n did not work. The PDF is encrypted, so you must supply the\n";
|
|
||||||
flprint " owner password to open it. To quit, enter a blank password\n";
|
|
||||||
flprint "Please enter the password to use on the input PDF:\n";
|
|
||||||
flprint (" " ^ name ^ ".\n");
|
|
||||||
match Pervasives.read_line () with
|
|
||||||
| "" -> soft_error "Failed to decrypt file: wrong password?"
|
|
||||||
| x -> decrypt_if_necessary (a, b, c, user_pw, x) op pdf
|
|
||||||
|
|
||||||
let nobble pdf =
|
let nobble pdf =
|
||||||
if not demo then pdf else
|
if not demo then pdf else
|
||||||
|
@ -523,19 +507,9 @@ let setmethod s =
|
||||||
| _ -> error "Unsupported encryption method"
|
| _ -> error "Unsupported encryption method"
|
||||||
|
|
||||||
let setowner s =
|
let setowner s =
|
||||||
match s with
|
|
||||||
| "PROMPT" ->
|
|
||||||
flprint "Enter owner password to use on the output PDF.\n";
|
|
||||||
args.owner <- Pervasives.read_line ()
|
|
||||||
| s ->
|
|
||||||
args.owner <- s
|
args.owner <- s
|
||||||
|
|
||||||
let setuser s =
|
let setuser s =
|
||||||
match s with
|
|
||||||
| "PROMPT" ->
|
|
||||||
flprint "Enter user password to use on the output PDF.\n";
|
|
||||||
args.user <- Pervasives.read_line ()
|
|
||||||
| s ->
|
|
||||||
args.user <- s
|
args.user <- s
|
||||||
|
|
||||||
let anon_fun s =
|
let anon_fun s =
|
||||||
|
@ -872,14 +846,6 @@ let setstampunder f =
|
||||||
setop (StampUnder f) ();
|
setop (StampUnder f) ();
|
||||||
if args.position = Cpdf.TopLeft 100. then args.position <- Cpdf.BottomLeft 0.
|
if args.position = Cpdf.TopLeft 100. then args.position <- Cpdf.BottomLeft 0.
|
||||||
|
|
||||||
let setstamponmulti f =
|
|
||||||
setop (StampOn f) ();
|
|
||||||
args.ismulti <- true
|
|
||||||
|
|
||||||
let setstampundermulti f =
|
|
||||||
setop (StampUnder f) ();
|
|
||||||
args.ismulti <- true
|
|
||||||
|
|
||||||
let setcombinepages f =
|
let setcombinepages f =
|
||||||
setop (CombinePages f) ()
|
setop (CombinePages f) ()
|
||||||
|
|
||||||
|
@ -1150,39 +1116,12 @@ let setkeepthisid () =
|
||||||
| (InFile s, _, _, _, _)::_ -> args.keep_this_id <- Some s
|
| (InFile s, _, _, _, _)::_ -> args.keep_this_id <- Some s
|
||||||
| _ -> ()
|
| _ -> ()
|
||||||
|
|
||||||
let setupdateinfo s =
|
|
||||||
args.op <- Some (UpdateInfo s)
|
|
||||||
|
|
||||||
let setdoask () =
|
let setdoask () =
|
||||||
args.do_ask <- true
|
args.do_ask <- true
|
||||||
|
|
||||||
let setverbose () =
|
let setverbose () =
|
||||||
args.verbose <- true
|
args.verbose <- true
|
||||||
|
|
||||||
let promptinputs () =
|
|
||||||
flprint "Please enter a filename for an input PDF:\n";
|
|
||||||
set_input (Pervasives.read_line ())
|
|
||||||
|
|
||||||
let promptinputpasswords () =
|
|
||||||
flprint "Please enter the open password to use on the input PDF:\n ";
|
|
||||||
match args.inputs with
|
|
||||||
| (InFile s, b, c, d, _)::more ->
|
|
||||||
flprint s;
|
|
||||||
flprint ".\n It can be empty, or have a maximum of 32 characters:\n";
|
|
||||||
let pw = Pervasives.read_line () in
|
|
||||||
args.inputs <- (InFile s, b, c, d, pw)::more
|
|
||||||
| _ -> ()
|
|
||||||
|
|
||||||
let promptoutput () =
|
|
||||||
flprint "Please enter a name for the output:\n";
|
|
||||||
args.out <- File (Pervasives.read_line ())
|
|
||||||
|
|
||||||
let setdontoverwriteexistingfiles () =
|
|
||||||
args.dont_overwrite_existing_files <- true
|
|
||||||
|
|
||||||
let setdontoverwriteinputs () =
|
|
||||||
args.dont_overwrite_inputs <- true
|
|
||||||
|
|
||||||
let setmakenewid () =
|
let setmakenewid () =
|
||||||
args.makenewid <- true
|
args.makenewid <- true
|
||||||
|
|
||||||
|
@ -1736,7 +1675,7 @@ and specs =
|
||||||
Arg.Int setlabelstartval,
|
Arg.Int setlabelstartval,
|
||||||
" Set label start value (default 1)");
|
" Set label start value (default 1)");
|
||||||
(* These items are for cpdftk *)
|
(* These items are for cpdftk *)
|
||||||
("-update-info", Arg.String setupdateinfo, "");
|
(*("-update-info", Arg.String setupdateinfo, "");
|
||||||
("-printf-format", Arg.Unit setprintfformat, "");
|
("-printf-format", Arg.Unit setprintfformat, "");
|
||||||
|
|
||||||
("-dump-data", Arg.Unit (setop DumpData), "");
|
("-dump-data", Arg.Unit (setop DumpData), "");
|
||||||
|
@ -1753,7 +1692,7 @@ and specs =
|
||||||
("-remove-unused-resources", Arg.Unit (setop RemoveUnusedResources), "");
|
("-remove-unused-resources", Arg.Unit (setop RemoveUnusedResources), "");
|
||||||
("-stamp-under-multi", Arg.String setstampundermulti, "");
|
("-stamp-under-multi", Arg.String setstampundermulti, "");
|
||||||
("-stamp-on-multi", Arg.String setstamponmulti, "");
|
("-stamp-on-multi", Arg.String setstamponmulti, "");
|
||||||
("-list-annotations-more", Arg.Unit (setop ListAnnotationsMore), "");
|
("-list-annotations-more", Arg.Unit (setop ListAnnotationsMore), "");*)
|
||||||
(*These items are undocumented *)
|
(*These items are undocumented *)
|
||||||
("-extract-fontfile", Arg.Unit (setop ExtractFontFile), "");
|
("-extract-fontfile", Arg.Unit (setop ExtractFontFile), "");
|
||||||
("-extract-images", Arg.Unit setextractimages, "");
|
("-extract-images", Arg.Unit setextractimages, "");
|
||||||
|
@ -1780,20 +1719,14 @@ or \"1-6,9-end\" or \"even\" or \"odd\" or \"reverse\".\n\nOperations (See \
|
||||||
manual for full details):\n"
|
manual for full details):\n"
|
||||||
|
|
||||||
(* Reading and writing *)
|
(* Reading and writing *)
|
||||||
let rec writing_ok outname =
|
let filesize name =
|
||||||
if args.dont_overwrite_inputs && mem outname !all_inputs then
|
try
|
||||||
error ("Error: The output filename: " ^ outname ^"\n is the same as an input filename.\n");
|
let x = open_in_bin name in
|
||||||
if args.dont_overwrite_existing_files && Sys.file_exists outname then
|
let r = in_channel_length x in
|
||||||
begin
|
close_in x;
|
||||||
flprint ("Output file: " ^ outname ^ " already exists. Overwrite? (y/n)\n");
|
r
|
||||||
match explode (Pervasives.read_line ()) with
|
with
|
||||||
| ('y' | 'Y')::_ -> outname
|
_ -> 0
|
||||||
| _ ->
|
|
||||||
flprint "Enter a name for the output:\n";
|
|
||||||
writing_ok (Pervasives.read_line ())
|
|
||||||
end
|
|
||||||
else
|
|
||||||
outname
|
|
||||||
|
|
||||||
let really_write_pdf ?(encryption = None) mk_id pdf outname =
|
let really_write_pdf ?(encryption = None) mk_id pdf outname =
|
||||||
let outname' =
|
let outname' =
|
||||||
|
@ -1805,6 +1738,7 @@ let really_write_pdf ?(encryption = None) mk_id pdf outname =
|
||||||
~preserve_objstm:args.preserve_objstm
|
~preserve_objstm:args.preserve_objstm
|
||||||
~generate_objstm:args.create_objstm
|
~generate_objstm:args.create_objstm
|
||||||
false encryption mk_id pdf outname';
|
false encryption mk_id pdf outname';
|
||||||
|
begin
|
||||||
if args.linearize then
|
if args.linearize then
|
||||||
let cpdflin =
|
let cpdflin =
|
||||||
match Cpdf.find_cpdflin args.cpdflin with
|
match Cpdf.find_cpdflin args.cpdflin with
|
||||||
|
@ -1819,6 +1753,13 @@ let really_write_pdf ?(encryption = None) mk_id pdf outname =
|
||||||
begin try Sys.remove outname with _ -> () end;
|
begin try Sys.remove outname with _ -> () end;
|
||||||
raise (Pdf.PDFError "linearizer failed")
|
raise (Pdf.PDFError "linearizer failed")
|
||||||
end
|
end
|
||||||
|
end;
|
||||||
|
if args.squeeze then
|
||||||
|
let s = filesize outname in
|
||||||
|
Printf.printf
|
||||||
|
"Final file size is %i bytes, %.2f%% of original.\n"
|
||||||
|
s
|
||||||
|
((float s /. float !initial_file_size) *. 100.)
|
||||||
|
|
||||||
let write_pdf ?(encryption = None) ?(is_decompress=false) mk_id pdf =
|
let write_pdf ?(encryption = None) ?(is_decompress=false) mk_id pdf =
|
||||||
if args.create_objstm && not args.keepversion
|
if args.create_objstm && not args.keepversion
|
||||||
|
@ -1828,7 +1769,6 @@ let write_pdf ?(encryption = None) ?(is_decompress=false) mk_id pdf =
|
||||||
| NoOutputSpecified ->
|
| NoOutputSpecified ->
|
||||||
output_pdfs =| pdf
|
output_pdfs =| pdf
|
||||||
| File outname ->
|
| File outname ->
|
||||||
let outname = writing_ok outname in
|
|
||||||
begin match encryption with
|
begin match encryption with
|
||||||
None ->
|
None ->
|
||||||
ignore (nobble pdf);
|
ignore (nobble pdf);
|
||||||
|
@ -1880,6 +1820,8 @@ let pdf_of_stdin user_pw owner_pw =
|
||||||
let get_single_pdf op read_lazy =
|
let get_single_pdf op read_lazy =
|
||||||
match args.inputs with
|
match args.inputs with
|
||||||
| (InFile inname, _, _, u, o) as input::_ ->
|
| (InFile inname, _, _, u, o) as input::_ ->
|
||||||
|
if args.squeeze then
|
||||||
|
Printf.printf "Initial file size is %i bytes\n" (filesize inname);
|
||||||
let pdf =
|
let pdf =
|
||||||
if read_lazy then
|
if read_lazy then
|
||||||
pdfread_pdf_of_channel_lazy (optstring u) (optstring o) (open_in_bin inname)
|
pdfread_pdf_of_channel_lazy (optstring u) (optstring o) (open_in_bin inname)
|
||||||
|
@ -1905,6 +1847,12 @@ file once *)
|
||||||
let get_pdf_from_input_kind ((_, _, _, u, o) as input) op = function
|
let get_pdf_from_input_kind ((_, _, _, u, o) as input) op = function
|
||||||
| AlreadyInMemory pdf -> pdf
|
| AlreadyInMemory pdf -> pdf
|
||||||
| InFile s ->
|
| InFile s ->
|
||||||
|
if args.squeeze then
|
||||||
|
begin
|
||||||
|
let size = filesize s in
|
||||||
|
initial_file_size := size;
|
||||||
|
Printf.printf "Initial file size is %i bytes\n" size
|
||||||
|
end;
|
||||||
begin try Hashtbl.find filenames s with
|
begin try Hashtbl.find filenames s with
|
||||||
Not_found ->
|
Not_found ->
|
||||||
let pdf = decrypt_if_necessary input op (pdfread_pdf_of_file (optstring u) (optstring o) s) in
|
let pdf = decrypt_if_necessary input op (pdfread_pdf_of_file (optstring u) (optstring o) s) in
|
||||||
|
@ -2047,7 +1995,7 @@ let extract_images pdf range stem =
|
||||||
if images <> [] then
|
if images <> [] then
|
||||||
(let names =
|
(let names =
|
||||||
map
|
map
|
||||||
(function n -> let r = Cpdf.name_of_spec false [] (*FIXME *) pdf 0 stem n "" 0 0 in (*i flprint r; flprint "\n"; i*) r)
|
(function n -> let r = Cpdf.name_of_spec [] (*FIXME *) pdf 0 stem n "" 0 0 in (*i flprint r; flprint "\n"; i*) r)
|
||||||
(ilist 1 (length images))
|
(ilist 1 (length images))
|
||||||
in
|
in
|
||||||
iter2 (write_image pdf page.Pdfpage.resources) names images))
|
iter2 (write_image pdf page.Pdfpage.resources) names images))
|
||||||
|
@ -2492,170 +2440,6 @@ let dump_attached_files pdf out =
|
||||||
with
|
with
|
||||||
_ -> error "Couldn't dump attached files"
|
_ -> error "Couldn't dump attached files"
|
||||||
|
|
||||||
(* Prerotate a pdf *)
|
|
||||||
let prerotate_pdf pdf r =
|
|
||||||
let setto angle = Cpdf.rotate_pdf angle pdf (ilist 1 (Pdfpage.endpage pdf))
|
|
||||||
and setby angle = Cpdf.rotate_pdf_by angle pdf (ilist 1 (Pdfpage.endpage pdf)) in
|
|
||||||
match r with
|
|
||||||
| Pdfmerge.DNR -> pdf
|
|
||||||
| Pdfmerge.N -> setto 0
|
|
||||||
| Pdfmerge.S -> setto 180
|
|
||||||
| Pdfmerge.E -> setto 90
|
|
||||||
| Pdfmerge.W -> setto 270
|
|
||||||
| Pdfmerge.L -> setby ~-90
|
|
||||||
| Pdfmerge.R -> setby 90
|
|
||||||
| Pdfmerge.D -> setby 180
|
|
||||||
|
|
||||||
(* Convert from unicode or PDFDocencoded to ASCII string with HTML entities in it. *)
|
|
||||||
let html_of_unicode s =
|
|
||||||
implode
|
|
||||||
(flatten
|
|
||||||
(map
|
|
||||||
(function 60 -> explode "<"
|
|
||||||
| 62 -> explode ">"
|
|
||||||
| 38 -> explode "&"
|
|
||||||
| 34 -> explode """
|
|
||||||
| x when x >= 0x20 && x <= 0x7e -> [char_of_int x]
|
|
||||||
| x -> ['&';'#'] @ explode (string_of_int x) @ [';'])
|
|
||||||
(Pdftext.codepoints_of_pdfdocstring s)))
|
|
||||||
|
|
||||||
(* Convert from HTML entities to a PDF string which is unicode-encoded (if there are any non-ASCII chars, or PDFDocEncoded if there aren't) . *)
|
|
||||||
let unicode_of_html s =
|
|
||||||
let rec codepoints_of_html ps = function
|
|
||||||
| '&'::'l'::'t'::';'::r -> codepoints_of_html (60::ps) r
|
|
||||||
| '&'::'g'::'t'::';'::r -> codepoints_of_html (62::ps) r
|
|
||||||
| '&'::'a'::'m'::'p'::';'::r -> codepoints_of_html (38::ps) r
|
|
||||||
| '&'::'q'::'u'::'o'::'t'::';'::r -> codepoints_of_html (34::ps) r
|
|
||||||
| '&'::'#'::r ->
|
|
||||||
begin match cleavewhile (function '0'..'9' -> true | _ -> false) r with
|
|
||||||
| [], r -> codepoints_of_html ps r
|
|
||||||
| cs, (';'::r) ->
|
|
||||||
let i = try int_of_string (implode cs) with _ -> error "bad HTML literal in update_info" in
|
|
||||||
codepoints_of_html (i::ps) r
|
|
||||||
| _ -> error "bad HTML literal in update_info 2"
|
|
||||||
end
|
|
||||||
| x::r when int_of_char x >= 0x20 && int_of_char x <= 0x7e -> codepoints_of_html (int_of_char x::ps) r
|
|
||||||
| _::r -> codepoints_of_html ps r
|
|
||||||
| [] -> rev ps
|
|
||||||
in
|
|
||||||
Pdftext.pdfdocstring_of_codepoints (codepoints_of_html [] (explode s))
|
|
||||||
|
|
||||||
let dump_data pdf out =
|
|
||||||
let channel =
|
|
||||||
match out with
|
|
||||||
| NoOutputSpecified -> stdout
|
|
||||||
| Stdout -> stdout
|
|
||||||
| File f -> open_out_bin f
|
|
||||||
in
|
|
||||||
let prs s = Pervasives.output_string channel s in
|
|
||||||
(* 1. Info keys *)
|
|
||||||
begin match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with
|
|
||||||
| Some (Pdf.Dictionary d) ->
|
|
||||||
iter
|
|
||||||
(function (name, pdfobj) ->
|
|
||||||
match pdfobj with
|
|
||||||
| Pdf.String s ->
|
|
||||||
begin match s with "" -> () | _ ->
|
|
||||||
begin match explode name with
|
|
||||||
| [] -> ()
|
|
||||||
| h::t -> prs (Printf.sprintf "InfoKey: %s\nInfoValue: %s\n" (implode t) (html_of_unicode s))
|
|
||||||
end
|
|
||||||
end
|
|
||||||
| _ -> ())
|
|
||||||
d
|
|
||||||
| _ -> flprint "Warning: no info dictionary found\n"; ()
|
|
||||||
end;
|
|
||||||
let hex s =
|
|
||||||
fold_left ( ^ ) "" (map (Printf.sprintf "%02x") (map int_of_char (explode s)))
|
|
||||||
in
|
|
||||||
(* 2. IDs *)
|
|
||||||
begin match Pdf.lookup_direct pdf "/ID" pdf.Pdf.trailerdict with
|
|
||||||
| Some (Pdf.Array [Pdf.String s; Pdf.String t]) -> prs (Printf.sprintf "PdfID0: %s\nPdfID1: %s\n" (hex s) (hex t))
|
|
||||||
| _ -> ()
|
|
||||||
end;
|
|
||||||
(* 3. No of pages *)
|
|
||||||
prs (Printf.sprintf "NumberOfPages: %i\n" (Pdfpage.endpage pdf));
|
|
||||||
(* 4. Outlines *)
|
|
||||||
iter
|
|
||||||
(function m ->
|
|
||||||
prs (Printf.sprintf "BookmarkTitle: %s\n" (html_of_unicode m.Pdfmarks.text));
|
|
||||||
prs (Printf.sprintf "BookmarkLevel: %i\n" (m.Pdfmarks.level + 1));
|
|
||||||
prs (Printf.sprintf "BookmarkPageNumber: %i\n" (Pdfpage.pagenumber_of_target pdf m.Pdfmarks.target)))
|
|
||||||
(Pdfmarks.read_bookmarks pdf);
|
|
||||||
(* 5. Close and finish *)
|
|
||||||
match out with File _ -> close_out channel | _ -> flush stdout
|
|
||||||
|
|
||||||
(* Parse and update info *)
|
|
||||||
let update_info pdf source =
|
|
||||||
let channel =
|
|
||||||
match source with
|
|
||||||
| "use-stdin" -> stdin
|
|
||||||
| x -> open_in_bin x
|
|
||||||
in
|
|
||||||
let rec read_lines prev channel =
|
|
||||||
try read_lines (input_line channel::prev) channel with End_of_file -> rev prev
|
|
||||||
in
|
|
||||||
let lines = read_lines [] channel in
|
|
||||||
let kvpairs =
|
|
||||||
map
|
|
||||||
(function l -> let k, v = cleavewhile (neq ':') (explode l) in implode k, implode (tail_no_fail (tail_no_fail v)))
|
|
||||||
lines
|
|
||||||
in
|
|
||||||
(*i iter
|
|
||||||
(function (k, v) -> Printf.printf "(%s,%s)\n" k v)
|
|
||||||
kvpairs; i*)
|
|
||||||
(* Split into 1) info keys / values 2) PdfIDs, Bookmarks *)
|
|
||||||
let infolines =
|
|
||||||
keep (function (("InfoKey" | "InfoValue"), _) -> true | _ -> false) kvpairs;
|
|
||||||
and pdfidlines =
|
|
||||||
keep (function (("PdfID0" | "PdfID1"), _) -> true | _ -> false) kvpairs
|
|
||||||
and bookmarklines =
|
|
||||||
keep (function (("BookmarkTitle" | "BookmarkLevel" | "BookmarkPageNumber"), _) -> true | _ -> false) kvpairs
|
|
||||||
in
|
|
||||||
(* 1. Add/Replace info keys *)
|
|
||||||
let kvpairs =
|
|
||||||
map
|
|
||||||
(function [(_, k); (_, v)] -> k, v | _ -> error "Mismatched info Key/Value pairs")
|
|
||||||
(splitinto 2 infolines)
|
|
||||||
in
|
|
||||||
let pdf =
|
|
||||||
{pdf with Pdf.trailerdict =
|
|
||||||
Pdf.add_dict_entry pdf.Pdf.trailerdict "/Info"
|
|
||||||
(Pdf.Dictionary
|
|
||||||
(fold_left
|
|
||||||
(fun d (k, v) -> add k v d)
|
|
||||||
(match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with | Some (Pdf.Dictionary d) -> d | _ -> [])
|
|
||||||
(map (function (k, v) -> "/" ^ k, Pdf.String (unicode_of_html v)) kvpairs)))}
|
|
||||||
in
|
|
||||||
(* 2. Add/Replace PDF Id *)
|
|
||||||
let pdf =
|
|
||||||
let unhex s =
|
|
||||||
match Pdfread.lex_hexstring (Pdfio.input_of_string ("<" ^ s ^ ">")) with
|
|
||||||
| Pdfgenlex.LexString s -> s
|
|
||||||
| _ -> error "PDFId wrongly formed in update_info file"
|
|
||||||
in
|
|
||||||
match pdfidlines with
|
|
||||||
| ["PdfID0", a; "PdfID1", b] ->
|
|
||||||
{pdf with Pdf.trailerdict =
|
|
||||||
Pdf.add_dict_entry pdf.Pdf.trailerdict "/ID" (Pdf.Array [Pdf.String (unhex a); Pdf.String (unhex b)])}
|
|
||||||
| _ -> pdf
|
|
||||||
in
|
|
||||||
(* 3. Replace Bookmarks *)
|
|
||||||
let marks =
|
|
||||||
map
|
|
||||||
(function
|
|
||||||
| [("BookmarkTitle", a); ("BookmarkLevel", b); ("BookmarkPageNumber", c)] ->
|
|
||||||
{Pdfmarks.level = int_of_string b - 1;
|
|
||||||
Pdfmarks.text = unicode_of_html a;
|
|
||||||
Pdfmarks.target = Pdfpage.target_of_pagenumber pdf (int_of_string c);
|
|
||||||
Pdfmarks.isopen = false}
|
|
||||||
| _ -> error "Bookmark entries malformed in update_info file")
|
|
||||||
(splitinto 3 bookmarklines)
|
|
||||||
in
|
|
||||||
let pdf = Pdfmarks.add_bookmarks marks pdf in
|
|
||||||
begin match source with "use-stdin" -> () | _ -> close_in channel end;
|
|
||||||
pdf
|
|
||||||
|
|
||||||
(* If pages in stamp < pages in main, extend stamp by repeating its last page. If pages in stamp more, chop stamp *)
|
(* If pages in stamp < pages in main, extend stamp by repeating its last page. If pages in stamp more, chop stamp *)
|
||||||
let equalize_pages_extend main stamp =
|
let equalize_pages_extend main stamp =
|
||||||
let length_stamp = Pdfpage.endpage stamp
|
let length_stamp = Pdfpage.endpage stamp
|
||||||
|
@ -2817,7 +2601,6 @@ let go () =
|
||||||
input file, and we're just extracting pages, might we use a
|
input file, and we're just extracting pages, might we use a
|
||||||
lazy read? *)
|
lazy read? *)
|
||||||
if hd ranges <> "all" || hd rotations <> Pdfmerge.DNR || !Pdfpage.flat_pagetrees then
|
if hd ranges <> "all" || hd rotations <> Pdfmerge.DNR || !Pdfpage.flat_pagetrees then
|
||||||
let pdf = if hd rotations <> Pdfmerge.DNR then prerotate_pdf pdf (hd rotations) else pdf in
|
|
||||||
let range = parse_pagespec pdf (hd ranges) in
|
let range = parse_pagespec pdf (hd ranges) in
|
||||||
let newpdf = Pdfpage.pdf_of_pages ~retain_numbering:args.retain_numbering pdf range in
|
let newpdf = Pdfpage.pdf_of_pages ~retain_numbering:args.retain_numbering pdf range in
|
||||||
write_pdf false newpdf
|
write_pdf false newpdf
|
||||||
|
@ -3125,7 +2908,7 @@ let go () =
|
||||||
Pdfwrite.permissions = banlist_of_args ()}
|
Pdfwrite.permissions = banlist_of_args ()}
|
||||||
in
|
in
|
||||||
Cpdf.split_pdf
|
Cpdf.split_pdf
|
||||||
enc args.printf_format args.original_filename args.chunksize args.linearize args.cpdflin
|
enc args.original_filename args.chunksize args.linearize args.cpdflin
|
||||||
args.preserve_objstm args.preserve_objstm (*yes--always create if preserving *)
|
args.preserve_objstm args.preserve_objstm (*yes--always create if preserving *)
|
||||||
args.squeeze nobble output_spec pdf
|
args.squeeze nobble output_spec pdf
|
||||||
| _, Stdout -> error "Can't split to standard output"
|
| _, Stdout -> error "Can't split to standard output"
|
||||||
|
@ -3438,12 +3221,6 @@ let go () =
|
||||||
| _ ->
|
| _ ->
|
||||||
Printf.eprintf "CSP3: Too many input files or input not a file"
|
Printf.eprintf "CSP3: Too many input files or input not a file"
|
||||||
end
|
end
|
||||||
| Some DumpData ->
|
|
||||||
let pdf = get_single_pdf args.op true in
|
|
||||||
dump_data pdf args.out
|
|
||||||
| Some (UpdateInfo source) ->
|
|
||||||
let pdf = get_single_pdf args.op false in
|
|
||||||
write_pdf false (update_info pdf source)
|
|
||||||
| Some ExtractText ->
|
| Some ExtractText ->
|
||||||
let pdf = get_single_pdf args.op true in
|
let pdf = get_single_pdf args.op true in
|
||||||
let range = parse_pagespec pdf (get_pagespec ()) in
|
let range = parse_pagespec pdf (get_pagespec ()) in
|
||||||
|
|
Loading…
Reference in New Issue