Began to excise cpdftk support
This commit is contained in:
parent
f42cb56ea0
commit
ded4c5f22d
212
cpdf.ml
212
cpdf.ml
|
@ -251,196 +251,6 @@ let squeeze pdf =
|
|||
with
|
||||
e -> raise (Pdf.PDFError "Squeeze failed. No output written")
|
||||
|
||||
(* Printf implementation *)
|
||||
exception PrintfFailure of string
|
||||
|
||||
type convop = Ca | CA | Cc | Cd | Ce | CE | Cf | Cg | CG | Ci | Cn | Co | Cp | Cs | Cu | Cx | CX | CPercent
|
||||
|
||||
type sizespec = Sll | Sl | SL | Sh | Shh | Sj | Sz | St
|
||||
|
||||
type pformat =
|
||||
{leftjustify : bool;
|
||||
padzero : bool;
|
||||
signalways : bool;
|
||||
space : bool;
|
||||
variant : bool;
|
||||
minwidth : int option;
|
||||
precision : int option;
|
||||
sizespec : sizespec option;
|
||||
convop : convop}
|
||||
|
||||
let string_of_options f =
|
||||
(if f.leftjustify then "-" else "") ^
|
||||
(if f.padzero then "0" else "") ^
|
||||
(if f.signalways then "+" else "") ^
|
||||
(if f.space then " " else "") ^
|
||||
(if f.variant then "#" else "")
|
||||
|
||||
let string_of_minwidth = function
|
||||
| None -> ""
|
||||
| Some x -> string_of_int x
|
||||
|
||||
let string_of_precision = function
|
||||
| None -> ""
|
||||
| Some x -> "." ^ string_of_int x
|
||||
|
||||
let string_of_sizespec = function
|
||||
| None -> ""
|
||||
| Some s->
|
||||
match s with
|
||||
| Sll -> "ll" | Sl -> "l" | SL -> "L" | Sh -> "h"
|
||||
| Shh -> "hh" | Sj -> "j" | Sz -> "z" | St -> "t"
|
||||
|
||||
let string_of_convop = function
|
||||
| Ca -> "a" | CA -> "A" | Cc -> "c" | Cd -> "d"
|
||||
| Ce -> "e" | CE -> "E" | Cf -> "f" | Cg -> "g"
|
||||
| CG -> "G" | Ci -> "i" | Cn -> "n" | Co -> "o"
|
||||
| Cp -> "p" | Cs -> "s" | Cu -> "u" | Cx -> "x"
|
||||
| CX -> "X" | CPercent -> "%"
|
||||
|
||||
let string_of_format f =
|
||||
"%" ^
|
||||
string_of_options f ^
|
||||
string_of_minwidth f.minwidth ^
|
||||
string_of_precision f.precision ^
|
||||
string_of_sizespec f.sizespec ^
|
||||
string_of_convop f.convop
|
||||
|
||||
type section =
|
||||
| Format of pformat
|
||||
| String of string
|
||||
|
||||
let sec_of_format cs =
|
||||
(* 1. Read zero or more flags -, +, 0, #, <space> *)
|
||||
let cs = ref cs in
|
||||
let lj, pz, sa, sp, va, fin =
|
||||
ref false, ref false, ref false, ref false, ref false, ref false
|
||||
in
|
||||
while not !fin do
|
||||
match !cs with
|
||||
| '-'::_ -> set lj; cs := tl !cs
|
||||
| '+'::_ -> set sa; cs := tl !cs
|
||||
| '0'::_ -> set pz; cs := tl !cs
|
||||
| ' '::_ -> set sp; cs := tl !cs
|
||||
| '#'::_ -> set va; cs := tl !cs
|
||||
| _ -> set fin
|
||||
done;
|
||||
(* 2. Read a possible minimum field width *)
|
||||
let minwidth =
|
||||
let fwchars, rest = cleavewhile isdigit !cs in
|
||||
cs := rest;
|
||||
if fwchars = [] then None else Some (int_of_string (implode fwchars))
|
||||
in
|
||||
(* 3. Read an optional precision specification *)
|
||||
let precision =
|
||||
match !cs with
|
||||
| '.'::more ->
|
||||
cs := more;
|
||||
let pchars, rest = cleavewhile isdigit !cs in
|
||||
cs := rest;
|
||||
if pchars = [] then None else Some (int_of_string (implode pchars))
|
||||
| _ -> None
|
||||
in
|
||||
(* 4. Read an optional size specification *)
|
||||
let sizespec =
|
||||
match !cs with
|
||||
| 'l'::'l'::r -> cs := r; Some Sll
|
||||
| 'l'::r -> cs := r; Some Sl
|
||||
| 'L'::r -> cs := r; Some SL
|
||||
| 'h'::'h'::r -> cs := r; Some Shh
|
||||
| 'h'::r -> cs := r; Some Sh
|
||||
| 'j'::r -> cs := r; Some Sj
|
||||
| 'z'::r -> cs := r; Some Sz
|
||||
| 't'::r -> cs := r; Some St
|
||||
| _ -> None
|
||||
in
|
||||
(* 5. Read the conversion operation *)
|
||||
let convop =
|
||||
match !cs with
|
||||
| 'a'::r -> cs := r; Ca
|
||||
| 'A'::r -> cs := r; CA
|
||||
| 'c'::r -> cs := r; Cc
|
||||
| 'd'::r -> cs := r; Cd
|
||||
| 'e'::r -> cs := r; Ce
|
||||
| 'E'::r -> cs := r; CE
|
||||
| 'f'::r -> cs := r; Cf
|
||||
| 'g'::r -> cs := r; Cg
|
||||
| 'G'::r -> cs := r; CG
|
||||
| 'i'::r -> cs := r; Ci
|
||||
| 'n'::r -> cs := r; Cn
|
||||
| 'o'::r -> cs := r; Co
|
||||
| 'p'::r -> cs := r; Cp
|
||||
| 's'::r -> cs := r; Cs
|
||||
| 'u'::r -> cs := r; Cu
|
||||
| 'x'::r -> cs := r; Cx
|
||||
| 'X'::r -> cs := r; CX
|
||||
| '%'::r -> cs := r; CPercent
|
||||
| _ -> raise (Failure "sec_of_format")
|
||||
in
|
||||
{leftjustify = !lj;
|
||||
padzero = !pz;
|
||||
signalways = !sa;
|
||||
space = !sp;
|
||||
variant = !va;
|
||||
minwidth = minwidth;
|
||||
precision = precision;
|
||||
sizespec = sizespec;
|
||||
convop = convop},
|
||||
!cs
|
||||
|
||||
let rec sections_of_string_inner secs currstr = function
|
||||
| '%'::m ->
|
||||
let sec, rest = sec_of_format m in
|
||||
if currstr = []
|
||||
then sections_of_string_inner (Format sec::secs) currstr rest
|
||||
else sections_of_string_inner (Format sec::String (implode (rev currstr))::secs) [] rest
|
||||
| x::xs ->
|
||||
sections_of_string_inner secs (x::currstr) xs
|
||||
| [] ->
|
||||
if currstr = [] then rev secs else rev (String (implode (rev currstr))::secs)
|
||||
|
||||
(* Take a format string, and split it into sections *)
|
||||
let sections_of_string s =
|
||||
try
|
||||
sections_of_string_inner [] [] (explode s)
|
||||
with
|
||||
_ -> raise (PrintfFailure "Couldn't parse Printf format")
|
||||
|
||||
(* Substitute an integer into a format, returning the empty string if the format is not suitable. *)
|
||||
|
||||
(* For now, just 'd', 'u', 'i' *)
|
||||
let sub_int i f =
|
||||
(*i Printf.printf "Substituting format |%s|\n" (string_of_format f); i*)
|
||||
let str = string_of_int i
|
||||
in let padding = if f.padzero then '0' else ' ' in
|
||||
if f.minwidth <> None && String.length str < unopt f.minwidth then
|
||||
let padding = many padding (unopt f.minwidth - String.length str) in
|
||||
if f.leftjustify then str ^ implode padding else implode padding ^ str
|
||||
else
|
||||
str
|
||||
|
||||
(* Given a list of integers, substitute into integer formats *)
|
||||
let rec substitute_inner donesections sections = function
|
||||
| [] -> rev donesections @ sections
|
||||
| i::is ->
|
||||
match sections with
|
||||
| [] -> rev donesections @ sections
|
||||
| String s::more -> substitute_inner (String s::donesections) more (i::is)
|
||||
| Format f::more -> substitute_inner (String (sub_int i f)::donesections) more is
|
||||
|
||||
let substitute x =
|
||||
try substitute_inner [] x with
|
||||
_ -> raise (PrintfFailure "Failed to substitute integer")
|
||||
|
||||
(* Flatten a set of sections to a string *)
|
||||
let string_of_section = function
|
||||
| String s -> s
|
||||
| Format f -> string_of_format f
|
||||
|
||||
let string_of_sections sections =
|
||||
try fold_left ( ^ ) "" (map string_of_section sections) with
|
||||
_ -> raise (PrintfFailure "Failed to build string from Printf sections")
|
||||
|
||||
type encoding =
|
||||
| Raw
|
||||
| UTF8
|
||||
|
@ -1324,13 +1134,7 @@ let process_others marks pdf splitlevel filename sequence startpage endpage s =
|
|||
in
|
||||
implode (procss [] (explode s))
|
||||
|
||||
let name_of_spec printf marks (pdf : Pdf.t) splitlevel spec n filename startpage endpage =
|
||||
if printf then
|
||||
let spec =
|
||||
string_of_sections (substitute (sections_of_string spec) [n])
|
||||
in
|
||||
process_others marks pdf splitlevel filename n startpage endpage spec
|
||||
else
|
||||
let name_of_spec marks (pdf : Pdf.t) splitlevel spec n filename startpage endpage =
|
||||
let fill l n =
|
||||
let chars = explode (string_of_int n) in
|
||||
if length chars > l
|
||||
|
@ -1383,22 +1187,22 @@ let really_write_pdf ~preserve_objstm ~create_objstm ?(encryption = None) ?(cpdf
|
|||
raise (Pdf.PDFError "linearizer failed")
|
||||
end
|
||||
|
||||
let fast_write_split_pdfs enc printf splitlevel original_filename linearize ?(cpdflin = None) preserve_objstm create_objstm sq nobble spec main_pdf pagenums pdf_pages =
|
||||
let fast_write_split_pdfs enc splitlevel original_filename linearize ?(cpdflin = None) preserve_objstm create_objstm sq nobble spec main_pdf pagenums pdf_pages =
|
||||
let marks = Pdfmarks.read_bookmarks main_pdf in
|
||||
iter2
|
||||
(fun number pagenums ->
|
||||
let pdf = nobble (Pdfpage.pdf_of_pages main_pdf pagenums) in
|
||||
let startpage, endpage = extremes pagenums in
|
||||
let name = name_of_spec printf marks main_pdf splitlevel spec number (stem original_filename) startpage endpage in
|
||||
let name = name_of_spec marks main_pdf splitlevel spec number (stem original_filename) startpage endpage in
|
||||
Pdf.remove_unreferenced pdf;
|
||||
if sq then squeeze pdf;
|
||||
really_write_pdf ~preserve_objstm ~create_objstm ~encryption:enc linearize (not (enc = None)) pdf name)
|
||||
(indx pagenums)
|
||||
pagenums
|
||||
|
||||
let split_pdf enc printf original_filename chunksize linearize ~cpdflin ~preserve_objstm ~create_objstm ~squeeze nobble spec pdf =
|
||||
let split_pdf enc original_filename chunksize linearize ~cpdflin ~preserve_objstm ~create_objstm ~squeeze nobble spec pdf =
|
||||
let pdf_pages = Pdfpage.pages_of_pagetree pdf in
|
||||
fast_write_split_pdfs enc printf 0 original_filename linearize preserve_objstm
|
||||
fast_write_split_pdfs enc 0 original_filename linearize preserve_objstm
|
||||
create_objstm squeeze nobble spec pdf (splitinto chunksize (indx pdf_pages)) pdf_pages
|
||||
|
||||
(* Return list, in order, a *set* of page numbers of bookmarks at a given level *)
|
||||
|
@ -1415,7 +1219,7 @@ let split_at_bookmarks original_filename linearize ~cpdflin ~preserve_objstm ~cr
|
|||
lose (fun x -> x <= 0 || x > Pdfpage.endpage pdf) (map pred points)
|
||||
in
|
||||
let pts = splitat points (indx pdf_pages) in
|
||||
fast_write_split_pdfs None false level
|
||||
fast_write_split_pdfs None level
|
||||
original_filename linearize preserve_objstm create_objstm squeeze nobble spec pdf pts pdf_pages
|
||||
|
||||
(* Called from cpdflib.ml - different from above *)
|
||||
|
@ -3045,7 +2849,7 @@ let xmltree_of_bytes b =
|
|||
and data d = D d in
|
||||
Xmlm.input_doc_tree ~el ~data i
|
||||
|
||||
let rec string_of_xmltree = function
|
||||
(*let rec string_of_xmltree = function
|
||||
D d ->
|
||||
Printf.sprintf "DATA **%s**" d
|
||||
| E (tag, trees) ->
|
||||
|
@ -3067,7 +2871,7 @@ and string_of_attributes attrs =
|
|||
|
||||
and string_of_xmltrees trees =
|
||||
fold_left
|
||||
(fun a b -> a ^ " " ^ b) "" (map string_of_xmltree trees)
|
||||
(fun a b -> a ^ " " ^ b) "" (map string_of_xmltree trees)*)
|
||||
|
||||
let rec get_data_for namespace name = function
|
||||
D _ -> None
|
||||
|
|
4
cpdf.mli
4
cpdf.mli
|
@ -90,7 +90,7 @@ endpage] makes format substitutions in [spec] to make an output file name:
|
|||
}
|
||||
[printf] is undocumented and should be set to [false].
|
||||
*)
|
||||
val name_of_spec : bool -> Pdfmarks.t list -> Pdf.t -> int -> string -> int -> string -> int -> int -> string
|
||||
val name_of_spec : Pdfmarks.t list -> Pdf.t -> int -> string -> int -> string -> int -> int -> string
|
||||
|
||||
(** {2 Compress and Decompress} *)
|
||||
|
||||
|
@ -220,7 +220,7 @@ each output is linearized. If [preserve_objstm] is true, object streams will
|
|||
be used if the input file had them. If [create_objstm] is true, object
|
||||
streams will be created in any event. [printf] and [nobble] are
|
||||
undocumented and should be false. *)
|
||||
val split_pdf : Pdfwrite.encryption option -> bool -> string -> int -> bool -> cpdflin:string option ->
|
||||
val split_pdf : Pdfwrite.encryption option -> string -> int -> bool -> cpdflin:string option ->
|
||||
preserve_objstm:bool -> create_objstm:bool -> squeeze:bool -> (Pdf.t -> Pdf.t) -> string -> Pdf.t -> unit
|
||||
|
||||
(** {2 Listing fonts} *)
|
||||
|
|
285
cpdfcommand.ml
285
cpdfcommand.ml
|
@ -8,6 +8,8 @@ let version_date = "(unreleased, 16th September 2014)"
|
|||
open Pdfutil
|
||||
open Pdfio
|
||||
|
||||
let initial_file_size = ref 0
|
||||
|
||||
(* Wrap up the file reading functions to exit with code 1 when an encryption
|
||||
problem occurs. This happens when object streams are in an encrypted document
|
||||
and so it can't be read without the right password... The existing error
|
||||
|
@ -144,8 +146,6 @@ type op =
|
|||
| ExtractImages
|
||||
| ImageResolution of float
|
||||
| MissingFonts
|
||||
| DumpData
|
||||
| UpdateInfo of string
|
||||
| RemoveUnusedResources
|
||||
| ExtractFontFile
|
||||
| ExtractText
|
||||
|
@ -461,25 +461,9 @@ let rec decrypt_if_necessary (a, b, c, user_pw, owner_pw) op pdf =
|
|||
| Some pdf, permissions ->
|
||||
if operation_allowed permissions op
|
||||
then pdf
|
||||
else if args.do_ask
|
||||
then decrypt_if_necessary_ask (a, b, c, user_pw, owner_pw) op pdf
|
||||
else soft_error "User password cannot give permission for this operation"
|
||||
| _ ->
|
||||
if args.do_ask
|
||||
then decrypt_if_necessary_ask (a, b, c, user_pw, owner_pw) op pdf
|
||||
else soft_error "Failed to decrypt file: wrong password?"
|
||||
|
||||
and decrypt_if_necessary_ask (a, b, c, user_pw, owner_pw) op pdf =
|
||||
let name = match a with InFile x -> x | StdIn -> "Standard input" | AlreadyInMemory _ -> "PDF" in
|
||||
flprint "The password supplied for input PDF:\n";
|
||||
flprint (" " ^ name);
|
||||
flprint "\n did not work. The PDF is encrypted, so you must supply the\n";
|
||||
flprint " owner password to open it. To quit, enter a blank password\n";
|
||||
flprint "Please enter the password to use on the input PDF:\n";
|
||||
flprint (" " ^ name ^ ".\n");
|
||||
match Pervasives.read_line () with
|
||||
| "" -> soft_error "Failed to decrypt file: wrong password?"
|
||||
| x -> decrypt_if_necessary (a, b, c, user_pw, x) op pdf
|
||||
soft_error "Failed to decrypt file: wrong password?"
|
||||
|
||||
let nobble pdf =
|
||||
if not demo then pdf else
|
||||
|
@ -523,19 +507,9 @@ let setmethod s =
|
|||
| _ -> error "Unsupported encryption method"
|
||||
|
||||
let setowner s =
|
||||
match s with
|
||||
| "PROMPT" ->
|
||||
flprint "Enter owner password to use on the output PDF.\n";
|
||||
args.owner <- Pervasives.read_line ()
|
||||
| s ->
|
||||
args.owner <- s
|
||||
|
||||
let setuser s =
|
||||
match s with
|
||||
| "PROMPT" ->
|
||||
flprint "Enter user password to use on the output PDF.\n";
|
||||
args.user <- Pervasives.read_line ()
|
||||
| s ->
|
||||
args.user <- s
|
||||
|
||||
let anon_fun s =
|
||||
|
@ -872,14 +846,6 @@ let setstampunder f =
|
|||
setop (StampUnder f) ();
|
||||
if args.position = Cpdf.TopLeft 100. then args.position <- Cpdf.BottomLeft 0.
|
||||
|
||||
let setstamponmulti f =
|
||||
setop (StampOn f) ();
|
||||
args.ismulti <- true
|
||||
|
||||
let setstampundermulti f =
|
||||
setop (StampUnder f) ();
|
||||
args.ismulti <- true
|
||||
|
||||
let setcombinepages f =
|
||||
setop (CombinePages f) ()
|
||||
|
||||
|
@ -1150,39 +1116,12 @@ let setkeepthisid () =
|
|||
| (InFile s, _, _, _, _)::_ -> args.keep_this_id <- Some s
|
||||
| _ -> ()
|
||||
|
||||
let setupdateinfo s =
|
||||
args.op <- Some (UpdateInfo s)
|
||||
|
||||
let setdoask () =
|
||||
args.do_ask <- true
|
||||
|
||||
let setverbose () =
|
||||
args.verbose <- true
|
||||
|
||||
let promptinputs () =
|
||||
flprint "Please enter a filename for an input PDF:\n";
|
||||
set_input (Pervasives.read_line ())
|
||||
|
||||
let promptinputpasswords () =
|
||||
flprint "Please enter the open password to use on the input PDF:\n ";
|
||||
match args.inputs with
|
||||
| (InFile s, b, c, d, _)::more ->
|
||||
flprint s;
|
||||
flprint ".\n It can be empty, or have a maximum of 32 characters:\n";
|
||||
let pw = Pervasives.read_line () in
|
||||
args.inputs <- (InFile s, b, c, d, pw)::more
|
||||
| _ -> ()
|
||||
|
||||
let promptoutput () =
|
||||
flprint "Please enter a name for the output:\n";
|
||||
args.out <- File (Pervasives.read_line ())
|
||||
|
||||
let setdontoverwriteexistingfiles () =
|
||||
args.dont_overwrite_existing_files <- true
|
||||
|
||||
let setdontoverwriteinputs () =
|
||||
args.dont_overwrite_inputs <- true
|
||||
|
||||
let setmakenewid () =
|
||||
args.makenewid <- true
|
||||
|
||||
|
@ -1736,7 +1675,7 @@ and specs =
|
|||
Arg.Int setlabelstartval,
|
||||
" Set label start value (default 1)");
|
||||
(* These items are for cpdftk *)
|
||||
("-update-info", Arg.String setupdateinfo, "");
|
||||
(*("-update-info", Arg.String setupdateinfo, "");
|
||||
("-printf-format", Arg.Unit setprintfformat, "");
|
||||
|
||||
("-dump-data", Arg.Unit (setop DumpData), "");
|
||||
|
@ -1753,7 +1692,7 @@ and specs =
|
|||
("-remove-unused-resources", Arg.Unit (setop RemoveUnusedResources), "");
|
||||
("-stamp-under-multi", Arg.String setstampundermulti, "");
|
||||
("-stamp-on-multi", Arg.String setstamponmulti, "");
|
||||
("-list-annotations-more", Arg.Unit (setop ListAnnotationsMore), "");
|
||||
("-list-annotations-more", Arg.Unit (setop ListAnnotationsMore), "");*)
|
||||
(*These items are undocumented *)
|
||||
("-extract-fontfile", Arg.Unit (setop ExtractFontFile), "");
|
||||
("-extract-images", Arg.Unit setextractimages, "");
|
||||
|
@ -1780,20 +1719,14 @@ or \"1-6,9-end\" or \"even\" or \"odd\" or \"reverse\".\n\nOperations (See \
|
|||
manual for full details):\n"
|
||||
|
||||
(* Reading and writing *)
|
||||
let rec writing_ok outname =
|
||||
if args.dont_overwrite_inputs && mem outname !all_inputs then
|
||||
error ("Error: The output filename: " ^ outname ^"\n is the same as an input filename.\n");
|
||||
if args.dont_overwrite_existing_files && Sys.file_exists outname then
|
||||
begin
|
||||
flprint ("Output file: " ^ outname ^ " already exists. Overwrite? (y/n)\n");
|
||||
match explode (Pervasives.read_line ()) with
|
||||
| ('y' | 'Y')::_ -> outname
|
||||
| _ ->
|
||||
flprint "Enter a name for the output:\n";
|
||||
writing_ok (Pervasives.read_line ())
|
||||
end
|
||||
else
|
||||
outname
|
||||
let filesize name =
|
||||
try
|
||||
let x = open_in_bin name in
|
||||
let r = in_channel_length x in
|
||||
close_in x;
|
||||
r
|
||||
with
|
||||
_ -> 0
|
||||
|
||||
let really_write_pdf ?(encryption = None) mk_id pdf outname =
|
||||
let outname' =
|
||||
|
@ -1805,6 +1738,7 @@ let really_write_pdf ?(encryption = None) mk_id pdf outname =
|
|||
~preserve_objstm:args.preserve_objstm
|
||||
~generate_objstm:args.create_objstm
|
||||
false encryption mk_id pdf outname';
|
||||
begin
|
||||
if args.linearize then
|
||||
let cpdflin =
|
||||
match Cpdf.find_cpdflin args.cpdflin with
|
||||
|
@ -1819,6 +1753,13 @@ let really_write_pdf ?(encryption = None) mk_id pdf outname =
|
|||
begin try Sys.remove outname with _ -> () end;
|
||||
raise (Pdf.PDFError "linearizer failed")
|
||||
end
|
||||
end;
|
||||
if args.squeeze then
|
||||
let s = filesize outname in
|
||||
Printf.printf
|
||||
"Final file size is %i bytes, %.2f%% of original.\n"
|
||||
s
|
||||
((float s /. float !initial_file_size) *. 100.)
|
||||
|
||||
let write_pdf ?(encryption = None) ?(is_decompress=false) mk_id pdf =
|
||||
if args.create_objstm && not args.keepversion
|
||||
|
@ -1828,7 +1769,6 @@ let write_pdf ?(encryption = None) ?(is_decompress=false) mk_id pdf =
|
|||
| NoOutputSpecified ->
|
||||
output_pdfs =| pdf
|
||||
| File outname ->
|
||||
let outname = writing_ok outname in
|
||||
begin match encryption with
|
||||
None ->
|
||||
ignore (nobble pdf);
|
||||
|
@ -1880,6 +1820,8 @@ let pdf_of_stdin user_pw owner_pw =
|
|||
let get_single_pdf op read_lazy =
|
||||
match args.inputs with
|
||||
| (InFile inname, _, _, u, o) as input::_ ->
|
||||
if args.squeeze then
|
||||
Printf.printf "Initial file size is %i bytes\n" (filesize inname);
|
||||
let pdf =
|
||||
if read_lazy then
|
||||
pdfread_pdf_of_channel_lazy (optstring u) (optstring o) (open_in_bin inname)
|
||||
|
@ -1905,6 +1847,12 @@ file once *)
|
|||
let get_pdf_from_input_kind ((_, _, _, u, o) as input) op = function
|
||||
| AlreadyInMemory pdf -> pdf
|
||||
| InFile s ->
|
||||
if args.squeeze then
|
||||
begin
|
||||
let size = filesize s in
|
||||
initial_file_size := size;
|
||||
Printf.printf "Initial file size is %i bytes\n" size
|
||||
end;
|
||||
begin try Hashtbl.find filenames s with
|
||||
Not_found ->
|
||||
let pdf = decrypt_if_necessary input op (pdfread_pdf_of_file (optstring u) (optstring o) s) in
|
||||
|
@ -2047,7 +1995,7 @@ let extract_images pdf range stem =
|
|||
if images <> [] then
|
||||
(let names =
|
||||
map
|
||||
(function n -> let r = Cpdf.name_of_spec false [] (*FIXME *) pdf 0 stem n "" 0 0 in (*i flprint r; flprint "\n"; i*) r)
|
||||
(function n -> let r = Cpdf.name_of_spec [] (*FIXME *) pdf 0 stem n "" 0 0 in (*i flprint r; flprint "\n"; i*) r)
|
||||
(ilist 1 (length images))
|
||||
in
|
||||
iter2 (write_image pdf page.Pdfpage.resources) names images))
|
||||
|
@ -2492,170 +2440,6 @@ let dump_attached_files pdf out =
|
|||
with
|
||||
_ -> error "Couldn't dump attached files"
|
||||
|
||||
(* Prerotate a pdf *)
|
||||
let prerotate_pdf pdf r =
|
||||
let setto angle = Cpdf.rotate_pdf angle pdf (ilist 1 (Pdfpage.endpage pdf))
|
||||
and setby angle = Cpdf.rotate_pdf_by angle pdf (ilist 1 (Pdfpage.endpage pdf)) in
|
||||
match r with
|
||||
| Pdfmerge.DNR -> pdf
|
||||
| Pdfmerge.N -> setto 0
|
||||
| Pdfmerge.S -> setto 180
|
||||
| Pdfmerge.E -> setto 90
|
||||
| Pdfmerge.W -> setto 270
|
||||
| Pdfmerge.L -> setby ~-90
|
||||
| Pdfmerge.R -> setby 90
|
||||
| Pdfmerge.D -> setby 180
|
||||
|
||||
(* Convert from unicode or PDFDocencoded to ASCII string with HTML entities in it. *)
|
||||
let html_of_unicode s =
|
||||
implode
|
||||
(flatten
|
||||
(map
|
||||
(function 60 -> explode "<"
|
||||
| 62 -> explode ">"
|
||||
| 38 -> explode "&"
|
||||
| 34 -> explode """
|
||||
| x when x >= 0x20 && x <= 0x7e -> [char_of_int x]
|
||||
| x -> ['&';'#'] @ explode (string_of_int x) @ [';'])
|
||||
(Pdftext.codepoints_of_pdfdocstring s)))
|
||||
|
||||
(* Convert from HTML entities to a PDF string which is unicode-encoded (if there are any non-ASCII chars, or PDFDocEncoded if there aren't) . *)
|
||||
let unicode_of_html s =
|
||||
let rec codepoints_of_html ps = function
|
||||
| '&'::'l'::'t'::';'::r -> codepoints_of_html (60::ps) r
|
||||
| '&'::'g'::'t'::';'::r -> codepoints_of_html (62::ps) r
|
||||
| '&'::'a'::'m'::'p'::';'::r -> codepoints_of_html (38::ps) r
|
||||
| '&'::'q'::'u'::'o'::'t'::';'::r -> codepoints_of_html (34::ps) r
|
||||
| '&'::'#'::r ->
|
||||
begin match cleavewhile (function '0'..'9' -> true | _ -> false) r with
|
||||
| [], r -> codepoints_of_html ps r
|
||||
| cs, (';'::r) ->
|
||||
let i = try int_of_string (implode cs) with _ -> error "bad HTML literal in update_info" in
|
||||
codepoints_of_html (i::ps) r
|
||||
| _ -> error "bad HTML literal in update_info 2"
|
||||
end
|
||||
| x::r when int_of_char x >= 0x20 && int_of_char x <= 0x7e -> codepoints_of_html (int_of_char x::ps) r
|
||||
| _::r -> codepoints_of_html ps r
|
||||
| [] -> rev ps
|
||||
in
|
||||
Pdftext.pdfdocstring_of_codepoints (codepoints_of_html [] (explode s))
|
||||
|
||||
let dump_data pdf out =
|
||||
let channel =
|
||||
match out with
|
||||
| NoOutputSpecified -> stdout
|
||||
| Stdout -> stdout
|
||||
| File f -> open_out_bin f
|
||||
in
|
||||
let prs s = Pervasives.output_string channel s in
|
||||
(* 1. Info keys *)
|
||||
begin match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with
|
||||
| Some (Pdf.Dictionary d) ->
|
||||
iter
|
||||
(function (name, pdfobj) ->
|
||||
match pdfobj with
|
||||
| Pdf.String s ->
|
||||
begin match s with "" -> () | _ ->
|
||||
begin match explode name with
|
||||
| [] -> ()
|
||||
| h::t -> prs (Printf.sprintf "InfoKey: %s\nInfoValue: %s\n" (implode t) (html_of_unicode s))
|
||||
end
|
||||
end
|
||||
| _ -> ())
|
||||
d
|
||||
| _ -> flprint "Warning: no info dictionary found\n"; ()
|
||||
end;
|
||||
let hex s =
|
||||
fold_left ( ^ ) "" (map (Printf.sprintf "%02x") (map int_of_char (explode s)))
|
||||
in
|
||||
(* 2. IDs *)
|
||||
begin match Pdf.lookup_direct pdf "/ID" pdf.Pdf.trailerdict with
|
||||
| Some (Pdf.Array [Pdf.String s; Pdf.String t]) -> prs (Printf.sprintf "PdfID0: %s\nPdfID1: %s\n" (hex s) (hex t))
|
||||
| _ -> ()
|
||||
end;
|
||||
(* 3. No of pages *)
|
||||
prs (Printf.sprintf "NumberOfPages: %i\n" (Pdfpage.endpage pdf));
|
||||
(* 4. Outlines *)
|
||||
iter
|
||||
(function m ->
|
||||
prs (Printf.sprintf "BookmarkTitle: %s\n" (html_of_unicode m.Pdfmarks.text));
|
||||
prs (Printf.sprintf "BookmarkLevel: %i\n" (m.Pdfmarks.level + 1));
|
||||
prs (Printf.sprintf "BookmarkPageNumber: %i\n" (Pdfpage.pagenumber_of_target pdf m.Pdfmarks.target)))
|
||||
(Pdfmarks.read_bookmarks pdf);
|
||||
(* 5. Close and finish *)
|
||||
match out with File _ -> close_out channel | _ -> flush stdout
|
||||
|
||||
(* Parse and update info *)
|
||||
let update_info pdf source =
|
||||
let channel =
|
||||
match source with
|
||||
| "use-stdin" -> stdin
|
||||
| x -> open_in_bin x
|
||||
in
|
||||
let rec read_lines prev channel =
|
||||
try read_lines (input_line channel::prev) channel with End_of_file -> rev prev
|
||||
in
|
||||
let lines = read_lines [] channel in
|
||||
let kvpairs =
|
||||
map
|
||||
(function l -> let k, v = cleavewhile (neq ':') (explode l) in implode k, implode (tail_no_fail (tail_no_fail v)))
|
||||
lines
|
||||
in
|
||||
(*i iter
|
||||
(function (k, v) -> Printf.printf "(%s,%s)\n" k v)
|
||||
kvpairs; i*)
|
||||
(* Split into 1) info keys / values 2) PdfIDs, Bookmarks *)
|
||||
let infolines =
|
||||
keep (function (("InfoKey" | "InfoValue"), _) -> true | _ -> false) kvpairs;
|
||||
and pdfidlines =
|
||||
keep (function (("PdfID0" | "PdfID1"), _) -> true | _ -> false) kvpairs
|
||||
and bookmarklines =
|
||||
keep (function (("BookmarkTitle" | "BookmarkLevel" | "BookmarkPageNumber"), _) -> true | _ -> false) kvpairs
|
||||
in
|
||||
(* 1. Add/Replace info keys *)
|
||||
let kvpairs =
|
||||
map
|
||||
(function [(_, k); (_, v)] -> k, v | _ -> error "Mismatched info Key/Value pairs")
|
||||
(splitinto 2 infolines)
|
||||
in
|
||||
let pdf =
|
||||
{pdf with Pdf.trailerdict =
|
||||
Pdf.add_dict_entry pdf.Pdf.trailerdict "/Info"
|
||||
(Pdf.Dictionary
|
||||
(fold_left
|
||||
(fun d (k, v) -> add k v d)
|
||||
(match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with | Some (Pdf.Dictionary d) -> d | _ -> [])
|
||||
(map (function (k, v) -> "/" ^ k, Pdf.String (unicode_of_html v)) kvpairs)))}
|
||||
in
|
||||
(* 2. Add/Replace PDF Id *)
|
||||
let pdf =
|
||||
let unhex s =
|
||||
match Pdfread.lex_hexstring (Pdfio.input_of_string ("<" ^ s ^ ">")) with
|
||||
| Pdfgenlex.LexString s -> s
|
||||
| _ -> error "PDFId wrongly formed in update_info file"
|
||||
in
|
||||
match pdfidlines with
|
||||
| ["PdfID0", a; "PdfID1", b] ->
|
||||
{pdf with Pdf.trailerdict =
|
||||
Pdf.add_dict_entry pdf.Pdf.trailerdict "/ID" (Pdf.Array [Pdf.String (unhex a); Pdf.String (unhex b)])}
|
||||
| _ -> pdf
|
||||
in
|
||||
(* 3. Replace Bookmarks *)
|
||||
let marks =
|
||||
map
|
||||
(function
|
||||
| [("BookmarkTitle", a); ("BookmarkLevel", b); ("BookmarkPageNumber", c)] ->
|
||||
{Pdfmarks.level = int_of_string b - 1;
|
||||
Pdfmarks.text = unicode_of_html a;
|
||||
Pdfmarks.target = Pdfpage.target_of_pagenumber pdf (int_of_string c);
|
||||
Pdfmarks.isopen = false}
|
||||
| _ -> error "Bookmark entries malformed in update_info file")
|
||||
(splitinto 3 bookmarklines)
|
||||
in
|
||||
let pdf = Pdfmarks.add_bookmarks marks pdf in
|
||||
begin match source with "use-stdin" -> () | _ -> close_in channel end;
|
||||
pdf
|
||||
|
||||
(* If pages in stamp < pages in main, extend stamp by repeating its last page. If pages in stamp more, chop stamp *)
|
||||
let equalize_pages_extend main stamp =
|
||||
let length_stamp = Pdfpage.endpage stamp
|
||||
|
@ -2817,7 +2601,6 @@ let go () =
|
|||
input file, and we're just extracting pages, might we use a
|
||||
lazy read? *)
|
||||
if hd ranges <> "all" || hd rotations <> Pdfmerge.DNR || !Pdfpage.flat_pagetrees then
|
||||
let pdf = if hd rotations <> Pdfmerge.DNR then prerotate_pdf pdf (hd rotations) else pdf in
|
||||
let range = parse_pagespec pdf (hd ranges) in
|
||||
let newpdf = Pdfpage.pdf_of_pages ~retain_numbering:args.retain_numbering pdf range in
|
||||
write_pdf false newpdf
|
||||
|
@ -3125,7 +2908,7 @@ let go () =
|
|||
Pdfwrite.permissions = banlist_of_args ()}
|
||||
in
|
||||
Cpdf.split_pdf
|
||||
enc args.printf_format args.original_filename args.chunksize args.linearize args.cpdflin
|
||||
enc args.original_filename args.chunksize args.linearize args.cpdflin
|
||||
args.preserve_objstm args.preserve_objstm (*yes--always create if preserving *)
|
||||
args.squeeze nobble output_spec pdf
|
||||
| _, Stdout -> error "Can't split to standard output"
|
||||
|
@ -3438,12 +3221,6 @@ let go () =
|
|||
| _ ->
|
||||
Printf.eprintf "CSP3: Too many input files or input not a file"
|
||||
end
|
||||
| Some DumpData ->
|
||||
let pdf = get_single_pdf args.op true in
|
||||
dump_data pdf args.out
|
||||
| Some (UpdateInfo source) ->
|
||||
let pdf = get_single_pdf args.op false in
|
||||
write_pdf false (update_info pdf source)
|
||||
| Some ExtractText ->
|
||||
let pdf = get_single_pdf args.op true in
|
||||
let range = parse_pagespec pdf (get_pagespec ()) in
|
||||
|
|
Loading…
Reference in New Issue