Began to excise cpdftk support

This commit is contained in:
John Whitington 2014-10-14 19:36:57 +01:00
parent f42cb56ea0
commit ded4c5f22d
3 changed files with 89 additions and 508 deletions

212
cpdf.ml
View File

@ -251,196 +251,6 @@ let squeeze pdf =
with
e -> raise (Pdf.PDFError "Squeeze failed. No output written")
(* Printf implementation *)
exception PrintfFailure of string
type convop = Ca | CA | Cc | Cd | Ce | CE | Cf | Cg | CG | Ci | Cn | Co | Cp | Cs | Cu | Cx | CX | CPercent
type sizespec = Sll | Sl | SL | Sh | Shh | Sj | Sz | St
type pformat =
{leftjustify : bool;
padzero : bool;
signalways : bool;
space : bool;
variant : bool;
minwidth : int option;
precision : int option;
sizespec : sizespec option;
convop : convop}
let string_of_options f =
(if f.leftjustify then "-" else "") ^
(if f.padzero then "0" else "") ^
(if f.signalways then "+" else "") ^
(if f.space then " " else "") ^
(if f.variant then "#" else "")
let string_of_minwidth = function
| None -> ""
| Some x -> string_of_int x
let string_of_precision = function
| None -> ""
| Some x -> "." ^ string_of_int x
let string_of_sizespec = function
| None -> ""
| Some s->
match s with
| Sll -> "ll" | Sl -> "l" | SL -> "L" | Sh -> "h"
| Shh -> "hh" | Sj -> "j" | Sz -> "z" | St -> "t"
let string_of_convop = function
| Ca -> "a" | CA -> "A" | Cc -> "c" | Cd -> "d"
| Ce -> "e" | CE -> "E" | Cf -> "f" | Cg -> "g"
| CG -> "G" | Ci -> "i" | Cn -> "n" | Co -> "o"
| Cp -> "p" | Cs -> "s" | Cu -> "u" | Cx -> "x"
| CX -> "X" | CPercent -> "%"
let string_of_format f =
"%" ^
string_of_options f ^
string_of_minwidth f.minwidth ^
string_of_precision f.precision ^
string_of_sizespec f.sizespec ^
string_of_convop f.convop
type section =
| Format of pformat
| String of string
let sec_of_format cs =
(* 1. Read zero or more flags -, +, 0, #, <space> *)
let cs = ref cs in
let lj, pz, sa, sp, va, fin =
ref false, ref false, ref false, ref false, ref false, ref false
in
while not !fin do
match !cs with
| '-'::_ -> set lj; cs := tl !cs
| '+'::_ -> set sa; cs := tl !cs
| '0'::_ -> set pz; cs := tl !cs
| ' '::_ -> set sp; cs := tl !cs
| '#'::_ -> set va; cs := tl !cs
| _ -> set fin
done;
(* 2. Read a possible minimum field width *)
let minwidth =
let fwchars, rest = cleavewhile isdigit !cs in
cs := rest;
if fwchars = [] then None else Some (int_of_string (implode fwchars))
in
(* 3. Read an optional precision specification *)
let precision =
match !cs with
| '.'::more ->
cs := more;
let pchars, rest = cleavewhile isdigit !cs in
cs := rest;
if pchars = [] then None else Some (int_of_string (implode pchars))
| _ -> None
in
(* 4. Read an optional size specification *)
let sizespec =
match !cs with
| 'l'::'l'::r -> cs := r; Some Sll
| 'l'::r -> cs := r; Some Sl
| 'L'::r -> cs := r; Some SL
| 'h'::'h'::r -> cs := r; Some Shh
| 'h'::r -> cs := r; Some Sh
| 'j'::r -> cs := r; Some Sj
| 'z'::r -> cs := r; Some Sz
| 't'::r -> cs := r; Some St
| _ -> None
in
(* 5. Read the conversion operation *)
let convop =
match !cs with
| 'a'::r -> cs := r; Ca
| 'A'::r -> cs := r; CA
| 'c'::r -> cs := r; Cc
| 'd'::r -> cs := r; Cd
| 'e'::r -> cs := r; Ce
| 'E'::r -> cs := r; CE
| 'f'::r -> cs := r; Cf
| 'g'::r -> cs := r; Cg
| 'G'::r -> cs := r; CG
| 'i'::r -> cs := r; Ci
| 'n'::r -> cs := r; Cn
| 'o'::r -> cs := r; Co
| 'p'::r -> cs := r; Cp
| 's'::r -> cs := r; Cs
| 'u'::r -> cs := r; Cu
| 'x'::r -> cs := r; Cx
| 'X'::r -> cs := r; CX
| '%'::r -> cs := r; CPercent
| _ -> raise (Failure "sec_of_format")
in
{leftjustify = !lj;
padzero = !pz;
signalways = !sa;
space = !sp;
variant = !va;
minwidth = minwidth;
precision = precision;
sizespec = sizespec;
convop = convop},
!cs
let rec sections_of_string_inner secs currstr = function
| '%'::m ->
let sec, rest = sec_of_format m in
if currstr = []
then sections_of_string_inner (Format sec::secs) currstr rest
else sections_of_string_inner (Format sec::String (implode (rev currstr))::secs) [] rest
| x::xs ->
sections_of_string_inner secs (x::currstr) xs
| [] ->
if currstr = [] then rev secs else rev (String (implode (rev currstr))::secs)
(* Take a format string, and split it into sections *)
let sections_of_string s =
try
sections_of_string_inner [] [] (explode s)
with
_ -> raise (PrintfFailure "Couldn't parse Printf format")
(* Substitute an integer into a format, returning the empty string if the format is not suitable. *)
(* For now, just 'd', 'u', 'i' *)
let sub_int i f =
(*i Printf.printf "Substituting format |%s|\n" (string_of_format f); i*)
let str = string_of_int i
in let padding = if f.padzero then '0' else ' ' in
if f.minwidth <> None && String.length str < unopt f.minwidth then
let padding = many padding (unopt f.minwidth - String.length str) in
if f.leftjustify then str ^ implode padding else implode padding ^ str
else
str
(* Given a list of integers, substitute into integer formats *)
let rec substitute_inner donesections sections = function
| [] -> rev donesections @ sections
| i::is ->
match sections with
| [] -> rev donesections @ sections
| String s::more -> substitute_inner (String s::donesections) more (i::is)
| Format f::more -> substitute_inner (String (sub_int i f)::donesections) more is
let substitute x =
try substitute_inner [] x with
_ -> raise (PrintfFailure "Failed to substitute integer")
(* Flatten a set of sections to a string *)
let string_of_section = function
| String s -> s
| Format f -> string_of_format f
let string_of_sections sections =
try fold_left ( ^ ) "" (map string_of_section sections) with
_ -> raise (PrintfFailure "Failed to build string from Printf sections")
type encoding =
| Raw
| UTF8
@ -1324,13 +1134,7 @@ let process_others marks pdf splitlevel filename sequence startpage endpage s =
in
implode (procss [] (explode s))
let name_of_spec printf marks (pdf : Pdf.t) splitlevel spec n filename startpage endpage =
if printf then
let spec =
string_of_sections (substitute (sections_of_string spec) [n])
in
process_others marks pdf splitlevel filename n startpage endpage spec
else
let name_of_spec marks (pdf : Pdf.t) splitlevel spec n filename startpage endpage =
let fill l n =
let chars = explode (string_of_int n) in
if length chars > l
@ -1383,22 +1187,22 @@ let really_write_pdf ~preserve_objstm ~create_objstm ?(encryption = None) ?(cpdf
raise (Pdf.PDFError "linearizer failed")
end
let fast_write_split_pdfs enc printf splitlevel original_filename linearize ?(cpdflin = None) preserve_objstm create_objstm sq nobble spec main_pdf pagenums pdf_pages =
let fast_write_split_pdfs enc splitlevel original_filename linearize ?(cpdflin = None) preserve_objstm create_objstm sq nobble spec main_pdf pagenums pdf_pages =
let marks = Pdfmarks.read_bookmarks main_pdf in
iter2
(fun number pagenums ->
let pdf = nobble (Pdfpage.pdf_of_pages main_pdf pagenums) in
let startpage, endpage = extremes pagenums in
let name = name_of_spec printf marks main_pdf splitlevel spec number (stem original_filename) startpage endpage in
let name = name_of_spec marks main_pdf splitlevel spec number (stem original_filename) startpage endpage in
Pdf.remove_unreferenced pdf;
if sq then squeeze pdf;
really_write_pdf ~preserve_objstm ~create_objstm ~encryption:enc linearize (not (enc = None)) pdf name)
(indx pagenums)
pagenums
let split_pdf enc printf original_filename chunksize linearize ~cpdflin ~preserve_objstm ~create_objstm ~squeeze nobble spec pdf =
let split_pdf enc original_filename chunksize linearize ~cpdflin ~preserve_objstm ~create_objstm ~squeeze nobble spec pdf =
let pdf_pages = Pdfpage.pages_of_pagetree pdf in
fast_write_split_pdfs enc printf 0 original_filename linearize preserve_objstm
fast_write_split_pdfs enc 0 original_filename linearize preserve_objstm
create_objstm squeeze nobble spec pdf (splitinto chunksize (indx pdf_pages)) pdf_pages
(* Return list, in order, a *set* of page numbers of bookmarks at a given level *)
@ -1415,7 +1219,7 @@ let split_at_bookmarks original_filename linearize ~cpdflin ~preserve_objstm ~cr
lose (fun x -> x <= 0 || x > Pdfpage.endpage pdf) (map pred points)
in
let pts = splitat points (indx pdf_pages) in
fast_write_split_pdfs None false level
fast_write_split_pdfs None level
original_filename linearize preserve_objstm create_objstm squeeze nobble spec pdf pts pdf_pages
(* Called from cpdflib.ml - different from above *)
@ -3045,7 +2849,7 @@ let xmltree_of_bytes b =
and data d = D d in
Xmlm.input_doc_tree ~el ~data i
let rec string_of_xmltree = function
(*let rec string_of_xmltree = function
D d ->
Printf.sprintf "DATA **%s**" d
| E (tag, trees) ->
@ -3067,7 +2871,7 @@ and string_of_attributes attrs =
and string_of_xmltrees trees =
fold_left
(fun a b -> a ^ " " ^ b) "" (map string_of_xmltree trees)
(fun a b -> a ^ " " ^ b) "" (map string_of_xmltree trees)*)
let rec get_data_for namespace name = function
D _ -> None

View File

@ -90,7 +90,7 @@ endpage] makes format substitutions in [spec] to make an output file name:
}
[printf] is undocumented and should be set to [false].
*)
val name_of_spec : bool -> Pdfmarks.t list -> Pdf.t -> int -> string -> int -> string -> int -> int -> string
val name_of_spec : Pdfmarks.t list -> Pdf.t -> int -> string -> int -> string -> int -> int -> string
(** {2 Compress and Decompress} *)
@ -220,7 +220,7 @@ each output is linearized. If [preserve_objstm] is true, object streams will
be used if the input file had them. If [create_objstm] is true, object
streams will be created in any event. [printf] and [nobble] are
undocumented and should be false. *)
val split_pdf : Pdfwrite.encryption option -> bool -> string -> int -> bool -> cpdflin:string option ->
val split_pdf : Pdfwrite.encryption option -> string -> int -> bool -> cpdflin:string option ->
preserve_objstm:bool -> create_objstm:bool -> squeeze:bool -> (Pdf.t -> Pdf.t) -> string -> Pdf.t -> unit
(** {2 Listing fonts} *)

View File

@ -8,6 +8,8 @@ let version_date = "(unreleased, 16th September 2014)"
open Pdfutil
open Pdfio
let initial_file_size = ref 0
(* Wrap up the file reading functions to exit with code 1 when an encryption
problem occurs. This happens when object streams are in an encrypted document
and so it can't be read without the right password... The existing error
@ -144,8 +146,6 @@ type op =
| ExtractImages
| ImageResolution of float
| MissingFonts
| DumpData
| UpdateInfo of string
| RemoveUnusedResources
| ExtractFontFile
| ExtractText
@ -461,25 +461,9 @@ let rec decrypt_if_necessary (a, b, c, user_pw, owner_pw) op pdf =
| Some pdf, permissions ->
if operation_allowed permissions op
then pdf
else if args.do_ask
then decrypt_if_necessary_ask (a, b, c, user_pw, owner_pw) op pdf
else soft_error "User password cannot give permission for this operation"
| _ ->
if args.do_ask
then decrypt_if_necessary_ask (a, b, c, user_pw, owner_pw) op pdf
else soft_error "Failed to decrypt file: wrong password?"
and decrypt_if_necessary_ask (a, b, c, user_pw, owner_pw) op pdf =
let name = match a with InFile x -> x | StdIn -> "Standard input" | AlreadyInMemory _ -> "PDF" in
flprint "The password supplied for input PDF:\n";
flprint (" " ^ name);
flprint "\n did not work. The PDF is encrypted, so you must supply the\n";
flprint " owner password to open it. To quit, enter a blank password\n";
flprint "Please enter the password to use on the input PDF:\n";
flprint (" " ^ name ^ ".\n");
match Pervasives.read_line () with
| "" -> soft_error "Failed to decrypt file: wrong password?"
| x -> decrypt_if_necessary (a, b, c, user_pw, x) op pdf
soft_error "Failed to decrypt file: wrong password?"
let nobble pdf =
if not demo then pdf else
@ -523,19 +507,9 @@ let setmethod s =
| _ -> error "Unsupported encryption method"
let setowner s =
match s with
| "PROMPT" ->
flprint "Enter owner password to use on the output PDF.\n";
args.owner <- Pervasives.read_line ()
| s ->
args.owner <- s
let setuser s =
match s with
| "PROMPT" ->
flprint "Enter user password to use on the output PDF.\n";
args.user <- Pervasives.read_line ()
| s ->
args.user <- s
let anon_fun s =
@ -872,14 +846,6 @@ let setstampunder f =
setop (StampUnder f) ();
if args.position = Cpdf.TopLeft 100. then args.position <- Cpdf.BottomLeft 0.
let setstamponmulti f =
setop (StampOn f) ();
args.ismulti <- true
let setstampundermulti f =
setop (StampUnder f) ();
args.ismulti <- true
let setcombinepages f =
setop (CombinePages f) ()
@ -1150,39 +1116,12 @@ let setkeepthisid () =
| (InFile s, _, _, _, _)::_ -> args.keep_this_id <- Some s
| _ -> ()
let setupdateinfo s =
args.op <- Some (UpdateInfo s)
let setdoask () =
args.do_ask <- true
let setverbose () =
args.verbose <- true
let promptinputs () =
flprint "Please enter a filename for an input PDF:\n";
set_input (Pervasives.read_line ())
let promptinputpasswords () =
flprint "Please enter the open password to use on the input PDF:\n ";
match args.inputs with
| (InFile s, b, c, d, _)::more ->
flprint s;
flprint ".\n It can be empty, or have a maximum of 32 characters:\n";
let pw = Pervasives.read_line () in
args.inputs <- (InFile s, b, c, d, pw)::more
| _ -> ()
let promptoutput () =
flprint "Please enter a name for the output:\n";
args.out <- File (Pervasives.read_line ())
let setdontoverwriteexistingfiles () =
args.dont_overwrite_existing_files <- true
let setdontoverwriteinputs () =
args.dont_overwrite_inputs <- true
let setmakenewid () =
args.makenewid <- true
@ -1736,7 +1675,7 @@ and specs =
Arg.Int setlabelstartval,
" Set label start value (default 1)");
(* These items are for cpdftk *)
("-update-info", Arg.String setupdateinfo, "");
(*("-update-info", Arg.String setupdateinfo, "");
("-printf-format", Arg.Unit setprintfformat, "");
("-dump-data", Arg.Unit (setop DumpData), "");
@ -1753,7 +1692,7 @@ and specs =
("-remove-unused-resources", Arg.Unit (setop RemoveUnusedResources), "");
("-stamp-under-multi", Arg.String setstampundermulti, "");
("-stamp-on-multi", Arg.String setstamponmulti, "");
("-list-annotations-more", Arg.Unit (setop ListAnnotationsMore), "");
("-list-annotations-more", Arg.Unit (setop ListAnnotationsMore), "");*)
(*These items are undocumented *)
("-extract-fontfile", Arg.Unit (setop ExtractFontFile), "");
("-extract-images", Arg.Unit setextractimages, "");
@ -1780,20 +1719,14 @@ or \"1-6,9-end\" or \"even\" or \"odd\" or \"reverse\".\n\nOperations (See \
manual for full details):\n"
(* Reading and writing *)
let rec writing_ok outname =
if args.dont_overwrite_inputs && mem outname !all_inputs then
error ("Error: The output filename: " ^ outname ^"\n is the same as an input filename.\n");
if args.dont_overwrite_existing_files && Sys.file_exists outname then
begin
flprint ("Output file: " ^ outname ^ " already exists. Overwrite? (y/n)\n");
match explode (Pervasives.read_line ()) with
| ('y' | 'Y')::_ -> outname
| _ ->
flprint "Enter a name for the output:\n";
writing_ok (Pervasives.read_line ())
end
else
outname
let filesize name =
try
let x = open_in_bin name in
let r = in_channel_length x in
close_in x;
r
with
_ -> 0
let really_write_pdf ?(encryption = None) mk_id pdf outname =
let outname' =
@ -1805,6 +1738,7 @@ let really_write_pdf ?(encryption = None) mk_id pdf outname =
~preserve_objstm:args.preserve_objstm
~generate_objstm:args.create_objstm
false encryption mk_id pdf outname';
begin
if args.linearize then
let cpdflin =
match Cpdf.find_cpdflin args.cpdflin with
@ -1819,6 +1753,13 @@ let really_write_pdf ?(encryption = None) mk_id pdf outname =
begin try Sys.remove outname with _ -> () end;
raise (Pdf.PDFError "linearizer failed")
end
end;
if args.squeeze then
let s = filesize outname in
Printf.printf
"Final file size is %i bytes, %.2f%% of original.\n"
s
((float s /. float !initial_file_size) *. 100.)
let write_pdf ?(encryption = None) ?(is_decompress=false) mk_id pdf =
if args.create_objstm && not args.keepversion
@ -1828,7 +1769,6 @@ let write_pdf ?(encryption = None) ?(is_decompress=false) mk_id pdf =
| NoOutputSpecified ->
output_pdfs =| pdf
| File outname ->
let outname = writing_ok outname in
begin match encryption with
None ->
ignore (nobble pdf);
@ -1880,6 +1820,8 @@ let pdf_of_stdin user_pw owner_pw =
let get_single_pdf op read_lazy =
match args.inputs with
| (InFile inname, _, _, u, o) as input::_ ->
if args.squeeze then
Printf.printf "Initial file size is %i bytes\n" (filesize inname);
let pdf =
if read_lazy then
pdfread_pdf_of_channel_lazy (optstring u) (optstring o) (open_in_bin inname)
@ -1905,6 +1847,12 @@ file once *)
let get_pdf_from_input_kind ((_, _, _, u, o) as input) op = function
| AlreadyInMemory pdf -> pdf
| InFile s ->
if args.squeeze then
begin
let size = filesize s in
initial_file_size := size;
Printf.printf "Initial file size is %i bytes\n" size
end;
begin try Hashtbl.find filenames s with
Not_found ->
let pdf = decrypt_if_necessary input op (pdfread_pdf_of_file (optstring u) (optstring o) s) in
@ -2047,7 +1995,7 @@ let extract_images pdf range stem =
if images <> [] then
(let names =
map
(function n -> let r = Cpdf.name_of_spec false [] (*FIXME *) pdf 0 stem n "" 0 0 in (*i flprint r; flprint "\n"; i*) r)
(function n -> let r = Cpdf.name_of_spec [] (*FIXME *) pdf 0 stem n "" 0 0 in (*i flprint r; flprint "\n"; i*) r)
(ilist 1 (length images))
in
iter2 (write_image pdf page.Pdfpage.resources) names images))
@ -2492,170 +2440,6 @@ let dump_attached_files pdf out =
with
_ -> error "Couldn't dump attached files"
(* Prerotate a pdf *)
let prerotate_pdf pdf r =
let setto angle = Cpdf.rotate_pdf angle pdf (ilist 1 (Pdfpage.endpage pdf))
and setby angle = Cpdf.rotate_pdf_by angle pdf (ilist 1 (Pdfpage.endpage pdf)) in
match r with
| Pdfmerge.DNR -> pdf
| Pdfmerge.N -> setto 0
| Pdfmerge.S -> setto 180
| Pdfmerge.E -> setto 90
| Pdfmerge.W -> setto 270
| Pdfmerge.L -> setby ~-90
| Pdfmerge.R -> setby 90
| Pdfmerge.D -> setby 180
(* Convert from unicode or PDFDocencoded to ASCII string with HTML entities in it. *)
let html_of_unicode s =
implode
(flatten
(map
(function 60 -> explode "&lt;"
| 62 -> explode "&gt;"
| 38 -> explode "&amp;"
| 34 -> explode "&quot;"
| x when x >= 0x20 && x <= 0x7e -> [char_of_int x]
| x -> ['&';'#'] @ explode (string_of_int x) @ [';'])
(Pdftext.codepoints_of_pdfdocstring s)))
(* Convert from HTML entities to a PDF string which is unicode-encoded (if there are any non-ASCII chars, or PDFDocEncoded if there aren't) . *)
let unicode_of_html s =
let rec codepoints_of_html ps = function
| '&'::'l'::'t'::';'::r -> codepoints_of_html (60::ps) r
| '&'::'g'::'t'::';'::r -> codepoints_of_html (62::ps) r
| '&'::'a'::'m'::'p'::';'::r -> codepoints_of_html (38::ps) r
| '&'::'q'::'u'::'o'::'t'::';'::r -> codepoints_of_html (34::ps) r
| '&'::'#'::r ->
begin match cleavewhile (function '0'..'9' -> true | _ -> false) r with
| [], r -> codepoints_of_html ps r
| cs, (';'::r) ->
let i = try int_of_string (implode cs) with _ -> error "bad HTML literal in update_info" in
codepoints_of_html (i::ps) r
| _ -> error "bad HTML literal in update_info 2"
end
| x::r when int_of_char x >= 0x20 && int_of_char x <= 0x7e -> codepoints_of_html (int_of_char x::ps) r
| _::r -> codepoints_of_html ps r
| [] -> rev ps
in
Pdftext.pdfdocstring_of_codepoints (codepoints_of_html [] (explode s))
let dump_data pdf out =
let channel =
match out with
| NoOutputSpecified -> stdout
| Stdout -> stdout
| File f -> open_out_bin f
in
let prs s = Pervasives.output_string channel s in
(* 1. Info keys *)
begin match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with
| Some (Pdf.Dictionary d) ->
iter
(function (name, pdfobj) ->
match pdfobj with
| Pdf.String s ->
begin match s with "" -> () | _ ->
begin match explode name with
| [] -> ()
| h::t -> prs (Printf.sprintf "InfoKey: %s\nInfoValue: %s\n" (implode t) (html_of_unicode s))
end
end
| _ -> ())
d
| _ -> flprint "Warning: no info dictionary found\n"; ()
end;
let hex s =
fold_left ( ^ ) "" (map (Printf.sprintf "%02x") (map int_of_char (explode s)))
in
(* 2. IDs *)
begin match Pdf.lookup_direct pdf "/ID" pdf.Pdf.trailerdict with
| Some (Pdf.Array [Pdf.String s; Pdf.String t]) -> prs (Printf.sprintf "PdfID0: %s\nPdfID1: %s\n" (hex s) (hex t))
| _ -> ()
end;
(* 3. No of pages *)
prs (Printf.sprintf "NumberOfPages: %i\n" (Pdfpage.endpage pdf));
(* 4. Outlines *)
iter
(function m ->
prs (Printf.sprintf "BookmarkTitle: %s\n" (html_of_unicode m.Pdfmarks.text));
prs (Printf.sprintf "BookmarkLevel: %i\n" (m.Pdfmarks.level + 1));
prs (Printf.sprintf "BookmarkPageNumber: %i\n" (Pdfpage.pagenumber_of_target pdf m.Pdfmarks.target)))
(Pdfmarks.read_bookmarks pdf);
(* 5. Close and finish *)
match out with File _ -> close_out channel | _ -> flush stdout
(* Parse and update info *)
let update_info pdf source =
let channel =
match source with
| "use-stdin" -> stdin
| x -> open_in_bin x
in
let rec read_lines prev channel =
try read_lines (input_line channel::prev) channel with End_of_file -> rev prev
in
let lines = read_lines [] channel in
let kvpairs =
map
(function l -> let k, v = cleavewhile (neq ':') (explode l) in implode k, implode (tail_no_fail (tail_no_fail v)))
lines
in
(*i iter
(function (k, v) -> Printf.printf "(%s,%s)\n" k v)
kvpairs; i*)
(* Split into 1) info keys / values 2) PdfIDs, Bookmarks *)
let infolines =
keep (function (("InfoKey" | "InfoValue"), _) -> true | _ -> false) kvpairs;
and pdfidlines =
keep (function (("PdfID0" | "PdfID1"), _) -> true | _ -> false) kvpairs
and bookmarklines =
keep (function (("BookmarkTitle" | "BookmarkLevel" | "BookmarkPageNumber"), _) -> true | _ -> false) kvpairs
in
(* 1. Add/Replace info keys *)
let kvpairs =
map
(function [(_, k); (_, v)] -> k, v | _ -> error "Mismatched info Key/Value pairs")
(splitinto 2 infolines)
in
let pdf =
{pdf with Pdf.trailerdict =
Pdf.add_dict_entry pdf.Pdf.trailerdict "/Info"
(Pdf.Dictionary
(fold_left
(fun d (k, v) -> add k v d)
(match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with | Some (Pdf.Dictionary d) -> d | _ -> [])
(map (function (k, v) -> "/" ^ k, Pdf.String (unicode_of_html v)) kvpairs)))}
in
(* 2. Add/Replace PDF Id *)
let pdf =
let unhex s =
match Pdfread.lex_hexstring (Pdfio.input_of_string ("<" ^ s ^ ">")) with
| Pdfgenlex.LexString s -> s
| _ -> error "PDFId wrongly formed in update_info file"
in
match pdfidlines with
| ["PdfID0", a; "PdfID1", b] ->
{pdf with Pdf.trailerdict =
Pdf.add_dict_entry pdf.Pdf.trailerdict "/ID" (Pdf.Array [Pdf.String (unhex a); Pdf.String (unhex b)])}
| _ -> pdf
in
(* 3. Replace Bookmarks *)
let marks =
map
(function
| [("BookmarkTitle", a); ("BookmarkLevel", b); ("BookmarkPageNumber", c)] ->
{Pdfmarks.level = int_of_string b - 1;
Pdfmarks.text = unicode_of_html a;
Pdfmarks.target = Pdfpage.target_of_pagenumber pdf (int_of_string c);
Pdfmarks.isopen = false}
| _ -> error "Bookmark entries malformed in update_info file")
(splitinto 3 bookmarklines)
in
let pdf = Pdfmarks.add_bookmarks marks pdf in
begin match source with "use-stdin" -> () | _ -> close_in channel end;
pdf
(* If pages in stamp < pages in main, extend stamp by repeating its last page. If pages in stamp more, chop stamp *)
let equalize_pages_extend main stamp =
let length_stamp = Pdfpage.endpage stamp
@ -2817,7 +2601,6 @@ let go () =
input file, and we're just extracting pages, might we use a
lazy read? *)
if hd ranges <> "all" || hd rotations <> Pdfmerge.DNR || !Pdfpage.flat_pagetrees then
let pdf = if hd rotations <> Pdfmerge.DNR then prerotate_pdf pdf (hd rotations) else pdf in
let range = parse_pagespec pdf (hd ranges) in
let newpdf = Pdfpage.pdf_of_pages ~retain_numbering:args.retain_numbering pdf range in
write_pdf false newpdf
@ -3125,7 +2908,7 @@ let go () =
Pdfwrite.permissions = banlist_of_args ()}
in
Cpdf.split_pdf
enc args.printf_format args.original_filename args.chunksize args.linearize args.cpdflin
enc args.original_filename args.chunksize args.linearize args.cpdflin
args.preserve_objstm args.preserve_objstm (*yes--always create if preserving *)
args.squeeze nobble output_spec pdf
| _, Stdout -> error "Can't split to standard output"
@ -3438,12 +3221,6 @@ let go () =
| _ ->
Printf.eprintf "CSP3: Too many input files or input not a file"
end
| Some DumpData ->
let pdf = get_single_pdf args.op true in
dump_data pdf args.out
| Some (UpdateInfo source) ->
let pdf = get_single_pdf args.op false in
write_pdf false (update_info pdf source)
| Some ExtractText ->
let pdf = get_single_pdf args.op true in
let range = parse_pagespec pdf (get_pagespec ()) in