Squeeze basics done. Encryption, xobjects to do.

This commit is contained in:
John Whitington 2014-09-24 12:07:14 +01:00
parent 274d580f98
commit 27a40e84b4
2 changed files with 44 additions and 37 deletions

View File

@ -1767,19 +1767,13 @@ let rec writing_ok outname =
else else
outname outname
(* Equality on PDF streams *) (* Equality on PDF objects *)
let pdfobjeq pdf x y = let pdfobjeq pdf x y =
let x = Pdf.lookup_obj pdf x let x = Pdf.lookup_obj pdf x
and y = Pdf.lookup_obj pdf y in and y = Pdf.lookup_obj pdf y in
begin match x with Pdf.Stream _ -> Pdf.getstream x | _ -> () end; begin match x with Pdf.Stream _ -> Pdf.getstream x | _ -> () end;
begin match y with Pdf.Stream _ -> Pdf.getstream y | _ -> () end; begin match y with Pdf.Stream _ -> Pdf.getstream y | _ -> () end;
match x with compare x y
(*Pdf.Dictionary _
when
Pdf.lookup_direct pdf "/Type" x = Some (Pdf.Name "/Page")
->
(-1)*) (* FIXME *)
| _ -> compare x y
(* FIXME: We need to be able to do squeeze on encrypted files, which at the (* FIXME: We need to be able to do squeeze on encrypted files, which at the
* moment thinks it has a permissions problem. *) * moment thinks it has a permissions problem. *)
@ -1791,25 +1785,34 @@ let really_squeeze pdf =
(fun x -> length x > 1) (fun x -> length x > 1)
(collate (pdfobjeq pdf) (sort (pdfobjeq pdf) !objs)) (collate (pdfobjeq pdf) (sort (pdfobjeq pdf) !objs))
in in
(*Printf.printf "Found %i pools of duplicate objects to coalesce\n" (length toprocess);*) (* Remove any pools of objects which are page objects, since Adobe Reader
(*List.iter * gets confused when there are duplicate page objects. *)
(fun x -> Printf.printf "\n\nPool: "; List.iter (Printf.printf "%i ") x) let toprocess =
toprocess;*) option_map
let pdfr = ref pdf in (function
let changetable = Hashtbl.create 100 in [] -> assert false
iter | h::_ as l ->
(function [] -> assert false | h::t -> match Pdf.lookup_direct pdf "/Type" (Pdf.lookup_obj pdf h) with
iter (fun e -> Hashtbl.add changetable e h) t) Some (Pdf.Name "/Page") -> None
toprocess; | _ -> Some l)
(* For a unknown reason, the output file is much smaller if toprocess
Pdf.renumber is run twice. This is bizarre, since Pdf.renumber is in
an old, well-understood function in use for years -- what is let pdfr = ref pdf in
going on? *) let changetable = Hashtbl.create 100 in
pdfr := Pdf.renumber changetable !pdfr; iter
pdfr := Pdf.renumber changetable !pdfr; (function [] -> assert false | h::t ->
pdf.Pdf.root <- !pdfr.Pdf.root; iter (fun e -> Hashtbl.add changetable e h) t)
pdf.Pdf.objects <- !pdfr.Pdf.objects; toprocess;
pdf.Pdf.trailerdict <- !pdfr.Pdf.trailerdict (* For a unknown reason, the output file is much smaller if
Pdf.renumber is run twice. This is bizarre, since Pdf.renumber is
an old, well-understood function in use for years -- what is
going on? *)
pdfr := Pdf.renumber changetable !pdfr;
pdfr := Pdf.renumber changetable !pdfr;
Pdf.remove_unreferenced !pdfr;
pdf.Pdf.root <- !pdfr.Pdf.root;
pdf.Pdf.objects <- !pdfr.Pdf.objects;
pdf.Pdf.trailerdict <- !pdfr.Pdf.trailerdict
(* For each object in the PDF marked with /Type /Page, for each /Contents (* For each object in the PDF marked with /Type /Page, for each /Contents
indirect reference or array of such, decode and recode that content stream. *) indirect reference or array of such, decode and recode that content stream. *)
@ -1856,16 +1859,19 @@ let squeeze_all_content_streams pdf =
(* We run squeeze enough times to reach a fixed point in the cardinality of the (* We run squeeze enough times to reach a fixed point in the cardinality of the
* object map *) * object map *)
let squeeze pdf = let squeeze pdf =
let n = ref (Pdf.objcard pdf) in try
Printf.printf "Beginning squeeze: %i objects\n%!" (Pdf.objcard pdf); let n = ref (Pdf.objcard pdf) in
while !n > (ignore (really_squeeze pdf); Pdf.objcard pdf) do Printf.printf "Beginning squeeze: %i objects\n%!" (Pdf.objcard pdf);
n := Pdf.objcard pdf; while !n > (ignore (really_squeeze pdf); Pdf.objcard pdf) do
Printf.printf "Squeezing... Down to %i objects\n%!" (Pdf.objcard pdf); n := Pdf.objcard pdf;
done; Printf.printf "Squeezing... Down to %i objects\n%!" (Pdf.objcard pdf);
Printf.printf "Squeezing page data\n%!"; done;
squeeze_all_content_streams pdf; Printf.printf "Squeezing page data\n%!";
Printf.printf "Recompressing document\n%!"; squeeze_all_content_streams pdf;
ignore (Cpdf.recompress_pdf pdf) Printf.printf "Recompressing document\n%!";
ignore (Cpdf.recompress_pdf pdf)
with
e -> raise (Pdf.PDFError "Squeeze failed. No output written")
let write_pdf mk_id pdf = let write_pdf mk_id pdf =
if args.create_objstm && not args.keepversion if args.create_objstm && not args.keepversion

View File

@ -9,3 +9,4 @@ val go_withargv : string array -> unit
(**/**) (**/**)
val demo : bool val demo : bool
val pdfobjeq : Pdf.t -> int -> int -> int