Squeeze basics done. Encryption, xobjects to do.

This commit is contained in:
John Whitington 2014-09-24 12:07:14 +01:00
parent 274d580f98
commit 27a40e84b4
2 changed files with 44 additions and 37 deletions

View File

@ -1767,19 +1767,13 @@ let rec writing_ok outname =
else else
outname outname
(* Equality on PDF streams *) (* Equality on PDF objects *)
let pdfobjeq pdf x y = let pdfobjeq pdf x y =
let x = Pdf.lookup_obj pdf x let x = Pdf.lookup_obj pdf x
and y = Pdf.lookup_obj pdf y in and y = Pdf.lookup_obj pdf y in
begin match x with Pdf.Stream _ -> Pdf.getstream x | _ -> () end; begin match x with Pdf.Stream _ -> Pdf.getstream x | _ -> () end;
begin match y with Pdf.Stream _ -> Pdf.getstream y | _ -> () end; begin match y with Pdf.Stream _ -> Pdf.getstream y | _ -> () end;
match x with compare x y
(*Pdf.Dictionary _
when
Pdf.lookup_direct pdf "/Type" x = Some (Pdf.Name "/Page")
->
(-1)*) (* FIXME *)
| _ -> compare x y
(* FIXME: We need to be able to do squeeze on encrypted files, which at the (* FIXME: We need to be able to do squeeze on encrypted files, which at the
* moment thinks it has a permissions problem. *) * moment thinks it has a permissions problem. *)
@ -1791,10 +1785,18 @@ let really_squeeze pdf =
(fun x -> length x > 1) (fun x -> length x > 1)
(collate (pdfobjeq pdf) (sort (pdfobjeq pdf) !objs)) (collate (pdfobjeq pdf) (sort (pdfobjeq pdf) !objs))
in in
(*Printf.printf "Found %i pools of duplicate objects to coalesce\n" (length toprocess);*) (* Remove any pools of objects which are page objects, since Adobe Reader
(*List.iter * gets confused when there are duplicate page objects. *)
(fun x -> Printf.printf "\n\nPool: "; List.iter (Printf.printf "%i ") x) let toprocess =
toprocess;*) option_map
(function
[] -> assert false
| h::_ as l ->
match Pdf.lookup_direct pdf "/Type" (Pdf.lookup_obj pdf h) with
Some (Pdf.Name "/Page") -> None
| _ -> Some l)
toprocess
in
let pdfr = ref pdf in let pdfr = ref pdf in
let changetable = Hashtbl.create 100 in let changetable = Hashtbl.create 100 in
iter iter
@ -1807,6 +1809,7 @@ let really_squeeze pdf =
going on? *) going on? *)
pdfr := Pdf.renumber changetable !pdfr; pdfr := Pdf.renumber changetable !pdfr;
pdfr := Pdf.renumber changetable !pdfr; pdfr := Pdf.renumber changetable !pdfr;
Pdf.remove_unreferenced !pdfr;
pdf.Pdf.root <- !pdfr.Pdf.root; pdf.Pdf.root <- !pdfr.Pdf.root;
pdf.Pdf.objects <- !pdfr.Pdf.objects; pdf.Pdf.objects <- !pdfr.Pdf.objects;
pdf.Pdf.trailerdict <- !pdfr.Pdf.trailerdict pdf.Pdf.trailerdict <- !pdfr.Pdf.trailerdict
@ -1856,6 +1859,7 @@ let squeeze_all_content_streams pdf =
(* We run squeeze enough times to reach a fixed point in the cardinality of the (* We run squeeze enough times to reach a fixed point in the cardinality of the
* object map *) * object map *)
let squeeze pdf = let squeeze pdf =
try
let n = ref (Pdf.objcard pdf) in let n = ref (Pdf.objcard pdf) in
Printf.printf "Beginning squeeze: %i objects\n%!" (Pdf.objcard pdf); Printf.printf "Beginning squeeze: %i objects\n%!" (Pdf.objcard pdf);
while !n > (ignore (really_squeeze pdf); Pdf.objcard pdf) do while !n > (ignore (really_squeeze pdf); Pdf.objcard pdf) do
@ -1866,6 +1870,8 @@ let squeeze pdf =
squeeze_all_content_streams pdf; squeeze_all_content_streams pdf;
Printf.printf "Recompressing document\n%!"; Printf.printf "Recompressing document\n%!";
ignore (Cpdf.recompress_pdf pdf) ignore (Cpdf.recompress_pdf pdf)
with
e -> raise (Pdf.PDFError "Squeeze failed. No output written")
let write_pdf mk_id pdf = let write_pdf mk_id pdf =
if args.create_objstm && not args.keepversion if args.create_objstm && not args.keepversion

View File

@ -9,3 +9,4 @@ val go_withargv : string array -> unit
(**/**) (**/**)
val demo : bool val demo : bool
val pdfobjeq : Pdf.t -> int -> int -> int