Squeeze basics done. Encryption, xobjects to do.
This commit is contained in:
parent
274d580f98
commit
27a40e84b4
|
@ -1767,19 +1767,13 @@ let rec writing_ok outname =
|
||||||
else
|
else
|
||||||
outname
|
outname
|
||||||
|
|
||||||
(* Equality on PDF streams *)
|
(* Equality on PDF objects *)
|
||||||
let pdfobjeq pdf x y =
|
let pdfobjeq pdf x y =
|
||||||
let x = Pdf.lookup_obj pdf x
|
let x = Pdf.lookup_obj pdf x
|
||||||
and y = Pdf.lookup_obj pdf y in
|
and y = Pdf.lookup_obj pdf y in
|
||||||
begin match x with Pdf.Stream _ -> Pdf.getstream x | _ -> () end;
|
begin match x with Pdf.Stream _ -> Pdf.getstream x | _ -> () end;
|
||||||
begin match y with Pdf.Stream _ -> Pdf.getstream y | _ -> () end;
|
begin match y with Pdf.Stream _ -> Pdf.getstream y | _ -> () end;
|
||||||
match x with
|
compare x y
|
||||||
(*Pdf.Dictionary _
|
|
||||||
when
|
|
||||||
Pdf.lookup_direct pdf "/Type" x = Some (Pdf.Name "/Page")
|
|
||||||
->
|
|
||||||
(-1)*) (* FIXME *)
|
|
||||||
| _ -> compare x y
|
|
||||||
|
|
||||||
(* FIXME: We need to be able to do squeeze on encrypted files, which at the
|
(* FIXME: We need to be able to do squeeze on encrypted files, which at the
|
||||||
* moment thinks it has a permissions problem. *)
|
* moment thinks it has a permissions problem. *)
|
||||||
|
@ -1791,25 +1785,34 @@ let really_squeeze pdf =
|
||||||
(fun x -> length x > 1)
|
(fun x -> length x > 1)
|
||||||
(collate (pdfobjeq pdf) (sort (pdfobjeq pdf) !objs))
|
(collate (pdfobjeq pdf) (sort (pdfobjeq pdf) !objs))
|
||||||
in
|
in
|
||||||
(*Printf.printf "Found %i pools of duplicate objects to coalesce\n" (length toprocess);*)
|
(* Remove any pools of objects which are page objects, since Adobe Reader
|
||||||
(*List.iter
|
* gets confused when there are duplicate page objects. *)
|
||||||
(fun x -> Printf.printf "\n\nPool: "; List.iter (Printf.printf "%i ") x)
|
let toprocess =
|
||||||
toprocess;*)
|
option_map
|
||||||
let pdfr = ref pdf in
|
(function
|
||||||
let changetable = Hashtbl.create 100 in
|
[] -> assert false
|
||||||
iter
|
| h::_ as l ->
|
||||||
(function [] -> assert false | h::t ->
|
match Pdf.lookup_direct pdf "/Type" (Pdf.lookup_obj pdf h) with
|
||||||
iter (fun e -> Hashtbl.add changetable e h) t)
|
Some (Pdf.Name "/Page") -> None
|
||||||
toprocess;
|
| _ -> Some l)
|
||||||
(* For an unknown reason, the output file is much smaller if
|
toprocess
|
||||||
Pdf.renumber is run twice. This is bizarre, since Pdf.renumber is
|
in
|
||||||
an old, well-understood function in use for years -- what is
|
let pdfr = ref pdf in
|
||||||
going on? *)
|
let changetable = Hashtbl.create 100 in
|
||||||
pdfr := Pdf.renumber changetable !pdfr;
|
iter
|
||||||
pdfr := Pdf.renumber changetable !pdfr;
|
(function [] -> assert false | h::t ->
|
||||||
pdf.Pdf.root <- !pdfr.Pdf.root;
|
iter (fun e -> Hashtbl.add changetable e h) t)
|
||||||
pdf.Pdf.objects <- !pdfr.Pdf.objects;
|
toprocess;
|
||||||
pdf.Pdf.trailerdict <- !pdfr.Pdf.trailerdict
|
(* For an unknown reason, the output file is much smaller if
|
||||||
|
Pdf.renumber is run twice. This is bizarre, since Pdf.renumber is
|
||||||
|
an old, well-understood function in use for years -- what is
|
||||||
|
going on? *)
|
||||||
|
pdfr := Pdf.renumber changetable !pdfr;
|
||||||
|
pdfr := Pdf.renumber changetable !pdfr;
|
||||||
|
Pdf.remove_unreferenced !pdfr;
|
||||||
|
pdf.Pdf.root <- !pdfr.Pdf.root;
|
||||||
|
pdf.Pdf.objects <- !pdfr.Pdf.objects;
|
||||||
|
pdf.Pdf.trailerdict <- !pdfr.Pdf.trailerdict
|
||||||
|
|
||||||
(* For each object in the PDF marked with /Type /Page, for each /Contents
|
(* For each object in the PDF marked with /Type /Page, for each /Contents
|
||||||
indirect reference or array of such, decode and recode that content stream. *)
|
indirect reference or array of such, decode and recode that content stream. *)
|
||||||
|
@ -1856,16 +1859,19 @@ let squeeze_all_content_streams pdf =
|
||||||
(* We run squeeze enough times to reach a fixed point in the cardinality of the
|
(* We run squeeze enough times to reach a fixed point in the cardinality of the
|
||||||
* object map *)
|
* object map *)
|
||||||
let squeeze pdf =
|
let squeeze pdf =
|
||||||
let n = ref (Pdf.objcard pdf) in
|
try
|
||||||
Printf.printf "Beginning squeeze: %i objects\n%!" (Pdf.objcard pdf);
|
let n = ref (Pdf.objcard pdf) in
|
||||||
while !n > (ignore (really_squeeze pdf); Pdf.objcard pdf) do
|
Printf.printf "Beginning squeeze: %i objects\n%!" (Pdf.objcard pdf);
|
||||||
n := Pdf.objcard pdf;
|
while !n > (ignore (really_squeeze pdf); Pdf.objcard pdf) do
|
||||||
Printf.printf "Squeezing... Down to %i objects\n%!" (Pdf.objcard pdf);
|
n := Pdf.objcard pdf;
|
||||||
done;
|
Printf.printf "Squeezing... Down to %i objects\n%!" (Pdf.objcard pdf);
|
||||||
Printf.printf "Squeezing page data\n%!";
|
done;
|
||||||
squeeze_all_content_streams pdf;
|
Printf.printf "Squeezing page data\n%!";
|
||||||
Printf.printf "Recompressing document\n%!";
|
squeeze_all_content_streams pdf;
|
||||||
ignore (Cpdf.recompress_pdf pdf)
|
Printf.printf "Recompressing document\n%!";
|
||||||
|
ignore (Cpdf.recompress_pdf pdf)
|
||||||
|
with
|
||||||
|
e -> raise (Pdf.PDFError "Squeeze failed. No output written")
|
||||||
|
|
||||||
let write_pdf mk_id pdf =
|
let write_pdf mk_id pdf =
|
||||||
if args.create_objstm && not args.keepversion
|
if args.create_objstm && not args.keepversion
|
||||||
|
|
|
@ -9,3 +9,4 @@ val go_withargv : string array -> unit
|
||||||
(**/**)
|
(**/**)
|
||||||
val demo : bool
|
val demo : bool
|
||||||
|
|
||||||
|
val pdfobjeq : Pdf.t -> int -> int -> int
|
||||||
|
|
Loading…
Reference in New Issue