Squeeze now doesn't do too much work on duplicate references to xobjects

This commit is contained in:
John Whitington 2014-10-08 17:03:13 +01:00
parent 9a2ccc39e0
commit 6bb8715f23
1 changed file with 30 additions and 26 deletions

56
cpdf.ml
View File

@ -104,35 +104,40 @@ let really_squeeze pdf =
(* Object numbers of the form xobjects which have already been squeezed in the
current pass. squeeze_all_content_streams resets this to the empty list before
it starts. Kept so that an xobject referenced more than once is only processed
the first time it is seen. *)
let xobjects_done = ref []

(* Squeeze the form xobject at objnum: parse its content stream into operators,
write them back out, and store the result in the original stream object,
updating /Length to match. Does nothing if objnum has already been processed
(see xobjects_done) or if the object is not of /Subtype /Form. Raises Failure
if re-encoding does not yield in-memory stream data, or if the object is not a
stream. FIXME: For old PDFs (< v1.2) any resources from the page (or its
ancestors in the page tree!) are also needed - we must merge them with the
ones from the xobject itself. *)
let squeeze_form_xobject pdf objnum =
  (* The whole body must sit inside the guard. Written as
     [if ... then () else a; b] the sequence operator binds more loosely than
     if/else, so [b] would run even for objects already done. *)
  if not (mem objnum !xobjects_done) then begin
    xobjects_done := objnum :: !xobjects_done;
    let obj = Pdf.lookup_obj pdf objnum in
    match Pdf.lookup_direct pdf "/Subtype" obj with
    | Some (Pdf.Name "/Form") ->
        (* A form with no /Resources entry is parsed against an empty
           dictionary. *)
        let resources =
          match Pdf.lookup_direct pdf "/Resources" obj with
          | Some d -> d
          | None -> Pdf.Dictionary []
        in
        begin match
          Pdfops.stream_of_ops
            (Pdfops.parse_operators pdf resources [Pdf.Indirect objnum])
        with
        | Pdf.Stream {contents = (_, Pdf.Got data)} ->
            (* Put replacement data in original stream, and overwrite /Length *)
            begin match obj with
            | Pdf.Stream ({contents = (d, _)} as str) ->
                str :=
                  (Pdf.add_dict_entry d "/Length" (Pdf.Integer (bytes_size data)),
                   Pdf.Got data)
            | _ -> failwith "squeeze_form_xobject"
            end
        | _ -> failwith "squeeze_form_xobject"
        end
    | _ -> ()
  end
(* For each object in the PDF marked with /Type /Page, for each /Contents (* For each object in the PDF marked with /Type /Page, for each /Contents
indirect reference or array of such, decode and recode that content stream. *) indirect reference or array of such, decode and recode that content stream. *)
let squeeze_all_content_streams pdf = let squeeze_all_content_streams pdf =
xobjects_done := [];
Pdf.objiter Pdf.objiter
(fun objnum _ -> (fun objnum _ ->
match Pdf.lookup_obj pdf objnum with match Pdf.lookup_obj pdf objnum with
@ -165,8 +170,7 @@ let squeeze_all_content_streams pdf =
d "/Contents" (Pdf.Indirect (Pdf.addobj pdf newstream)) d "/Contents" (Pdf.Indirect (Pdf.addobj pdf newstream))
in in
Pdf.addobj_given_num pdf (objnum, newdict); Pdf.addobj_given_num pdf (objnum, newdict);
(* Now process all xobjects related to this page (* Now process all xobjects related to this page *)
FIXME due to their shared nature, we are overdoing this work! *)
begin match Pdf.lookup_direct pdf "/XObject" resources with begin match Pdf.lookup_direct pdf "/XObject" resources with
Some (Pdf.Dictionary xobjs) -> Some (Pdf.Dictionary xobjs) ->
iter iter