diff --git a/cpdf.ml b/cpdf.ml index 35fd853..7a1d99b 100644 --- a/cpdf.ml +++ b/cpdf.ml @@ -104,35 +104,40 @@ let really_squeeze pdf = (* Squeeze the form xobject at objnum. FIXME: For old PDFs (< v1.2) any resources from the page (or its ancestors in the page tree!) are also needed - we must merge them with the ones from the xobject itself. *) +let xobjects_done = ref [] + let squeeze_form_xobject pdf objnum = - let obj = Pdf.lookup_obj pdf objnum in - match Pdf.lookup_direct pdf "/Subtype" obj with - Some (Pdf.Name "/Form") -> - let resources = - match Pdf.lookup_direct pdf "/Resources" obj with - Some d -> d - | None -> Pdf.Dictionary [] - in - begin match - Pdfops.stream_of_ops - (Pdfops.parse_operators pdf resources [Pdf.Indirect objnum]) - with - Pdf.Stream {contents = (_, Pdf.Got data)} -> - (* Put replacement data in original stream, and overwrite /Length *) - begin match obj with - Pdf.Stream ({contents = (d, _)} as str) -> - str := - (Pdf.add_dict_entry d "/Length" (Pdf.Integer (bytes_size data)), - Pdf.Got data) - | _ -> failwith "squeeze_form_xobject" - end - | _ -> failwith "squeeze_form_xobject" - end - | _ -> () + if mem objnum !xobjects_done then () else + xobjects_done := objnum :: !xobjects_done; + let obj = Pdf.lookup_obj pdf objnum in + match Pdf.lookup_direct pdf "/Subtype" obj with + Some (Pdf.Name "/Form") -> + let resources = + match Pdf.lookup_direct pdf "/Resources" obj with + Some d -> d + | None -> Pdf.Dictionary [] + in + begin match + Pdfops.stream_of_ops + (Pdfops.parse_operators pdf resources [Pdf.Indirect objnum]) + with + Pdf.Stream {contents = (_, Pdf.Got data)} -> + (* Put replacement data in original stream, and overwrite /Length *) + begin match obj with + Pdf.Stream ({contents = (d, _)} as str) -> + str := + (Pdf.add_dict_entry d "/Length" (Pdf.Integer (bytes_size data)), + Pdf.Got data) + | _ -> failwith "squeeze_form_xobject" + end + | _ -> failwith "squeeze_form_xobject" + end + | _ -> () (* For each object in the PDF marked with /Type /Page, for each /Contents indirect reference or array of such, decode and recode that content stream. *) let squeeze_all_content_streams pdf = + xobjects_done := []; Pdf.objiter (fun objnum _ -> match Pdf.lookup_obj pdf objnum with @@ -165,8 +170,7 @@ let squeeze_all_content_streams pdf = d "/Contents" (Pdf.Indirect (Pdf.addobj pdf newstream)) in Pdf.addobj_given_num pdf (objnum, newdict); - (* Now process all xobjects related to this page - FIXME due to their shared nature, we are overdoing this work! *) + (* Now process all xobjects related to this page *) begin match Pdf.lookup_direct pdf "/XObject" resources with Some (Pdf.Dictionary xobjs) -> iter