From 5793a24e2894a4a2c00f1f9bfae563f1903e3a90 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Wed, 1 Oct 2014 12:05:13 +0100 Subject: [PATCH] -squeeze now deals with form xobjects --- cpdf.ml | 49 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/cpdf.ml b/cpdf.ml index 1ddea61..fb5841d 100644 --- a/cpdf.ml +++ b/cpdf.ml @@ -74,10 +74,35 @@ let really_squeeze pdf = pdf.Pdf.objects <- !pdfr.Pdf.objects; pdf.Pdf.trailerdict <- !pdfr.Pdf.trailerdict -(* Squeeze the form xobject at objnum. Any resources from the page (or its -ancestors in the page tree!) are also needed - we must merge them with the -ones from the xobject itself. *) -let squeeze_form_xobjects page_resources pdf objnum = () +(* Squeeze the form xobject at objnum. FIXME: For old PDFs (< v1.2) any +resources from the page (or its ancestors in the page tree!) are also needed - +we must merge them with the ones from the xobject itself. *) +let squeeze_form_xobject pdf objnum = + let obj = Pdf.lookup_obj pdf objnum in + match Pdf.lookup_direct pdf "/Subtype" obj with + Some (Pdf.Name "/Form") -> + Printf.printf "squeeze_form_xobject at object %i\n%!" objnum; + let resources = + match Pdf.lookup_direct pdf "/Resources" obj with + Some d -> d + | None -> Pdf.Dictionary [] + in + begin match + Pdfops.stream_of_ops + (Pdfops.parse_operators pdf resources [Pdf.Indirect objnum]) + with + Pdf.Stream {contents = (_, Pdf.Got data)} -> + (* Put replacement data in original stream, and overwrite /Length *) + begin match obj with + Pdf.Stream ({contents = (d, _)} as str) -> + str := + (Pdf.add_dict_entry d "/Length" (Pdf.Integer (bytes_size data)), + Pdf.Got data) + | _ -> failwith "squeeze_form_xobject" + end + | _ -> failwith "squeeze_form_xobject" + end + | _ -> () (* For each object in the PDF marked with /Type /Page, for each /Contents indirect reference or array of such, decode and recode that content stream. *) @@ -113,9 +138,21 @@ let squeeze_all_content_streams pdf = Pdf.add_dict_entry d "/Contents" (Pdf.Indirect (Pdf.addobj pdf newstream)) in - Pdf.addobj_given_num pdf (objnum, newdict) + Pdf.addobj_given_num pdf (objnum, newdict); + (* Now process all xobjects related to this page *) + begin match Pdf.lookup_direct pdf "/XObject" resources with + Some (Pdf.Dictionary xobjs) -> + iter + (function + (_, Pdf.Indirect i) -> squeeze_form_xobject pdf i + | _ -> failwith "squeeze_xobject") + xobjs + | _ -> () + end with - (* No /Contents, which is ok. *) + (* No /Contents, which is ok. Or a parsing failure due to + uninherited resources. FIXME: Add support for inherited + resources. *) Not_found -> () end | _ -> ())