From c04627eae8625801146458d33c8b0e7baa5797a0 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Tue, 11 Apr 2023 16:08:51 +0100 Subject: [PATCH] Soft mask transformation procedure --- cpdfpage.ml | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 73 insertions(+), 4 deletions(-) diff --git a/cpdfpage.ml b/cpdfpage.ml index 39021a6..43e3426 100644 --- a/cpdfpage.ml +++ b/cpdfpage.ml @@ -4,10 +4,80 @@ open Cpdferror (* FIXME: Need to take account of inherited resources (among Xobjects and their children - pages are regularized upon loading). Would be nice to see a failing example first though. FIXME: What would happen if a pattern was used in a transforming and non-transforming way - we - would have to depulicate - again, no failing example available. + would have to dedupulicate - again, no failing example available. FIXME: combine change_annotation_matrices, change_pattern_matrices and change_softmask_matrices into one *) +(* Transparency group soft masks appear to need altering with the inverse of + the transformation matrix. We find them all, deduplicate, and then process + in place. *) + +(* FIXME sub-xobjects! *) +(* For each xobject, look in /Resources -> /ExtGState -> /G, and get object number. *) +let rec change_softmask_matrices_xobject pdf xobject = + Printf.printf "change_softmask_matrices_xobject: %s\n" (Pdfwrite.string_of_pdf xobject); + let objnums = ref [] in + begin match xobject with + | Pdf.Indirect i -> + let dict = Pdf.lookup_obj pdf i in + begin match Pdf.lookup_direct pdf "/Resources" dict with + | Some d -> + begin match Pdf.lookup_direct pdf "/ExtGState" d with + | Some (Pdf.Dictionary extgstates) -> + Printf.printf "Found %i extgstates to examine\n" (length extgstates); + iter + (function extgstate -> + match Pdf.direct pdf extgstate with + | Pdf.Dictionary d -> + begin match lookup "/SMask" d with + | Some (Pdf.Dictionary d) -> + begin match lookup "/G" d with + | Some (Pdf.Indirect i) -> + Printf.printf "Collecting objnum %i\n" i; + objnums := i::!objnums + | _ -> () + end + | _ -> () + end + | _ -> ()) + (map snd extgstates) + | _ -> () + end + | _ -> () + end + | _ -> () + end; + let subxobjects = + match Pdf.lookup_direct pdf "/Resources" xobject with + | Some d -> + begin match Pdf.lookup_direct pdf "/XObject" d with + | Some (Pdf.Dictionary d) -> map snd d + | _ -> [] + end + | _ -> [] + in + let descendants = + flatten (map (change_softmask_matrices_xobject pdf) subxobjects) + in + descendants @ !objnums + +let change_softmask_matrices_page pdf tr page = + let xobjects = + match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with + | Some (Pdf.Dictionary d) -> d + | _ -> [] + in + Printf.printf "** page has %i xobjects to process\n" (length xobjects); + let objnums = setify (flatten (map (change_softmask_matrices_xobject pdf) (map snd xobjects))) in + iter + (fun objnum -> + let dict = Pdf.lookup_obj pdf objnum in + let matrix = Pdf.parse_matrix pdf "/Matrix" dict in + let matrix' = Pdftransform.matrix_compose (Pdftransform.matrix_invert tr) matrix in + let dict = Pdf.add_dict_entry dict "/Matrix" (Pdf.make_matrix matrix') in + Pdf.addobj_given_num pdf (objnum, dict)) + objnums + (* When we transform a page by wrapping in an [Op_cm], we must also change any /Matrix entries in (some) pattern dictionaries, including inside xobjects. We only change the ones used with scn, to avoid pattern dictionaries @@ -76,15 +146,14 @@ and change_pattern_matrices_xobject pdf tr xobj xobjnum = | _ -> () let change_pattern_matrices_page pdf tr page = + change_softmask_matrices_page pdf tr page; let used = patterns_used pdf page.Pdfpage.content page.Pdfpage.resources in Printf.printf "Patterns for translation, due to being used as cs / CS"; Hashtbl.iter (fun x _ -> Printf.printf "%s " x) used; Printf.printf "\n"; {page with Pdfpage.resources = change_pattern_matrices_resources pdf tr page.Pdfpage.resources used} -(* Transparency group soft masks appear to need altering with the inverse of the transformation matrix. We -find them all, deduplicate, and then process in place. *) -let change_softmask_matrices_page pdf tr page = () +(* Concatenate inverse matrix to each /Matrix entry in those object numbers. *) (* Output information for each page *) let output_page_info pdf range =