This commit is contained in:
John Whitington 2023-04-04 16:16:04 +01:00
parent c8e808c8e9
commit d786ee2769
1 changed file with 30 additions and 40 deletions

View File

@ -3,13 +3,26 @@ open Cpdferror
(* When we transform a page by wrapping in an [Op_cm], we must also (* When we transform a page by wrapping in an [Op_cm], we must also
change any /Matrix entries in (some) pattern dictionaries, including inside xobjects *) change any /Matrix entries in (some) pattern dictionaries, including inside xobjects *)
(* [patterns_used pdf content resources] builds a hash table whose keys are
   the names carried by the [Op_scnName] / [Op_SCNName] operators found in
   [content], considering only those whose accompanying list is empty
   (presumably these are the pattern names selected by scn / SCN - confirm
   against Pdfops). The content is parsed only when [resources] has a
   /Pattern entry; otherwise the table is returned empty. *)
let patterns_used pdf content resources =
  let seen = null_hash () in
  (* Record the name of a single operator if it has the shape we care about. *)
  let note_operator = function
    | Pdfops.Op_scnName (name, []) | Pdfops.Op_SCNName (name, []) ->
        Hashtbl.replace seen name ()
    | _ -> ()
  in
  begin match Pdf.lookup_direct pdf "/Pattern" resources with
  | None -> ()
  | Some _ ->
      (* Only pay for operator parsing when a /Pattern entry is present. *)
      iter note_operator (Pdfops.parse_operators pdf resources content)
  end;
  seen
let rec change_pattern_matrices_resources pdf tr resources names_used_with_scn = let rec change_pattern_matrices_resources pdf tr resources names_used_with_scn =
begin match Pdf.lookup_direct pdf "/XObject" resources with begin match Pdf.lookup_direct pdf "/XObject" resources with
| Some (Pdf.Dictionary elts) -> | Some (Pdf.Dictionary elts) ->
iter iter
(fun (k, v) -> (fun (k, v) ->
match v with match v with
| Pdf.Indirect i -> change_pattern_matrices_xobject pdf tr k v i | Pdf.Indirect i ->
Printf.printf "Processing form xobject %s for patterns\n" k;
change_pattern_matrices_xobject pdf tr v i
| _ -> raise (Pdf.PDFError "change_pattern_matrices_page")) | _ -> raise (Pdf.PDFError "change_pattern_matrices_page"))
elts elts
| _ -> () | _ -> ()
@ -36,51 +49,28 @@ let rec change_pattern_matrices_resources pdf tr resources names_used_with_scn =
| _ -> resources | _ -> resources
end end
and change_pattern_matrices_xobject pdf tr k v i = and change_pattern_matrices_xobject pdf tr xobj xobjnum =
match Pdf.lookup_direct pdf "/Subtype" v with match Pdf.lookup_direct pdf "/Subtype" xobj with
| Some (Pdf.Name "/Form") -> | Some (Pdf.Name "/Form") ->
Printf.printf "Processing form xobject %s for patterns\n" k; Pdfcodec.decode_pdfstream pdf xobj;
let form_xobject = Pdf.lookup_obj pdf i in let resources = match Pdf.lookup_direct pdf "/Resources" xobj with Some d -> d | None -> Pdf.Dictionary [] in
begin match Pdf.lookup_direct pdf "/Resources" form_xobject with let used = patterns_used pdf [xobj] resources in
| Some resources -> begin match Pdf.lookup_direct pdf "/Resources" xobj with
let form_xobject' = | Some resources ->
Pdf.add_dict_entry form_xobject "/Resources" (change_pattern_matrices_resources pdf tr resources (null_hash ()) (*FIXME*)) let xobj' =
in Pdf.add_dict_entry xobj "/Resources" (change_pattern_matrices_resources pdf tr resources used)
Pdf.addobj_given_num pdf (i, form_xobject') in
| _ -> () Pdf.addobj_given_num pdf (xobjnum, xobj')
end | _ -> ()
end
| _ -> () | _ -> ()
(* FIXME will we end up parsing page ops twice? Pass them to this instead, optionally re: -fast. *)
(* [patterns_used pdf content resources] builds a hash table whose keys are
   the names carried by the [Op_scnName] / [Op_SCNName] operators found in
   [content], considering only those whose accompanying list is empty
   (presumably these are the pattern names selected by scn / SCN - confirm
   against Pdfops). The content is parsed only when [resources] has a
   /Pattern entry; otherwise the table is returned empty. *)
let patterns_used pdf content resources =
  let seen = null_hash () in
  (* Record the name of a single operator if it has the shape we care about. *)
  let note_operator = function
    | Pdfops.Op_scnName (name, []) | Pdfops.Op_SCNName (name, []) ->
        Hashtbl.replace seen name ()
    | _ -> ()
  in
  begin match Pdf.lookup_direct pdf "/Pattern" resources with
  | None -> ()
  | Some _ ->
      (* Only pay for operator parsing when a /Pattern entry is present. *)
      iter note_operator (Pdfops.parse_operators pdf resources content)
  end;
  seen
let change_pattern_matrices_page pdf tr page = let change_pattern_matrices_page pdf tr page =
let used = patterns_used pdf page.Pdfpage.content page.Pdfpage.resources in let used = patterns_used pdf page.Pdfpage.content page.Pdfpage.resources in
Printf.printf "Patterns for translation, due to being used as cs / CS"; Printf.printf "Patterns for translation, due to being used as cs / CS";
Hashtbl.iter (fun x _ -> Printf.printf "%s " x) used; Hashtbl.iter (fun x _ -> Printf.printf "%s " x) used;
Printf.printf "\n"; Printf.printf "\n";
let page =
{page with Pdfpage.resources = change_pattern_matrices_resources pdf tr page.Pdfpage.resources used} {page with Pdfpage.resources = change_pattern_matrices_resources pdf tr page.Pdfpage.resources used}
in
match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with
| Some (Pdf.Dictionary elts) ->
iter
(fun (k, v) ->
match v with
| Pdf.Indirect i -> change_pattern_matrices_xobject pdf tr k v i
| _ -> raise (Pdf.PDFError "change_pattern_matrices_page"))
elts;
page
| _ -> page
(* Output information for each page *) (* Output information for each page *)
let output_page_info pdf range = let output_page_info pdf range =