From 84014abf5cea21bf097dac47e557e876ce430647 Mon Sep 17 00:00:00 2001
From: John Whitington
Date: Fri, 17 Jan 2025 11:30:01 +0800
Subject: [PATCH] Work on 28_005

---
 cpdfimage.ml |  2 +-
 cpdfua.ml    | 49 ++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/cpdfimage.ml b/cpdfimage.ml
index eef8200..3c78bf5 100644
--- a/cpdfimage.ml
+++ b/cpdfimage.ml
@@ -845,7 +845,7 @@ let recompress_1bpp_ccitt_lossless ~pixel_threshold ~length_threshold pdf s dict
   let old = !reference in
   let restore () = reference := old in
   let w = match Pdf.lookup_direct pdf "/Width" dict with Some (Pdf.Integer i) -> i | _ -> error "bad width" in
-  let h = match Pdf.lookup_direct pdf "/Height" dict with Some (Pdf.Integer i) -> i | _ -> error "bad height" in
+  (*let h = match Pdf.lookup_direct pdf "/Height" dict with Some (Pdf.Integer i) -> i | _ -> error "bad height" in*)
   (*if w * h < pixel_threshold then (if !debug_image_processing then Printf.printf "pixel threshold not met\n%!") else*)
   let size = match Pdf.lookup_direct pdf "/Length" dict with Some (Pdf.Integer i) -> i | _ -> 0 in
   (*if size < length_threshold then (if !debug_image_processing then Printf.printf "length threshold not met\n%!") else*)
diff --git a/cpdfua.ml b/cpdfua.ml
index f6bb5a3..8410b27 100644
--- a/cpdfua.ml
+++ b/cpdfua.ml
@@ -702,8 +702,55 @@ let matterhorn_28_004 _ _ pdf =
 
 (* A form field does not have a TU entry and does not have an alternative
    description (in the form of an Alt entry in the enclosing structure element). *)
+(* [get_field_object_numbers pdf] lists the object numbers of the form fields
+   reachable from /Root /AcroForm /Fields. An indirect object counts as a
+   field when it has a /T entry; its /Kids array, if any, is searched the same
+   way. The result is [] when there is no /Fields array.
+   NOTE(review): a cyclic /Kids chain would recurse forever - confirm whether
+   malformed files need guarding against. *)
+let get_field_object_numbers pdf =
+  let rec get_field_object_numbers_inner obj =
+    match obj with
+    | Pdf.Indirect i ->
+        (* Is this referenced item a field (rather than an annotation alone?).
+           If so, count it, and recurse on any /Kids. *)
+        begin match Pdf.lookup_direct pdf "/T" (Pdf.Indirect i) with
+        | Some _ ->
+            (* /Kids is an array of children, like /Fields below: recurse into
+               each element, since this function only accepts Pdf.Indirect. *)
+            begin match Pdf.lookup_direct pdf "/Kids" (Pdf.Indirect i) with
+            | Some (Pdf.Array kids) -> i::flatten (map get_field_object_numbers_inner kids)
+            | _ -> [i]
+            end
+        | None -> []
+        end
+    | _ ->
+        Pdfe.log "get_field_object_numbers_inner: non-indirect Kid";
+        []
+  in
+    match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/AcroForm"; "/Fields"] with
+    | Some (Pdf.Array toplevelfields) ->
+        flatten (map get_field_object_numbers_inner toplevelfields)
+    | _ ->
+        []
+
 let matterhorn_28_005 _ _ pdf =
-  unimpl ()
+  (* Fields lacking a /TU entry. Every object from get_field_object_numbers
+     has /T, so /TU is the entry that tells them apart. *)
+  let missing_tu =
+    option_map
+      (function objnum ->
+         match Pdf.lookup_direct pdf "/TU" (Pdf.Indirect objnum) with
+         | Some _ -> None
+         | None -> Some objnum)
+      (get_field_object_numbers pdf)
+  in
+    if missing_tu = [] then () else
+    (* Check for alts in enclosing. We look for /StructParent (from merged annotation, not field) *)
+    (* TODO(review): the Alt check is not written yet, so nothing is reported
+       for the fields in missing_tu. *)
+    let parent_tree = read_parent_tree pdf in
+      ()
 
 (* An annotation with subtype undefined in ISO 32000 does not meet 7.18.1. *)
 let matterhorn_28_006 _ _ pdf =