This commit is contained in:
John Whitington 2023-04-13 20:23:59 +01:00
parent 90c41b3e57
commit 0400539bea
1 changed file with 17 additions and 8 deletions

View File

@@ -21,6 +21,7 @@ let find_composition_fonts pdf i obj marked =
| Some (Pdf.Name "/Font") -> | Some (Pdf.Name "/Font") ->
iter iter
(fun i -> (fun i ->
(*Printf.printf "Object %i\n%s\n" i (Pdfwrite.string_of_pdf (Pdf.lookup_obj pdf i));*)
match Hashtbl.find marked i with match Hashtbl.find marked i with
| () -> () | () -> ()
| exception Not_found -> l += size pdf i; Hashtbl.add marked i ()) | exception Not_found -> l += size pdf i; Hashtbl.add marked i ())
@@ -33,16 +34,21 @@ let find_composition_content_streams pdf i obj marked =
match Hashtbl.find marked i with () -> 0 | exception Not_found -> match Hashtbl.find marked i with () -> 0 | exception Not_found ->
match Pdf.lookup_direct pdf "/Type" obj with match Pdf.lookup_direct pdf "/Type" obj with
| Some (Pdf.Name "/Page") -> | Some (Pdf.Name "/Page") ->
(*Printf.printf "Found a page...%s\n" (Pdfwrite.string_of_pdf (Pdf.direct pdf obj));*)
let cs = let cs =
begin match Pdf.lookup_direct pdf "/Contents" obj with match obj with Pdf.Dictionary d ->
begin match lookup "/Contents" d with
| Some (Pdf.Indirect i) -> [i] | Some (Pdf.Indirect i) -> [i]
| Some (Pdf.Array is) -> option_map (function Pdf.Indirect i -> Some i | _ -> None) is | Some (Pdf.Array is) -> option_map (function Pdf.Indirect i -> Some i | _ -> None) is
| _ -> [] | _ -> []
end end
| _ -> []
in in
(*Printf.printf "Found %i content streams\n" (length cs);*)
let l = ref 0 in let l = ref 0 in
iter iter
(fun i -> (fun i ->
(*Printf.printf "Considering content stream %i\n" i;*)
match Hashtbl.find marked i with match Hashtbl.find marked i with
| () -> () | () -> ()
| exception Not_found -> Hashtbl.add marked i (); l += size pdf i) | exception Not_found -> Hashtbl.add marked i (); l += size pdf i)
@@ -66,12 +72,15 @@ let find_composition pdf =
let embedded_files = ref 0 in let embedded_files = ref 0 in
Pdf.objiter Pdf.objiter
(fun i obj -> (fun i obj ->
(*Printf.printf "Marked objects at beginning: ";
Hashtbl.iter (fun k () -> Printf.printf "%i " k) marked;
Printf.printf "\n";*)
match Hashtbl.find marked i with _ -> () | exception Not_found -> match Hashtbl.find marked i with _ -> () | exception Not_found ->
embedded_files += find_composition_embedded_files pdf i obj marked; (*embedded_files += find_composition_embedded_files pdf i obj marked;
images += find_composition_images pdf i obj marked; images += find_composition_images pdf i obj marked;*)
content_streams += find_composition_content_streams pdf i obj marked; content_streams += find_composition_content_streams pdf i obj marked;
structure_info += find_composition_structure_info pdf i obj marked; (*structure_info += find_composition_structure_info pdf i obj marked;
link_annotations += find_composition_link_annotations pdf i obj marked; link_annotations += find_composition_link_annotations pdf i obj marked;*)
fonts += find_composition_fonts pdf i obj marked) fonts += find_composition_fonts pdf i obj marked)
pdf; pdf;