This commit is contained in:
John Whitington 2023-04-13 19:11:29 +01:00
parent 96d6e2472d
commit 90c41b3e57
1 changed files with 34 additions and 8 deletions

View File

@ -1,16 +1,36 @@
open Pdfutil open Pdfutil
let size pdf i =
String.length (Pdfwrite.string_of_pdf_including_data (Pdf.lookup_obj pdf i))
(* FIXME Add soft masks *)
let find_composition_images pdf i obj marked = let find_composition_images pdf i obj marked =
match Hashtbl.find marked i with () -> 0 | exception Not_found ->
match Pdf.lookup_direct pdf "/Subtype" obj with match Pdf.lookup_direct pdf "/Subtype" obj with
| Some (Pdf.Name "/Image") -> | Some (Pdf.Name "/Image") ->
Hashtbl.add marked i (); Hashtbl.add marked i ();
String.length (Pdfwrite.string_of_pdf_including_data obj); String.length (Pdfwrite.string_of_pdf_including_data obj)
| _ -> 0 | _ -> 0
let find_composition_fonts pdf i obj marked = 0 (* If it has /Font, find all objects referenced from it, and add
any not already marked to the count *)
let find_composition_fonts pdf i obj marked =
match Hashtbl.find marked i with () -> 0 | exception Not_found ->
let l = ref 0 in
match Pdf.lookup_direct pdf "/Type" obj with
| Some (Pdf.Name "/Font") ->
iter
(fun i ->
match Hashtbl.find marked i with
| () -> ()
| exception Not_found -> l += size pdf i; Hashtbl.add marked i ())
(Pdf.objects_referenced [] [] pdf (Pdf.Indirect i));
!l
| _ -> 0
(* Also includes xobjects *) (* FIXME: Add xobjects *)
let find_composition_content_streams pdf i obj marked = let find_composition_content_streams pdf i obj marked =
match Hashtbl.find marked i with () -> 0 | exception Not_found ->
match Pdf.lookup_direct pdf "/Type" obj with match Pdf.lookup_direct pdf "/Type" obj with
| Some (Pdf.Name "/Page") -> | Some (Pdf.Name "/Page") ->
let cs = let cs =
@ -23,8 +43,9 @@ let find_composition_content_streams pdf i obj marked =
let l = ref 0 in let l = ref 0 in
iter iter
(fun i -> (fun i ->
Hashtbl.add marked i (); match Hashtbl.find marked i with
l += String.length (Pdfwrite.string_of_pdf_including_data (Pdf.lookup_obj pdf i))) | () -> ()
| exception Not_found -> Hashtbl.add marked i (); l += size pdf i)
cs; cs;
!l !l
| _ -> 0 | _ -> 0
@ -46,12 +67,13 @@ let find_composition pdf =
Pdf.objiter Pdf.objiter
(fun i obj -> (fun i obj ->
match Hashtbl.find marked i with _ -> () | exception Not_found -> match Hashtbl.find marked i with _ -> () | exception Not_found ->
embedded_files += find_composition_embedded_files pdf i obj marked;
images += find_composition_images pdf i obj marked; images += find_composition_images pdf i obj marked;
fonts += find_composition_fonts pdf i obj marked;
content_streams += find_composition_content_streams pdf i obj marked; content_streams += find_composition_content_streams pdf i obj marked;
structure_info += find_composition_structure_info pdf i obj marked; structure_info += find_composition_structure_info pdf i obj marked;
link_annotations += find_composition_link_annotations pdf i obj marked; link_annotations += find_composition_link_annotations pdf i obj marked;
embedded_files += find_composition_embedded_files pdf i obj marked) fonts += find_composition_fonts pdf i obj marked)
pdf; pdf;
(!images, !fonts, !content_streams, !structure_info, !link_annotations, !embedded_files) (!images, !fonts, !content_streams, !structure_info, !link_annotations, !embedded_files)
@ -76,5 +98,9 @@ let show_composition filesize json pdf =
if json then (flprint (J.pretty_to_string j); flprint "\n") else if json then (flprint (J.pretty_to_string j); flprint "\n") else
match j with match j with
| `List js -> | `List js ->
iter (function `Tuple [`String a; `Int b; `Float c] -> Printf.printf "%s: %i bytes (%.2f%%)\n" a b c | _ -> ()) js iter
(function
| `Tuple [`String a; `Int b; `Float c] -> Printf.printf "%s: %i bytes (%.2f%%)\n" a b c
| _ -> ())
js
| _ -> () | _ -> ()