This commit is contained in:
John Whitington 2023-04-13 16:51:11 +01:00
parent 4df1f67001
commit 64d9c94024
4 changed files with 83 additions and 76 deletions

View File

@ -7,7 +7,7 @@ DOC = cpdfunicodedata cpdferror cpdfdebug cpdfjson cpdfstrftime cpdfcoord \
cpdfembed cpdfaddtext cpdffont cpdftype cpdfpad cpdfocg \
cpdfsqueeze cpdfdraft cpdfspot cpdfpagelabels cpdfcreate cpdfannot \
cpdfxobject cpdfimpose cpdftweak cpdftexttopdf cpdftoc cpdfjpeg \
cpdfpng cpdfimage cpdfdraw cpdfcommand
cpdfpng cpdfimage cpdfdraw cpdfcomposition cpdfcommand

View File

@ -3385,80 +3385,6 @@ let warn_prerotate range pdf =
(* Thin wrapper: hands the page range and document straight to
   Cpdfpage.upright and returns its result. *)
let prerotate = fun range pdf ->
  Cpdfpage.upright range pdf
(* Size contribution of object [i] when it is an image: if the /Subtype entry
   of [obj] is the name /Image, record [i] in [marked] and return the length
   of the serialised object including its data; otherwise return 0. *)
let find_composition_images pdf i obj marked =
  let is_image =
    match Pdf.lookup_direct pdf "/Subtype" obj with
    | Some (Pdf.Name "/Image") -> true
    | _ -> false
  in
  if is_image then begin
    (* Mark first, then measure, in the same order as before. *)
    Hashtbl.add marked i ();
    String.length (Pdfwrite.string_of_pdf_including_data obj)
  end else
    0
(* Placeholder: no font accounting yet, so every object contributes 0. *)
let find_composition_fonts _pdf _i _obj _marked = 0
(* Size contribution of the content streams of a page object.
   When the /Type entry of [obj] is the name /Page, the /Contents entry is
   inspected: a single indirect reference, or the indirect entries of an
   array, give the list [cs] of object numbers. The callback below marks an
   object number in [marked] and adds the length of its serialised form
   (including data) to the counter [l]. Any other object yields 0.
   NOTE(review): this listing appears to have lost lines (the close of the
   inner match, the iteration of the callback over [cs], and the final read
   of [l]) - confirm against the real file.
   NOTE(review): if Pdf.lookup_direct resolves indirect references, the
   Pdf.Indirect arm below can never match - confirm.
   NOTE(review): the callback does not consult [marked] before counting, so
   a stream shared by several pages is presumably counted once per page -
   confirm. *)
let find_composition_content_streams pdf i obj marked =
match Pdf.lookup_direct pdf "/Type" obj with
| Some (Pdf.Name "/Page") ->
let cs =
begin match Pdf.lookup_direct pdf "/Contents" obj with
| Some (Pdf.Indirect i) -> [i]
| Some (Pdf.Array is) -> option_map (function Pdf.Indirect i -> Some i | _ -> None) is
| _ -> []
let l = ref 0 in
(fun i ->
Hashtbl.add marked i ();
l += String.length (Pdfwrite.string_of_pdf_including_data (Pdf.lookup_obj pdf i)))
| _ -> 0
(* Placeholder: structure info is not measured yet; always 0. *)
let find_composition_structure_info _pdf _i _obj _marked = 0
(* Placeholder: link annotations are not measured yet; always 0. *)
let find_composition_link_annotations _pdf _i _obj _marked = 0
(* Placeholder: embedded files are not measured yet; always 0. *)
let find_composition_embedded_files _pdf _i _obj _marked = 0
(* Totals the byte counts of each category for [pdf].
   A table [marked] of already-attributed object numbers and six counters
   are created. The callback skips any object number already present in
   [marked]; otherwise it adds the result of each category finder to the
   matching counter. The result is the six totals, in the order images,
   fonts, content streams, structure info, link annotations, embedded files.
   NOTE(review): the call that runs the callback over the objects of the
   document is not visible in this listing - confirm against the real
   file. *)
let find_composition pdf =
let marked = null_hash () in
let images = ref 0 in
let fonts = ref 0 in
let content_streams = ref 0 in
let structure_info = ref 0 in
let link_annotations = ref 0 in
let embedded_files = ref 0 in
(fun i obj ->
(* An object already claimed by an earlier finder is not counted again. *)
match Hashtbl.find marked i with _ -> () | exception Not_found ->
images += find_composition_images pdf i obj marked;
fonts += find_composition_fonts pdf i obj marked;
content_streams += find_composition_content_streams pdf i obj marked;
structure_info += find_composition_structure_info pdf i obj marked;
link_annotations += find_composition_link_annotations pdf i obj marked;
embedded_files += find_composition_embedded_files pdf i obj marked)
(!images, !fonts, !content_streams, !structure_info, !link_annotations, !embedded_files)
(* First go: images, fonts, content streams, structure info, link annotations, embedded files *)
(* [show_composition_json filesize pdf] builds the composition report as a
   JSON list of [name; bytes; percentage] tuples: images, content streams,
   and an Unclassified row holding whatever part of [filesize] the measured
   categories do not account for.
   Percentages are relative to [filesize]. A [filesize] of 0 (the caller
   passes 0 when the input did not come from a file) would make the division
   produce nan or infinity, so 0. is reported instead. *)
let show_composition_json filesize pdf =
  let perc x =
    if filesize = 0 then 0.
    else float_of_int x /. float_of_int filesize *. 100.
  in
  let images, fonts, content_streams, structure_info, link_annotations, embedded_files =
    find_composition pdf
  in
  (* All six categories count towards the classified total, even those not
     reported as rows of their own. *)
  let r = images + fonts + content_streams + structure_info + link_annotations + embedded_files in
  let row name bytes = `Tuple [`String name; `Int bytes; `Float (perc bytes)] in
  `List
    [row "Images" images;
     row "Content streams" content_streams;
     row "Unclassified" (filesize - r)]
(* Prints the composition report for [pdf]. With [json] set, the report is
   pretty-printed as JSON followed by a newline; otherwise each
   [name; bytes; percentage] row is printed as one line of text. Rows of any
   other shape are ignored. *)
let show_composition filesize json pdf =
  let composition = show_composition_json filesize pdf in
  let print_row = function
    | `Tuple [`String label; `Int bytes; `Float percent] ->
        Printf.printf "%s: %i bytes (%.1f%%)\n" label bytes percent
    | _ -> ()
  in
  match json, composition with
  | true, _ ->
      flprint (Cpdfyojson.Safe.pretty_to_string composition);
      flprint "\n"
  | false, `List rows -> iter print_row rows
  | false, _ -> ()
let embed_font () =
match args.font with
| StandardFont f ->
@ -4429,7 +4355,7 @@ let go () =
| (InFile inname, _, _, _, _, _)::_ -> filesize inname
| _ -> 0
show_composition filesize json pdf
Cpdfcomposition.show_composition filesize json pdf
(* Advise the user if a combination of command line flags makes little sense,
or error out if it makes no sense at all. *)

80 Normal file
View File

@ -0,0 +1,80 @@
open Pdfutil
(* Size contribution of object [i] when it is an image: if the /Subtype entry
   of [obj] is the name /Image, record [i] in [marked] and return the length
   of the serialised object including its data; otherwise return 0. *)
let find_composition_images pdf i obj marked =
  let is_image =
    match Pdf.lookup_direct pdf "/Subtype" obj with
    | Some (Pdf.Name "/Image") -> true
    | _ -> false
  in
  if is_image then begin
    (* Mark first, then measure, in the same order as before. *)
    Hashtbl.add marked i ();
    String.length (Pdfwrite.string_of_pdf_including_data obj)
  end else
    0
(* Placeholder: no font accounting yet, so every object contributes 0. *)
let find_composition_fonts _pdf _i _obj _marked = 0
(* Also includes xobjects *)
(* Size contribution of the content streams of a page object.
   When the /Type entry of [obj] is the name /Page, the /Contents entry is
   inspected: a single indirect reference, or the indirect entries of an
   array, give the list [cs] of object numbers. The callback below marks an
   object number in [marked] and adds the length of its serialised form
   (including data) to the counter [l]. Any other object yields 0.
   NOTE(review): this listing appears to have lost lines (the close of the
   inner match, the iteration of the callback over [cs], and the final read
   of [l]) - confirm against the real file.
   NOTE(review): if Pdf.lookup_direct resolves indirect references, the
   Pdf.Indirect arm below can never match - confirm.
   NOTE(review): the callback does not consult [marked] before counting, so
   a stream shared by several pages is presumably counted once per page -
   confirm. *)
let find_composition_content_streams pdf i obj marked =
match Pdf.lookup_direct pdf "/Type" obj with
| Some (Pdf.Name "/Page") ->
let cs =
begin match Pdf.lookup_direct pdf "/Contents" obj with
| Some (Pdf.Indirect i) -> [i]
| Some (Pdf.Array is) -> option_map (function Pdf.Indirect i -> Some i | _ -> None) is
| _ -> []
let l = ref 0 in
(fun i ->
Hashtbl.add marked i ();
l += String.length (Pdfwrite.string_of_pdf_including_data (Pdf.lookup_obj pdf i)))
| _ -> 0
(* Placeholder: structure info is not measured yet; always 0. *)
let find_composition_structure_info _pdf _i _obj _marked = 0
(* Placeholder: link annotations are not measured yet; always 0. *)
let find_composition_link_annotations _pdf _i _obj _marked = 0
(* Placeholder: embedded files are not measured yet; always 0. *)
let find_composition_embedded_files _pdf _i _obj _marked = 0
(* Totals the byte counts of each category for [pdf].
   A table [marked] of already-attributed object numbers and six counters
   are created. The callback skips any object number already present in
   [marked]; otherwise it adds the result of each category finder to the
   matching counter. The result is the six totals, in the order images,
   fonts, content streams, structure info, link annotations, embedded files.
   NOTE(review): the call that runs the callback over the objects of the
   document is not visible in this listing - confirm against the real
   file. *)
let find_composition pdf =
let marked = null_hash () in
let images = ref 0 in
let fonts = ref 0 in
let content_streams = ref 0 in
let structure_info = ref 0 in
let link_annotations = ref 0 in
let embedded_files = ref 0 in
(fun i obj ->
(* An object already claimed by an earlier finder is not counted again. *)
match Hashtbl.find marked i with _ -> () | exception Not_found ->
images += find_composition_images pdf i obj marked;
fonts += find_composition_fonts pdf i obj marked;
content_streams += find_composition_content_streams pdf i obj marked;
structure_info += find_composition_structure_info pdf i obj marked;
link_annotations += find_composition_link_annotations pdf i obj marked;
embedded_files += find_composition_embedded_files pdf i obj marked)
(!images, !fonts, !content_streams, !structure_info, !link_annotations, !embedded_files)
(* First go: images, fonts, content streams, structure info, link annotations, embedded files *)
(* [show_composition_json filesize pdf] builds the composition report as a
   JSON list of [name; bytes; percentage] tuples, one per category, followed
   by an Unclassified row holding whatever part of [filesize] the measured
   categories do not account for.
   Percentages are relative to [filesize]. A [filesize] of 0 would make the
   division produce nan or infinity, so 0. is reported instead. *)
let show_composition_json filesize pdf =
  let perc x =
    if filesize = 0 then 0.
    else float_of_int x /. float_of_int filesize *. 100.
  in
  let images, fonts, content_streams, structure_info, link_annotations, embedded_files =
    find_composition pdf
  in
  let r = images + fonts + content_streams + structure_info + link_annotations + embedded_files in
  let row name bytes = `Tuple [`String name; `Int bytes; `Float (perc bytes)] in
  `List
    [row "Images" images;
     row "Fonts" fonts;
     row "Content streams" content_streams;
     row "Structure Info" structure_info;
     row "Link Annotations" link_annotations;
     row "Embedded Files" embedded_files;
     row "Unclassified" (filesize - r)]
(* Prints the composition report for [pdf]. With [json] set, the report is
   pretty-printed as JSON followed by a newline; otherwise each
   [name; bytes; percentage] row is printed as one line of text. Rows of any
   other shape are ignored. *)
let show_composition filesize json pdf =
  let composition = show_composition_json filesize pdf in
  let print_row = function
    | `Tuple [`String label; `Int bytes; `Float percent] ->
        Printf.printf "%s: %i bytes (%.2f%%)\n" label bytes percent
    | _ -> ()
  in
  match json, composition with
  | true, _ ->
      flprint (Cpdfyojson.Safe.pretty_to_string composition);
      flprint "\n"
  | false, `List rows -> iter print_row rows
  | false, _ -> ()

cpdfcomposition.mli Normal file
View File

@ -0,0 +1 @@
(** [show_composition filesize json pdf] prints a breakdown of [pdf] by
    category, giving each category's size in bytes and as a percentage of
    [filesize]. The breakdown is printed as JSON when [json] is [true], and
    as one line of text per category otherwise. *)
val show_composition : int -> bool -> Pdf.t -> unit