diff --git a/Makefile b/Makefile index 45511c8..e971697 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ DOC = cpdfunicodedata cpdferror cpdfdebug cpdfjson cpdfstrftime cpdfcoord \ cpdfembed cpdfaddtext cpdffont cpdftype cpdfpad cpdfocg \ cpdfsqueeze cpdfdraft cpdfspot cpdfpagelabels cpdfcreate cpdfannot \ cpdfxobject cpdfimpose cpdftweak cpdftexttopdf cpdftoc cpdfjpeg \ - cpdfpng cpdfimage cpdfdraw cpdfcommand + cpdfpng cpdfimage cpdfdraw cpdfcomposition cpdfcommand MODS = $(NONDOC) $(DOC) diff --git a/cpdfcommand.ml b/cpdfcommand.ml index 0b7f579..fb6c724 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -3385,80 +3385,6 @@ let warn_prerotate range pdf = let prerotate range pdf = Cpdfpage.upright ~fast:args.fast range pdf -let find_composition_images pdf i obj marked = - match Pdf.lookup_direct pdf "/Subtype" obj with - | Some (Pdf.Name "/Image") -> - Hashtbl.add marked i (); - String.length (Pdfwrite.string_of_pdf_including_data obj); - | _ -> 0 - -let find_composition_fonts pdf i obj marked = 0 - -let find_composition_content_streams pdf i obj marked = - match Pdf.lookup_direct pdf "/Type" obj with - | Some (Pdf.Name "/Page") -> - let cs = - begin match Pdf.lookup_direct pdf "/Contents" obj with - | Some (Pdf.Indirect i) -> [i] - | Some (Pdf.Array is) -> option_map (function Pdf.Indirect i -> Some i | _ -> None) is - | _ -> [] - end - in - let l = ref 0 in - iter - (fun i -> - Hashtbl.add marked i (); - l += String.length (Pdfwrite.string_of_pdf_including_data (Pdf.lookup_obj pdf i))) - cs; - !l - | _ -> 0 - -let find_composition_structure_info pdf i obj marked = 0 - -let find_composition_link_annotations pdf i obj marked = 0 - -let find_composition_embedded_files pdf i obj marked = 0 - -let find_composition pdf = - let marked = null_hash () in - let images = ref 0 in - let fonts = ref 0 in - let content_streams = ref 0 in - let structure_info = ref 0 in - let link_annotations = ref 0 in - let embedded_files = ref 0 in - Pdf.objiter - (fun i obj -> - match Hashtbl.find marked i with _ -> () | exception Not_found -> - images += find_composition_images pdf i obj marked; - fonts += find_composition_fonts pdf i obj marked; - content_streams += find_composition_content_streams pdf i obj marked; - structure_info += find_composition_structure_info pdf i obj marked; - link_annotations += find_composition_link_annotations pdf i obj marked; - embedded_files += find_composition_embedded_files pdf i obj marked) - pdf; - (!images, !fonts, !content_streams, !structure_info, !link_annotations, !embedded_files) - -(* First go: images, fonts, content streams, structure info, link annotations, embedded files *) -let show_composition_json filesize pdf = - let perc x = float_of_int x /. float_of_int filesize *. 100. in - let images, fonts, content_streams, structure_info, link_annotations, embedded_files = - find_composition pdf - in - let r = images + fonts + content_streams + structure_info + link_annotations + embedded_files in - `List [`Tuple [`String "Images"; `Int images; `Float (perc images)]; - `Tuple [`String "Content streams"; `Int content_streams; `Float (perc content_streams)]; - `Tuple [`String "Unclassified"; `Int (filesize - r); `Float (perc (filesize - r))]] - -let show_composition filesize json pdf = - let module J = Cpdfyojson.Safe in - let j = show_composition_json filesize pdf in - if json then (flprint (J.pretty_to_string j); flprint "\n") else - match j with - | `List js -> - iter (function `Tuple [`String a; `Int b; `Float c] -> Printf.printf "%s: %i bytes (%.1f%%)\n" a b c | _ -> ()) js - | _ -> () - let embed_font () = match args.font with | StandardFont f -> @@ -4429,7 +4355,7 @@ let go () = | (InFile inname, _, _, _, _, _)::_ -> filesize inname | _ -> 0 in - show_composition filesize json pdf + Cpdfcomposition.show_composition filesize json pdf (* Advise the user if a combination of command line flags makes little sense, or error out if it make no sense at all. *) diff --git a/cpdfcomposition.ml b/cpdfcomposition.ml new file mode 100644 index 0000000..7eff76f --- /dev/null +++ b/cpdfcomposition.ml @@ -0,0 +1,80 @@ +open Pdfutil + +let find_composition_images pdf i obj marked = + match Pdf.lookup_direct pdf "/Subtype" obj with + | Some (Pdf.Name "/Image") -> + Hashtbl.add marked i (); + String.length (Pdfwrite.string_of_pdf_including_data obj); + | _ -> 0 + +let find_composition_fonts pdf i obj marked = 0 + +(* Also includes xobjects *) +let find_composition_content_streams pdf i obj marked = + match Pdf.lookup_direct pdf "/Type" obj with + | Some (Pdf.Name "/Page") -> + let cs = + begin match Pdf.lookup_direct pdf "/Contents" obj with + | Some (Pdf.Indirect i) -> [i] + | Some (Pdf.Array is) -> option_map (function Pdf.Indirect i -> Some i | _ -> None) is + | _ -> [] + end + in + let l = ref 0 in + iter + (fun i -> + Hashtbl.add marked i (); + l += String.length (Pdfwrite.string_of_pdf_including_data (Pdf.lookup_obj pdf i))) + cs; + !l + | _ -> 0 + +let find_composition_structure_info pdf i obj marked = 0 + +let find_composition_link_annotations pdf i obj marked = 0 + +let find_composition_embedded_files pdf i obj marked = 0 + +let find_composition pdf = + let marked = null_hash () in + let images = ref 0 in + let fonts = ref 0 in + let content_streams = ref 0 in + let structure_info = ref 0 in + let link_annotations = ref 0 in + let embedded_files = ref 0 in + Pdf.objiter + (fun i obj -> + match Hashtbl.find marked i with _ -> () | exception Not_found -> + images += find_composition_images pdf i obj marked; + fonts += find_composition_fonts pdf i obj marked; + content_streams += find_composition_content_streams pdf i obj marked; + structure_info += find_composition_structure_info pdf i obj marked; + link_annotations += find_composition_link_annotations pdf i obj marked; + embedded_files += find_composition_embedded_files pdf i obj marked) + pdf; + (!images, !fonts, !content_streams, !structure_info, !link_annotations, !embedded_files) + +(* First go: images, fonts, content streams, structure info, link annotations, embedded files *) +let show_composition_json filesize pdf = + let perc x = float_of_int x /. float_of_int filesize *. 100. in + let images, fonts, content_streams, structure_info, link_annotations, embedded_files = + find_composition pdf + in + let r = images + fonts + content_streams + structure_info + link_annotations + embedded_files in + `List [`Tuple [`String "Images"; `Int images; `Float (perc images)]; + `Tuple [`String "Fonts"; `Int fonts; `Float (perc fonts)]; + `Tuple [`String "Content streams"; `Int content_streams; `Float (perc content_streams)]; + `Tuple [`String "Structure Info"; `Int structure_info; `Float (perc structure_info)]; + `Tuple [`String "Link Annotations"; `Int link_annotations; `Float (perc link_annotations)]; + `Tuple [`String "Embedded Files"; `Int embedded_files; `Float (perc embedded_files)]; + `Tuple [`String "Unclassified"; `Int (filesize - r); `Float (perc (filesize - r))]] + +let show_composition filesize json pdf = + let module J = Cpdfyojson.Safe in + let j = show_composition_json filesize pdf in + if json then (flprint (J.pretty_to_string j); flprint "\n") else + match j with + | `List js -> + iter (function `Tuple [`String a; `Int b; `Float c] -> Printf.printf "%s: %i bytes (%.2f%%)\n" a b c | _ -> ()) js + | _ -> () diff --git a/cpdfcomposition.mli b/cpdfcomposition.mli new file mode 100644 index 0000000..b380998 --- /dev/null +++ b/cpdfcomposition.mli @@ -0,0 +1 @@ +val show_composition : int -> bool -> Pdf.t -> unit