more
This commit is contained in:
parent
e54868a64b
commit
cc74b3fc69
|
@ -302,3 +302,48 @@ let dump_attached_files pdf out =
|
||||||
iter (dump_attached_page pdf out) (Pdfpage.pages_of_pagetree pdf)
|
iter (dump_attached_page pdf out) (Pdfpage.pages_of_pagetree pdf)
|
||||||
with
|
with
|
||||||
e -> error (Printf.sprintf "Couldn't dump attached files: %s\n" (Printexc.to_string e))
|
e -> error (Printf.sprintf "Couldn't dump attached files: %s\n" (Printexc.to_string e))
|
||||||
|
|
||||||
|
(* Size in bytes of the data of a single attachment.

   The argument is a pair whose second component is the file specification
   dictionary; the first component is ignored. The result is 0 when the file
   specification has no string-valued /F entry. [error] is called with
   "Bad embedded file stream" when /EF is missing, when /EF has no /F entry,
   or when that entry is not a stream whose data can be obtained.

   No decoding is performed here: the size reported is that of the bytes held
   by the stream object. *)
let size_attachment pdf (_, embeddedfile) =
  match Pdf.lookup_direct pdf "/F" embeddedfile with
  | Some (Pdf.String _) ->
      begin match Pdf.lookup_direct pdf "/EF" embeddedfile with
      | Some d ->
          begin match Pdf.lookup_direct pdf "/F" d with
          | Some (Pdf.Stream _ as stream) ->
              (* The stream data may not have been read in yet. Force it to be
                 loaded so that a valid attachment is not rejected merely
                 because its bytes are still on disk. *)
              Pdf.getstream stream;
              begin match stream with
              | Pdf.Stream {contents = (_, Pdf.Got b)} -> bytes_size b
              | _ -> error "Bad embedded file stream"
              end
          | _ -> error "Bad embedded file stream"
          end
      | None -> error "Bad embedded file stream"
      end
  | _ -> 0
|
||||||
|
|
||||||
|
(* Sizes of the files attached to one page, one list element per attachment.

   Only annotations whose /Subtype is /FileAttachment and which carry an /FS
   entry contribute. A page with no /Annots array gives the empty list. *)
let size_page_files pdf page =
  let is_file_attachment annotation =
    match Pdf.lookup_direct pdf "/Subtype" annotation with
    | Some (Pdf.Name "/FileAttachment") -> true
    | _ -> false
  in
  let annotations =
    match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with
    | Some (Pdf.Array entries) -> entries
    | _ -> []
  in
  let filespecs =
    option_map
      (Pdf.lookup_direct pdf "/FS")
      (keep is_file_attachment annotations)
  in
  (* size_attachment expects a pair and ignores its first component, so a
     dummy 0 is supplied there. *)
  map (fun filespec -> size_attachment pdf (0, filespec)) filespecs
|
||||||
|
|
||||||
|
(* Total size of the files listed in the document-level /EmbeddedFiles name
   tree, found under /Names in the document root. The result is 0 when there
   is no /EmbeddedFiles entry. *)
let size_document_files pdf =
  let catalog = Pdf.lookup_obj pdf pdf.Pdf.root in
  let name_dictionary =
    match Pdf.lookup_direct pdf "/Names" catalog with
    | Some dictionary -> dictionary
    | None -> Pdf.Dictionary []
  in
  match Pdf.lookup_direct pdf "/EmbeddedFiles" name_dictionary with
  | None -> 0
  | Some tree ->
      let entries = Pdf.contents_of_nametree pdf tree in
      sum (map (size_attachment pdf) entries)
|
||||||
|
|
||||||
|
(* Total size in bytes of all attached files: the document-level embedded
   files plus the file attachments found on every page of the page tree. *)
let size_attached_files pdf =
  let pages = Pdfpage.pages_of_pagetree pdf in
  let page_level_total = sum (flatten (map (size_page_files pdf) pages)) in
  size_document_files pdf + page_level_total
|
||||||
|
|
|
@ -22,3 +22,6 @@ val list_attached_files : Pdf.t -> attachment list
|
||||||
|
|
||||||
(** Dump attached files to a given directory. *)
|
(** Dump attached files to a given directory. *)
|
||||||
val dump_attached_files : Pdf.t -> string -> unit
|
val dump_attached_files : Pdf.t -> string -> unit
|
||||||
|
|
||||||
|
(** Total size in bytes of all attached files, counting both document-level embedded files and page-level file attachments. *)
|
||||||
|
val size_attached_files : Pdf.t -> int
|
||||||
|
|
|
@ -73,14 +73,11 @@ let find_composition_content_streams pdf i obj marked =
|
||||||
[i]
|
[i]
|
||||||
| _ -> []
|
| _ -> []
|
||||||
|
|
||||||
let find_composition_embedded_files pdf i obj marked = []
|
|
||||||
|
|
||||||
let find_composition pdf =
|
let find_composition pdf =
|
||||||
let marked = null_hash () in
|
let marked = null_hash () in
|
||||||
let images = ref [] in
|
let images = ref [] in
|
||||||
let fonts = ref [] in
|
let fonts = ref [] in
|
||||||
let content_streams = ref [] in
|
let content_streams = ref [] in
|
||||||
let embedded_files = ref [] in
|
|
||||||
Pdf.objiter
|
Pdf.objiter
|
||||||
(fun i obj ->
|
(fun i obj ->
|
||||||
(*Printf.printf "Looking at object %i\n" i;
|
(*Printf.printf "Looking at object %i\n" i;
|
||||||
|
@ -89,13 +86,12 @@ let find_composition pdf =
|
||||||
Hashtbl.iter (fun k () -> Printf.printf "%i " k) marked;
|
Hashtbl.iter (fun k () -> Printf.printf "%i " k) marked;
|
||||||
Printf.printf "\n";*)
|
Printf.printf "\n";*)
|
||||||
match Hashtbl.find marked i with _ -> () | exception Not_found ->
|
match Hashtbl.find marked i with _ -> () | exception Not_found ->
|
||||||
embedded_files := find_composition_embedded_files pdf i obj marked @ !embedded_files;
|
|
||||||
images := find_composition_images pdf i obj marked @ !images;
|
images := find_composition_images pdf i obj marked @ !images;
|
||||||
content_streams := find_composition_content_streams pdf i obj marked @ !content_streams;
|
content_streams := find_composition_content_streams pdf i obj marked @ !content_streams;
|
||||||
fonts := find_composition_fonts pdf i obj marked @ !fonts)
|
fonts := find_composition_fonts pdf i obj marked @ !fonts)
|
||||||
pdf;
|
pdf;
|
||||||
let structure_info = find_composition_structure_info pdf marked in
|
let structure_info = find_composition_structure_info pdf marked in
|
||||||
(!images, !fonts, !content_streams, structure_info, !embedded_files)
|
(!images, !fonts, !content_streams, structure_info)
|
||||||
|
|
||||||
let size pdf i =
|
let size pdf i =
|
||||||
String.length (Pdfwrite.string_of_pdf_including_data (Pdf.lookup_obj pdf i))
|
String.length (Pdfwrite.string_of_pdf_including_data (Pdf.lookup_obj pdf i))
|
||||||
|
@ -123,21 +119,21 @@ let compressed_xref_table_size pdf =
|
||||||
|
|
||||||
let show_composition_json filesize pdf =
|
let show_composition_json filesize pdf =
|
||||||
let perc x = float_of_int x /. float_of_int filesize *. 100. in
|
let perc x = float_of_int x /. float_of_int filesize *. 100. in
|
||||||
let o_images, o_fonts, o_content_streams, o_structure_info, o_embedded_files = find_composition pdf in
|
let o_images, o_fonts, o_content_streams, o_structure_info = find_composition pdf in
|
||||||
let images, fonts, content_streams, structure_info, embedded_files, xref_table =
|
let images, fonts, content_streams, structure_info, attached_files, xref_table =
|
||||||
compressed_size pdf o_images,
|
compressed_size pdf o_images,
|
||||||
compressed_size pdf o_fonts,
|
compressed_size pdf o_fonts,
|
||||||
compressed_size pdf o_content_streams,
|
compressed_size pdf o_content_streams,
|
||||||
compressed_size pdf o_structure_info,
|
compressed_size pdf o_structure_info,
|
||||||
compressed_size pdf o_embedded_files,
|
Cpdfattach.size_attached_files pdf,
|
||||||
compressed_xref_table_size pdf
|
compressed_xref_table_size pdf
|
||||||
in
|
in
|
||||||
let r = images + fonts + content_streams + structure_info + embedded_files + xref_table in
|
let r = images + fonts + content_streams + structure_info + attached_files + xref_table in
|
||||||
`List [`Tuple [`String "Images"; `Int images; `Float (perc images)];
|
`List [`Tuple [`String "Images"; `Int images; `Float (perc images)];
|
||||||
`Tuple [`String "Fonts"; `Int fonts; `Float (perc fonts)];
|
`Tuple [`String "Fonts"; `Int fonts; `Float (perc fonts)];
|
||||||
`Tuple [`String "Content streams"; `Int content_streams; `Float (perc content_streams)];
|
`Tuple [`String "Content streams"; `Int content_streams; `Float (perc content_streams)];
|
||||||
`Tuple [`String "Structure Info"; `Int structure_info; `Float (perc structure_info)];
|
`Tuple [`String "Structure Info"; `Int structure_info; `Float (perc structure_info)];
|
||||||
`Tuple [`String "Embedded Files"; `Int embedded_files; `Float (perc embedded_files)];
|
`Tuple [`String "Attached Files"; `Int attached_files; `Float (perc attached_files)];
|
||||||
`Tuple [`String "XRef Table"; `Int xref_table; `Float (perc xref_table)];
|
`Tuple [`String "XRef Table"; `Int xref_table; `Float (perc xref_table)];
|
||||||
`Tuple [`String "Unclassified"; `Int (filesize - r); `Float (perc (filesize - r))]]
|
`Tuple [`String "Unclassified"; `Int (filesize - r); `Float (perc (filesize - r))]]
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
%Document -list-annotations[-json] now obey page range
|
%Document -list-annotations[-json] now obey page range
|
||||||
%Document round-tripping of annotations, supersede -copy-annotations.
|
%Document round-tripping of annotations, supersede -copy-annotations.
|
||||||
%Document -utf for JSON and mark -clean-strings as deprecated since can fail to round-trip binary strings which begin with a BOM?
|
%Document -utf for JSON and mark -clean-strings as deprecated since can fail to round-trip binary strings which begin with a BOM?
|
||||||
%Document -composition[-json]
|
%Document -composition[-json] - mention residue may be negative
|
||||||
%Document discourage GhostScript usage, since it can strip data (-gs-malformed, embed missing fonts)
|
%Document discourage GhostScript usage, since it can strip data (-gs-malformed, embed missing fonts)
|
||||||
%Document [ ] pagespecs
|
%Document [ ] pagespecs
|
||||||
%Document extensions to -info
|
%Document extensions to -info
|
||||||
|
|
Loading…
Reference in New Issue