more
This commit is contained in:
parent
e54868a64b
commit
cc74b3fc69
|
@ -302,3 +302,48 @@ let dump_attached_files pdf out =
|
|||
iter (dump_attached_page pdf out) (Pdfpage.pages_of_pagetree pdf)
|
||||
with
|
||||
e -> error (Printf.sprintf "Couldn't dump attached files: %s\n" (Printexc.to_string e))
|
||||
|
||||
(* [size_attachment pdf (_, embeddedfile)] is the size, as reported by
   [bytes_size], of the stream data attached to the file specification
   dictionary [embeddedfile]. The first component of the pair is ignored.

   Returns 0 when [embeddedfile] has no string-valued /F entry. Otherwise the
   stream is looked up at /EF -> /F, and [error "Bad embedded file stream"] is
   called when /EF is missing, when /EF has no /F entry, or when that entry is
   not a stream whose data can be obtained. *)
let size_attachment pdf (_, embeddedfile) =
  match Pdf.lookup_direct pdf "/F" embeddedfile with
  | Some (Pdf.String _) ->
      begin match Pdf.lookup_direct pdf "/EF" embeddedfile with
      | Some d ->
          begin match Pdf.lookup_direct pdf "/F" d with
          | Some (Pdf.Stream _ as stream) ->
              (* The match below only accepts data which is already in memory
                 (Pdf.Got), so force the stream first; otherwise a stream that
                 has not been read yet would be rejected as malformed.
                 NOTE(review): assumes Pdf.getstream loads the data in place
                 and is a no-op for an already-loaded stream -- confirm
                 against the camlpdf version in use. *)
              Pdf.getstream stream;
              begin match stream with
              | Pdf.Stream {contents = (_, Pdf.Got b)} -> bytes_size b
              | _ -> error "Bad embedded file stream"
              end
          | _ -> error "Bad embedded file stream"
          end
      | None -> error "Bad embedded file stream"
      end
  | _ -> 0
|
||||
|
||||
(* Sizes of the files attached to one page: one list element for each
   annotation of [page] whose /Subtype is /FileAttachment and which has an /FS
   entry. A page whose /Annots entry is missing, or is not an array, yields
   the empty list. *)
let size_page_files pdf page =
  let is_file_attachment annotation =
    match Pdf.lookup_direct pdf "/Subtype" annotation with
    | Some (Pdf.Name "/FileAttachment") -> true
    | _ -> false
  in
  let annotations =
    match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with
    | Some (Pdf.Array items) -> items
    | _ -> []
  in
  let filespecs =
    option_map
      (Pdf.lookup_direct pdf "/FS")
      (keep is_file_attachment annotations)
  in
  (* size_attachment takes a pair and ignores its first component, so 0 is
     only a placeholder. *)
  map (fun filespec -> size_attachment pdf (0, filespec)) filespecs
|
||||
|
||||
(* Sum of [size_attachment] over every entry of the /EmbeddedFiles name tree
   found under /Names in the document root object. The result is 0 when the
   root has no /Names entry or /Names has no /EmbeddedFiles entry. *)
let size_document_files pdf =
  let catalog = Pdf.lookup_obj pdf pdf.Pdf.root in
  let name_dictionary =
    match Pdf.lookup_direct pdf "/Names" catalog with
    | None -> Pdf.Dictionary []
    | Some dict -> dict
  in
  match Pdf.lookup_direct pdf "/EmbeddedFiles" name_dictionary with
  | None -> 0
  | Some tree ->
      let entries = Pdf.contents_of_nametree pdf tree in
      sum (map (size_attachment pdf) entries)
|
||||
|
||||
(* Total size of the attached files of [pdf]: the document-level total from
   [size_document_files] plus the sizes reported by [size_page_files] for
   every page of the page tree. *)
let size_attached_files pdf =
  let pages = Pdfpage.pages_of_pagetree pdf in
  let per_page_sizes = map (size_page_files pdf) pages in
  let page_level_total = sum (flatten per_page_sizes) in
  size_document_files pdf + page_level_total
|
||||
|
|
|
@ -22,3 +22,6 @@ val list_attached_files : Pdf.t -> attachment list
|
|||
|
||||
(** Dump attached files to a given directory. *)
|
||||
val dump_attached_files : Pdf.t -> string -> unit
|
||||
|
||||
(** Total size in bytes of all attached files: the document-level embedded files plus the file attachments found on each page. *)
|
||||
val size_attached_files : Pdf.t -> int
|
||||
|
|
|
@ -73,14 +73,11 @@ let find_composition_content_streams pdf i obj marked =
|
|||
[i]
|
||||
| _ -> []
|
||||
|
||||
(* Stub: reports no embedded-file objects. Every argument is ignored and the
   result is always the empty list. *)
let find_composition_embedded_files pdf i obj marked = []
|
||||
|
||||
let find_composition pdf =
|
||||
let marked = null_hash () in
|
||||
let images = ref [] in
|
||||
let fonts = ref [] in
|
||||
let content_streams = ref [] in
|
||||
let embedded_files = ref [] in
|
||||
Pdf.objiter
|
||||
(fun i obj ->
|
||||
(*Printf.printf "Looking at object %i\n" i;
|
||||
|
@ -89,13 +86,12 @@ let find_composition pdf =
|
|||
Hashtbl.iter (fun k () -> Printf.printf "%i " k) marked;
|
||||
Printf.printf "\n";*)
|
||||
match Hashtbl.find marked i with _ -> () | exception Not_found ->
|
||||
embedded_files := find_composition_embedded_files pdf i obj marked @ !embedded_files;
|
||||
images := find_composition_images pdf i obj marked @ !images;
|
||||
content_streams := find_composition_content_streams pdf i obj marked @ !content_streams;
|
||||
fonts := find_composition_fonts pdf i obj marked @ !fonts)
|
||||
pdf;
|
||||
let structure_info = find_composition_structure_info pdf marked in
|
||||
(!images, !fonts, !content_streams, structure_info, !embedded_files)
|
||||
(!images, !fonts, !content_streams, structure_info)
|
||||
|
||||
(* Length of the string produced by Pdfwrite.string_of_pdf_including_data for
   the object numbered [i] in [pdf]. *)
let size pdf i =
  let obj = Pdf.lookup_obj pdf i in
  let serialised = Pdfwrite.string_of_pdf_including_data obj in
  String.length serialised
|
||||
|
@ -123,21 +119,21 @@ let compressed_xref_table_size pdf =
|
|||
|
||||
let show_composition_json filesize pdf =
|
||||
let perc x = float_of_int x /. float_of_int filesize *. 100. in
|
||||
let o_images, o_fonts, o_content_streams, o_structure_info, o_embedded_files = find_composition pdf in
|
||||
let images, fonts, content_streams, structure_info, embedded_files, xref_table =
|
||||
let o_images, o_fonts, o_content_streams, o_structure_info = find_composition pdf in
|
||||
let images, fonts, content_streams, structure_info, attached_files, xref_table =
|
||||
compressed_size pdf o_images,
|
||||
compressed_size pdf o_fonts,
|
||||
compressed_size pdf o_content_streams,
|
||||
compressed_size pdf o_structure_info,
|
||||
compressed_size pdf o_embedded_files,
|
||||
Cpdfattach.size_attached_files pdf,
|
||||
compressed_xref_table_size pdf
|
||||
in
|
||||
let r = images + fonts + content_streams + structure_info + embedded_files + xref_table in
|
||||
let r = images + fonts + content_streams + structure_info + attached_files + xref_table in
|
||||
`List [`Tuple [`String "Images"; `Int images; `Float (perc images)];
|
||||
`Tuple [`String "Fonts"; `Int fonts; `Float (perc fonts)];
|
||||
`Tuple [`String "Content streams"; `Int content_streams; `Float (perc content_streams)];
|
||||
`Tuple [`String "Structure Info"; `Int structure_info; `Float (perc structure_info)];
|
||||
`Tuple [`String "Embedded Files"; `Int embedded_files; `Float (perc embedded_files)];
|
||||
`Tuple [`String "Attached Files"; `Int attached_files; `Float (perc attached_files)];
|
||||
`Tuple [`String "XRef Table"; `Int xref_table; `Float (perc xref_table)];
|
||||
`Tuple [`String "Unclassified"; `Int (filesize - r); `Float (perc (filesize - r))]]
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
%Document -list-annotations[-json] now obey page range
|
||||
%Document round-tripping of annotations, supersede -copy-annotations.
|
||||
%Document -utf for JSON and mark -clean-strings as deprecated, since it can fail to round-trip binary strings which begin with a BOM?
|
||||
%Document -composition[-json]
|
||||
%Document -composition[-json] - mention residue may be negative
|
||||
%Document discourage GhostScript usage, since it can strip data (-gs-malformed, embed missing fonts)
|
||||
%Document [ ] pagespecs
|
||||
%Document extensions to -info
|
||||
|
|
Loading…
Reference in New Issue