mirror of
				https://github.com/johnwhitington/cpdf-source.git
				synced 2025-06-05 22:09:39 +02:00 
			
		
		
		
	more
This commit is contained in:
		| @@ -302,3 +302,48 @@ let dump_attached_files pdf out = | ||||
|     iter (dump_attached_page pdf out) (Pdfpage.pages_of_pagetree pdf) | ||||
|   with | ||||
|     e -> error (Printf.sprintf "Couldn't dump attached files: %s\n" (Printexc.to_string e)) | ||||
|  | ||||
| (* [size_attachment pdf (key, filespec)] is the size in bytes of the data held | ||||
|    by the embedded file stream of [filespec]. The [key] component is ignored. | ||||
|    The stream is measured as stored: no decoding is applied here. | ||||
|    Returns 0 when [filespec] has no /F entry which is a string. Otherwise the | ||||
|    /EF dictionary must exist and its /F entry must be a stream; if not, | ||||
|    [error] is called with "Bad embedded file stream". *) | ||||
| let size_attachment pdf (_, embeddedfile) = | ||||
|   match Pdf.lookup_direct pdf "/F" embeddedfile with | ||||
|   | Some (Pdf.String _) -> | ||||
|       begin match Pdf.lookup_direct pdf "/EF" embeddedfile with | ||||
|       | Some d -> | ||||
|           begin match Pdf.lookup_direct pdf "/F" d with | ||||
|           | Some (Pdf.Stream _ as stream) -> | ||||
|               (* The stream data may not have been read from the file yet | ||||
|                  (Pdf.ToGet). Force it into memory so that a valid attachment | ||||
|                  is measured rather than rejected as a bad stream. *) | ||||
|               Pdf.getstream stream; | ||||
|               begin match stream with | ||||
|               | Pdf.Stream {contents = (_, Pdf.Got b)} -> bytes_size b | ||||
|               | _ -> error "Bad embedded file stream" | ||||
|               end | ||||
|           | _ -> error "Bad embedded file stream" | ||||
|           end | ||||
|       | None -> error "Bad embedded file stream" | ||||
|       end | ||||
|   | _ -> 0 | ||||
|  | ||||
| (* [size_page_files pdf page] lists the sizes, one per attachment, of the files | ||||
|    attached to [page] through annotations whose /Subtype is /FileAttachment. | ||||
|    Annotations without an /FS entry are dropped. A page with no /Annots array | ||||
|    yields the empty list. *) | ||||
| let size_page_files pdf page = | ||||
|   let is_file_attachment annot = | ||||
|     match Pdf.lookup_direct pdf "/Subtype" annot with | ||||
|     | Some (Pdf.Name "/FileAttachment") -> true | ||||
|     | _ -> false | ||||
|   in | ||||
|   let annotations = | ||||
|     match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with | ||||
|     | Some (Pdf.Array items) -> items | ||||
|     | _ -> [] | ||||
|   in | ||||
|   let filespecs = | ||||
|     option_map (Pdf.lookup_direct pdf "/FS") (keep is_file_attachment annotations) | ||||
|   in | ||||
|     (* size_attachment expects a (key, filespec) pair; the key is a dummy 0. *) | ||||
|     map (fun filespec -> size_attachment pdf (0, filespec)) filespecs | ||||
|  | ||||
| (* [size_document_files pdf] is the sum of the sizes of the files listed in the | ||||
|    /EmbeddedFiles name tree found under /Names in the document root object. | ||||
|    It is 0 when there is no /EmbeddedFiles entry. *) | ||||
| let size_document_files pdf = | ||||
|   let catalog = Pdf.lookup_obj pdf pdf.Pdf.root in | ||||
|   let name_dict = | ||||
|     (* A missing /Names is treated as an empty dictionary, so the lookup | ||||
|        below simply finds nothing. *) | ||||
|     match Pdf.lookup_direct pdf "/Names" catalog with | ||||
|     | Some n -> n | ||||
|     | None -> Pdf.Dictionary [] | ||||
|   in | ||||
|     match Pdf.lookup_direct pdf "/EmbeddedFiles" name_dict with | ||||
|     | None -> 0 | ||||
|     | Some tree -> | ||||
|         let entries = Pdf.contents_of_nametree pdf tree in | ||||
|           sum (map (size_attachment pdf) entries) | ||||
|  | ||||
| (* [size_attached_files pdf] is the total size in bytes of all attached files: | ||||
|    the document-level ones plus those attached to each page of the page tree. *) | ||||
| let size_attached_files pdf = | ||||
|   (* Page-level total is bound first, matching the right-to-left evaluation the | ||||
|      compiler uses in practice for the operands of the original sum. *) | ||||
|   let page_total = | ||||
|     sum (flatten (map (size_page_files pdf) (Pdfpage.pages_of_pagetree pdf))) | ||||
|   in | ||||
|   let document_total = size_document_files pdf in | ||||
|     document_total + page_total | ||||
|   | ||||
| @@ -22,3 +22,6 @@ val list_attached_files : Pdf.t -> attachment list | ||||
|  | ||||
| (** Dump attached files to a given directory. *) | ||||
| val dump_attached_files : Pdf.t -> string -> unit | ||||
|  | ||||
| (** Total size in bytes of all attached files. *) | ||||
| val size_attached_files : Pdf.t -> int | ||||
|   | ||||
| @@ -73,14 +73,11 @@ let find_composition_content_streams pdf i obj marked = | ||||
|           [i] | ||||
|       | _ -> [] | ||||
|  | ||||
| let find_composition_embedded_files pdf i obj marked = [] | ||||
|  | ||||
| let find_composition pdf = | ||||
|   let marked = null_hash () in | ||||
|   let images = ref [] in | ||||
|   let fonts = ref [] in | ||||
|   let content_streams = ref [] in | ||||
|   let embedded_files = ref [] in | ||||
|     Pdf.objiter | ||||
|       (fun i obj -> | ||||
|         (*Printf.printf "Looking at object %i\n" i; | ||||
| @@ -89,13 +86,12 @@ let find_composition pdf = | ||||
|         Hashtbl.iter (fun k () -> Printf.printf "%i " k) marked; | ||||
|         Printf.printf "\n";*) | ||||
|          match Hashtbl.find marked i with _ -> () | exception Not_found -> | ||||
|            embedded_files := find_composition_embedded_files pdf i obj marked @ !embedded_files; | ||||
|            images := find_composition_images pdf i obj marked @ !images; | ||||
|            content_streams := find_composition_content_streams pdf i obj marked @ !content_streams; | ||||
|            fonts := find_composition_fonts pdf i obj marked @ !fonts) | ||||
|       pdf; | ||||
|     let structure_info = find_composition_structure_info pdf marked in | ||||
|     (!images, !fonts, !content_streams, structure_info, !embedded_files) | ||||
|     (!images, !fonts, !content_streams, structure_info) | ||||
|  | ||||
| let size pdf i = | ||||
|   String.length (Pdfwrite.string_of_pdf_including_data (Pdf.lookup_obj pdf i)) | ||||
| @@ -123,21 +119,21 @@ let compressed_xref_table_size pdf = | ||||
|  | ||||
| let show_composition_json filesize pdf = | ||||
|   let perc x = float_of_int x /. float_of_int filesize *. 100. in | ||||
|   let o_images, o_fonts, o_content_streams, o_structure_info, o_embedded_files = find_composition pdf in | ||||
|   let images, fonts, content_streams, structure_info, embedded_files, xref_table = | ||||
|   let o_images, o_fonts, o_content_streams, o_structure_info = find_composition pdf in | ||||
|   let images, fonts, content_streams, structure_info, attached_files, xref_table = | ||||
|       compressed_size pdf o_images, | ||||
|       compressed_size pdf o_fonts, | ||||
|       compressed_size pdf o_content_streams, | ||||
|       compressed_size pdf o_structure_info, | ||||
|       compressed_size pdf o_embedded_files, | ||||
|       Cpdfattach.size_attached_files pdf, | ||||
|       compressed_xref_table_size pdf | ||||
|   in | ||||
|   let r = images + fonts + content_streams + structure_info + embedded_files + xref_table in | ||||
|   let r = images + fonts + content_streams + structure_info + attached_files + xref_table in | ||||
|     `List [`Tuple [`String "Images"; `Int images; `Float (perc images)]; | ||||
|            `Tuple [`String "Fonts"; `Int fonts; `Float (perc fonts)]; | ||||
|            `Tuple [`String "Content streams"; `Int content_streams; `Float (perc content_streams)]; | ||||
|            `Tuple [`String "Structure Info"; `Int structure_info; `Float (perc structure_info)]; | ||||
|            `Tuple [`String "Embedded Files"; `Int embedded_files; `Float (perc embedded_files)]; | ||||
|            `Tuple [`String "Attached Files"; `Int attached_files; `Float (perc attached_files)]; | ||||
|            `Tuple [`String "XRef Table"; `Int xref_table; `Float (perc xref_table)]; | ||||
|            `Tuple [`String "Unclassified"; `Int (filesize - r); `Float (perc (filesize - r))]] | ||||
|  | ||||
|   | ||||
| @@ -5,7 +5,7 @@ | ||||
| %Document -list-annotations[-json] now obey page range | ||||
| %Document round-tripping of annotations, supersede -copy-annotations. | ||||
| %Document -utf for JSON and mark -clean-strings as deprecated since can fail to round-trip binary strings which begin with a BOM? | ||||
| %Document -composition[-json] | ||||
| %Document -composition[-json] - mention residue may be negative | ||||
| %Document discourage GhostScript usage, since it can strip data (-gs-malformed, embed missing fonts) | ||||
| %Document [ ] pagespecs | ||||
| %Document extensions to -info | ||||
|   | ||||
		Reference in New Issue
	
	Block a user