From 9a39c641a7be3b53d41aad3b01f7ba745542b176 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Tue, 4 Jun 2024 15:29:29 +0100 Subject: [PATCH] Remove nonstandard JSON tuples, replacement scaffolding --- cpdfcommand.ml | 7 +++++-- cpdfcomposition.ml | 16 ++++++++-------- cpdfua.ml | 10 ++++++++-- cpdfua.mli | 2 ++ 4 files changed, 23 insertions(+), 12 deletions(-) diff --git a/cpdfcommand.ml b/cpdfcommand.ml index 43eac95..62951cd 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -4473,8 +4473,11 @@ let go () = let pdf = get_single_pdf args.op true in let json = Cpdfua.extract_struct_tree pdf in json_to_output json args.out - | Some (ReplaceStructTree f) -> - () + | Some (ReplaceStructTree s) -> + let pdf = get_single_pdf args.op false in + let json = Cpdfyojson.Safe.from_file s in + Cpdfua.replace_struct_tree pdf json; + write_pdf false pdf (* Advise the user if a combination of command line flags makes little sense, or error out if it make no sense at all. *) diff --git a/cpdfcomposition.ml b/cpdfcomposition.ml index 7f1235d..6aa2342 100644 --- a/cpdfcomposition.ml +++ b/cpdfcomposition.ml @@ -129,13 +129,13 @@ let show_composition_json filesize pdf = compressed_xref_table_size pdf in let r = images + fonts + content_streams + structure_info + attached_files + xref_table in - `List [`Tuple [`String "Images"; `Int images; `Float (perc images)]; - `Tuple [`String "Fonts"; `Int fonts; `Float (perc fonts)]; - `Tuple [`String "Content streams"; `Int content_streams; `Float (perc content_streams)]; - `Tuple [`String "Structure Info"; `Int structure_info; `Float (perc structure_info)]; - `Tuple [`String "Attached Files"; `Int attached_files; `Float (perc attached_files)]; - `Tuple [`String "XRef Table"; `Int xref_table; `Float (perc xref_table)]; - `Tuple [`String "Unclassified"; `Int (filesize - r); `Float (perc (filesize - r))]] + `List [`List [`String "Images"; `Int images; `Float (perc images)]; + `List [`String "Fonts"; `Int fonts; `Float (perc fonts)]; + `List [`String "Content streams"; `Int content_streams; `Float (perc content_streams)]; + `List [`String "Structure Info"; `Int structure_info; `Float (perc structure_info)]; + `List [`String "Attached Files"; `Int attached_files; `Float (perc attached_files)]; + `List [`String "XRef Table"; `Int xref_table; `Float (perc xref_table)]; + `List [`String "Unclassified"; `Int (filesize - r); `Float (perc (filesize - r))]] let show_composition_json_blob filesize pdf = Pdfio.bytes_of_string (Cpdfyojson.Safe.pretty_to_string (show_composition_json filesize pdf)) @@ -148,7 +148,7 @@ let show_composition filesize json pdf = | `List js -> iter (function - | `Tuple [`String a; `Int b; `Float c] -> Printf.printf "%s: %i bytes (%.2f%%)\n" a b c + | `List [`String a; `Int b; `Float c] -> Printf.printf "%s: %i bytes (%.2f%%)\n" a b c | _ -> ()) js | _ -> () diff --git a/cpdfua.ml b/cpdfua.ml index eadaa57..28f1c94 100644 --- a/cpdfua.ml +++ b/cpdfua.ml @@ -339,7 +339,7 @@ let extract_struct_tree pdf = | Some x -> let objs = Pdf.objects_referenced ["/Pg"; "/Obj"; "/Stm"; "/StmOwn"] [] pdf x in let zero = - `Tuple [`Int 0; + `List [`Int 0; `Assoc [("/CPDFJSONformatversion", `Int 1); ("/CPDFJSONpageobjnumbers", `List (map (fun x -> `Int (unopt (Pdfpage.page_object_number pdf x))) (ilist 1 (Pdfpage.endpage pdf))))]] in @@ -349,7 +349,13 @@ let extract_struct_tree pdf = let jsonobj = Cpdfjson.json_of_object ~utf8:true ~no_stream_data:false ~parse_content:false pdf (function _ -> ()) (Pdf.lookup_obj pdf objnum) in - `Tuple [`Int objnum; jsonobj]) + `List [`Int objnum; jsonobj]) objs) end | _ -> error "extract_struct_tree: no root" + +(* Use JSON data to replace objects in a file. Negative objects are new ones, + we make them positive and renumber them not to clash. Everything else must + remain unrenumbered. *) +let replace_struct_tree pdf json = + () diff --git a/cpdfua.mli b/cpdfua.mli index b653cb0..a5c433c 100644 --- a/cpdfua.mli +++ b/cpdfua.mli @@ -6,3 +6,5 @@ val test_matterhorn_json : Pdf.t -> Cpdfyojson.Safe.t val mark : Pdf.t -> unit val extract_struct_tree : Pdf.t -> Cpdfyojson.Safe.t + +val replace_struct_tree : Pdf.t -> Cpdfyojson.Safe.t -> unit