Remove nonstandard JSON tuples; add structure tree replacement scaffolding

This commit is contained in:
John Whitington 2024-06-04 15:29:29 +01:00
parent cab4dea2ba
commit 9a39c641a7
4 changed files with 23 additions and 12 deletions

View File

@ -4473,8 +4473,11 @@ let go () =
let pdf = get_single_pdf args.op true in let pdf = get_single_pdf args.op true in
let json = Cpdfua.extract_struct_tree pdf in let json = Cpdfua.extract_struct_tree pdf in
json_to_output json args.out json_to_output json args.out
| Some (ReplaceStructTree f) -> | Some (ReplaceStructTree s) ->
() let pdf = get_single_pdf args.op false in
let json = Cpdfyojson.Safe.from_file s in
Cpdfua.replace_struct_tree pdf json;
write_pdf false pdf
(* Advise the user if a combination of command line flags makes little sense, (* Advise the user if a combination of command line flags makes little sense,
or error out if it makes no sense at all. *) or error out if it makes no sense at all. *)

View File

@ -129,13 +129,13 @@ let show_composition_json filesize pdf =
compressed_xref_table_size pdf compressed_xref_table_size pdf
in in
let r = images + fonts + content_streams + structure_info + attached_files + xref_table in let r = images + fonts + content_streams + structure_info + attached_files + xref_table in
`List [`Tuple [`String "Images"; `Int images; `Float (perc images)]; `List [`List [`String "Images"; `Int images; `Float (perc images)];
`Tuple [`String "Fonts"; `Int fonts; `Float (perc fonts)]; `List [`String "Fonts"; `Int fonts; `Float (perc fonts)];
`Tuple [`String "Content streams"; `Int content_streams; `Float (perc content_streams)]; `List [`String "Content streams"; `Int content_streams; `Float (perc content_streams)];
`Tuple [`String "Structure Info"; `Int structure_info; `Float (perc structure_info)]; `List [`String "Structure Info"; `Int structure_info; `Float (perc structure_info)];
`Tuple [`String "Attached Files"; `Int attached_files; `Float (perc attached_files)]; `List [`String "Attached Files"; `Int attached_files; `Float (perc attached_files)];
`Tuple [`String "XRef Table"; `Int xref_table; `Float (perc xref_table)]; `List [`String "XRef Table"; `Int xref_table; `Float (perc xref_table)];
`Tuple [`String "Unclassified"; `Int (filesize - r); `Float (perc (filesize - r))]] `List [`String "Unclassified"; `Int (filesize - r); `Float (perc (filesize - r))]]
(* Build the composition JSON for [pdf] via [show_composition_json], pretty-print
   it to a string, and return that string converted with
   [Pdfio.bytes_of_string]. *)
let show_composition_json_blob filesize pdf = let show_composition_json_blob filesize pdf =
Pdfio.bytes_of_string (Cpdfyojson.Safe.pretty_to_string (show_composition_json filesize pdf)) Pdfio.bytes_of_string (Cpdfyojson.Safe.pretty_to_string (show_composition_json filesize pdf))
@ -148,7 +148,7 @@ let show_composition filesize json pdf =
| `List js -> | `List js ->
iter iter
(function (function
| `Tuple [`String a; `Int b; `Float c] -> Printf.printf "%s: %i bytes (%.2f%%)\n" a b c | `List [`String a; `Int b; `Float c] -> Printf.printf "%s: %i bytes (%.2f%%)\n" a b c
| _ -> ()) | _ -> ())
js js
| _ -> () | _ -> ()

View File

@ -339,7 +339,7 @@ let extract_struct_tree pdf =
| Some x -> | Some x ->
let objs = Pdf.objects_referenced ["/Pg"; "/Obj"; "/Stm"; "/StmOwn"] [] pdf x in let objs = Pdf.objects_referenced ["/Pg"; "/Obj"; "/Stm"; "/StmOwn"] [] pdf x in
let zero = let zero =
`Tuple [`Int 0; `List [`Int 0;
`Assoc [("/CPDFJSONformatversion", `Int 1); `Assoc [("/CPDFJSONformatversion", `Int 1);
("/CPDFJSONpageobjnumbers", `List (map (fun x -> `Int (unopt (Pdfpage.page_object_number pdf x))) (ilist 1 (Pdfpage.endpage pdf))))]] ("/CPDFJSONpageobjnumbers", `List (map (fun x -> `Int (unopt (Pdfpage.page_object_number pdf x))) (ilist 1 (Pdfpage.endpage pdf))))]]
in in
@ -349,7 +349,13 @@ let extract_struct_tree pdf =
let jsonobj = let jsonobj =
Cpdfjson.json_of_object ~utf8:true ~no_stream_data:false ~parse_content:false pdf (function _ -> ()) (Pdf.lookup_obj pdf objnum) Cpdfjson.json_of_object ~utf8:true ~no_stream_data:false ~parse_content:false pdf (function _ -> ()) (Pdf.lookup_obj pdf objnum)
in in
`Tuple [`Int objnum; jsonobj]) `List [`Int objnum; jsonobj])
objs) objs)
end end
| _ -> error "extract_struct_tree: no root" | _ -> error "extract_struct_tree: no root"
(* Use JSON data to replace objects in a file. Negative object numbers denote
new objects: they are to be made positive and renumbered so that they do not
clash with existing ones. All other objects must keep their numbers.

NOTE: scaffolding only. The body below is currently a no-op: [pdf] and [json]
are ignored and unit is returned without modifying anything. The behaviour
described above is the intended design, not what the code does yet. *)
let replace_struct_tree pdf json =
()

View File

@ -6,3 +6,5 @@ val test_matterhorn_json : Pdf.t -> Cpdfyojson.Safe.t
val mark : Pdf.t -> unit val mark : Pdf.t -> unit
val extract_struct_tree : Pdf.t -> Cpdfyojson.Safe.t val extract_struct_tree : Pdf.t -> Cpdfyojson.Safe.t
(** [replace_struct_tree pdf json] is intended to replace structure tree
    objects in [pdf] using [json]. Presumably [json] is in the format produced
    by [extract_struct_tree] -- TODO confirm. *)
val replace_struct_tree : Pdf.t -> Cpdfyojson.Safe.t -> unit