Implements -remove-struct-tree

This commit is contained in:
John Whitington
2025-03-10 14:50:26 +00:00
parent 38e7573350
commit f541afedde
5 changed files with 31 additions and 7 deletions

View File

@ -3032,6 +3032,7 @@ let specs =
("-print-struct-tree", Arg.Unit (fun () -> setop PrintStructTree ()), " Print structure tree");
("-extract-struct-tree", Arg.Unit (fun () -> setop ExtractStructTree ()), " Extract structure tree in JSON format");
("-replace-struct-tree", Arg.String (fun s -> setop (ReplaceStructTree s) ()), " Replace structure tree from JSON");
("-remove-struct-tree", Arg.Unit (fun () -> setop RemoveStructTree ()), " Remove entire structure tree");
("-redact", Arg.Unit (fun () -> setop Redact ()), " Redact entire pages");
("-rasterize", Arg.Unit (fun () -> setop Rasterize ()), " Rasterize pages");
("-rasterize-gray", Arg.Unit (fun () -> args.rast_device <- "pnggray"), " Rasterize in grayscale");
@ -4895,7 +4896,7 @@ let go () =
write_pdf false pdf
| Some RemoveStructTree ->
let pdf = get_single_pdf args.op false in
Cpdfua.remove_struct_tree pdf;
let pdf = Cpdfua.remove_struct_tree pdf in
write_pdf false pdf
| Some (SetLanguage s) ->
let pdf = get_single_pdf args.op false in

View File

@ -92,7 +92,7 @@ let blacktext c range pdf =
in
Cpdfpage.process_pages (Pdfpage.ppstub blacktext_page) pdf range
(* Blacken lines *)
(* Blacken lines. FIXME Why doesn't this do xobjects like the other two? *)
let blacklines_ops c pdf resources content =
let rec blacken_strokeops prev = function
| [] -> rev prev

View File

@ -1851,4 +1851,28 @@ let create_pdfua2 title pagesize pages =
mark2 2024 pdf;
pdf
let remove_struct_tree pdf = ()
(* Remove the structure tree from [pdf]. First the /StructTreeRoot,
   /StructParent and /StructParents dictionary entries are removed via
   Cpdftweak.remove_dict_entry. Then, for pages 1 to [Pdfpage.endpage pdf], the
   marked-content operators are dropped from the page content, and the same
   rewrite is handed to Pdfpage.process_xobjects for that page. The result is
   whatever Cpdfpage.process_pages returns. *)
let remove_struct_tree pdf =
  Cpdftweak.remove_dict_entry pdf "/StructTreeRoot" None;
  Cpdftweak.remove_dict_entry pdf "/StructParent" None;
  Cpdftweak.remove_dict_entry pdf "/StructParents" None;
  (* No attempt is made to tell structure-related marked content apart from
     any other kind: every marked-content operator is dropped. Acceptable in
     the circumstances. *)
  let is_marked_content_op = function
    | Pdfops.Op_MP _ | Pdfops.Op_DP _
    | Pdfops.Op_BMC _ | Pdfops.Op_BDC _
    | Pdfops.Op_EMC -> true
    | _ -> false
  in
  (* Parse a content stream list, drop the marked-content operators, and
     rebuild it as a single stream. *)
  let strip_marked_content pdf resources content =
    let kept =
      lose is_marked_content_op (Pdfops.parse_operators pdf resources content)
    in
      [Pdfops.stream_of_ops kept]
  in
  let strip_page _ page =
    (* The page's own content is rewritten before its xobjects are processed. *)
    let stripped =
      strip_marked_content pdf page.Pdfpage.resources page.Pdfpage.content
    in
      Pdfpage.process_xobjects pdf page strip_marked_content;
      {page with Pdfpage.content = stripped}
  in
  let all_pages = ilist 1 (Pdfpage.endpage pdf) in
    Cpdfpage.process_pages (Pdfpage.ppstub strip_page) pdf all_pages

View File

@ -32,7 +32,7 @@ val extract_struct_tree : Pdf.t -> Cpdfyojson.Safe.t
val replace_struct_tree : Pdf.t -> Cpdfyojson.Safe.t -> unit
(** Remove a structure tree entirely from a file, including unmarking marked content. *)
val remove_struct_tree : Pdf.t -> unit
val remove_struct_tree : Pdf.t -> Pdf.t
(** Make a blank PDF/UA-1 PDF given a title, paper size, and number of pages. *)
val create_pdfua1 : string -> Pdfpaper.t -> int -> Pdf.t

View File

@ -85,4 +85,3 @@ let stamp_as_xobject pdf range over =
let pdf = Pdfmarks.add_bookmarks new_marks changed in
let name = "/" ^ Pdfpage.shortest_unused_prefix pdf ^ "CPDFXObj" in
(add_page_as_xobject pdf range over_page name, name)