diff --git a/cpdfcommand.ml b/cpdfcommand.ml index ab32cb3..bc3ab02 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -3032,6 +3032,7 @@ let specs = ("-print-struct-tree", Arg.Unit (fun () -> setop PrintStructTree ()), " Print structure tree"); ("-extract-struct-tree", Arg.Unit (fun () -> setop ExtractStructTree ()), " Extract structure tree in JSON format"); ("-replace-struct-tree", Arg.String (fun s -> setop (ReplaceStructTree s) ()), " Replace structure tree from JSON"); + ("-remove-struct-tree", Arg.Unit (fun () -> setop RemoveStructTree ()), " Remove entire structure tree"); ("-redact", Arg.Unit (fun () -> setop Redact ()), " Redact entire pages"); ("-rasterize", Arg.Unit (fun () -> setop Rasterize ()), " Rasterize pages"); ("-rasterize-gray", Arg.Unit (fun () -> args.rast_device <- "pnggray"), " Rasterize in grayscale"); @@ -4895,7 +4896,7 @@ let go () = write_pdf false pdf | Some RemoveStructTree -> let pdf = get_single_pdf args.op false in - Cpdfua.remove_struct_tree pdf; + let pdf = Cpdfua.remove_struct_tree pdf in write_pdf false pdf | Some (SetLanguage s) -> let pdf = get_single_pdf args.op false in diff --git a/cpdftweak.ml b/cpdftweak.ml index 4e67b77..96da311 100644 --- a/cpdftweak.ml +++ b/cpdftweak.ml @@ -92,7 +92,7 @@ let blacktext c range pdf = in Cpdfpage.process_pages (Pdfpage.ppstub blacktext_page) pdf range -(* Blacken lines *) +(* Blacken lines. FIXME Why doesn't this do xobjects like the other two? *) let blacklines_ops c pdf resources content = let rec blacken_strokeops prev = function | [] -> rev prev diff --git a/cpdfua.ml b/cpdfua.ml index e296d7c..56efe78 100644 --- a/cpdfua.ml +++ b/cpdfua.ml @@ -1835,7 +1835,7 @@ let create_pdfua1 title pagesize pages = let pdf = Cpdfmetadata.set_viewer_preference ("/DisplayDocTitle", Pdf.Boolean true, 0) pdf in Pdf.replace_chain pdf ["/Root"; "/MarkInfo"; "/Marked"] (Pdf.Boolean true); Pdf.replace_chain pdf ["/Root"; "/StructTreeRoot"; "/Type"] (Pdf.Name "/StructTreeRoot"); - let pdf = {pdf with Pdf.major = 1; Pdf.minor = 7} in + let pdf = {pdf with Pdf.major = 1; Pdf.minor = 7} in mark pdf; pdf @@ -1847,8 +1847,32 @@ let create_pdfua2 title pagesize pages = let pdf = Cpdfmetadata.set_viewer_preference ("/DisplayDocTitle", Pdf.Boolean true, 0) pdf in Pdf.replace_chain pdf ["/Root"; "/MarkInfo"; "/Marked"] (Pdf.Boolean true); Pdf.replace_chain pdf ["/Root"; "/StructTreeRoot"; "/Type"] (Pdf.Name "/StructTreeRoot"); - let pdf = {pdf with Pdf.major = 2; Pdf.minor = 0} in + let pdf = {pdf with Pdf.major = 2; Pdf.minor = 0} in mark2 2024 pdf; pdf -let remove_struct_tree pdf = () +let remove_struct_tree pdf = + Cpdftweak.remove_dict_entry pdf "/StructTreeRoot" None; + Cpdftweak.remove_dict_entry pdf "/StructParent" None; + Cpdftweak.remove_dict_entry pdf "/StructParents" None; + let remove_struct_tree_ops pdf resources content = + let operators = Pdfops.parse_operators pdf resources content in + (* In fact, we remove all marked content regions. Acceptable in the circumstances. *) + let remove_mcids = + lose + (function + | Pdfops.Op_MP _ + | Pdfops.Op_DP _ + | Pdfops.Op_BMC _ + | Pdfops.Op_BDC _ + | Pdfops.Op_EMC -> true | _ -> false) + in + let operators' = remove_mcids operators in + [Pdfops.stream_of_ops operators'] + in + let remove_struct_tree_page _ page = + let content' = remove_struct_tree_ops pdf page.Pdfpage.resources page.Pdfpage.content in + Pdfpage.process_xobjects pdf page remove_struct_tree_ops; + {page with Pdfpage.content = content'} + in + Cpdfpage.process_pages (Pdfpage.ppstub remove_struct_tree_page) pdf (ilist 1 (Pdfpage.endpage pdf)) diff --git a/cpdfua.mli b/cpdfua.mli index 8d1d1b5..d73f096 100644 --- a/cpdfua.mli +++ b/cpdfua.mli @@ -32,7 +32,7 @@ val extract_struct_tree : Pdf.t -> Cpdfyojson.Safe.t val replace_struct_tree : Pdf.t -> Cpdfyojson.Safe.t -> unit (** Remove a structure tree entirely from a file, including unmarking marked content. *) -val remove_struct_tree : Pdf.t -> unit +val remove_struct_tree : Pdf.t -> Pdf.t (** Make a blank PDF/UA-1 PDF given a title, paper size, and number of pages. *) val create_pdfua1 : string -> Pdfpaper.t -> int -> Pdf.t diff --git a/cpdfxobject.ml b/cpdfxobject.ml index 3108caa..a81de19 100644 --- a/cpdfxobject.ml +++ b/cpdfxobject.ml @@ -85,4 +85,3 @@ let stamp_as_xobject pdf range over = let pdf = Pdfmarks.add_bookmarks new_marks changed in let name = "/" ^ Pdfpage.shortest_unused_prefix pdf ^ "CPDFXObj" in (add_page_as_xobject pdf range over_page name, name) -