mirror of
https://github.com/johnwhitington/cpdf-source.git
synced 2025-04-14 02:12:27 +02:00
Stamp remove struct tree / artifacts
This commit is contained in:
parent
b9d97b938d
commit
81d88bc57a
15
Makefile
15
Makefile
@ -1,15 +1,14 @@
|
||||
# Build the cpdf command line tools
|
||||
NONDOC = cpdfyojson cpdfxmlm
|
||||
|
||||
DOC = cpdfunicodedata cpdferror cpdfdebug cpdfjson cpdfstrftime cpdfcoord \
|
||||
cpdfattach cpdfpagespec cpdfposition cpdfpresent cpdfmetadata \
|
||||
DOC = cpdfutil cpdfunicodedata cpdferror cpdfdebug cpdfjson cpdfstrftime \
|
||||
cpdfcoord cpdfattach cpdfpagespec cpdfposition cpdfpresent cpdfmetadata \
|
||||
cpdfbookmarks cpdfpage cpdftruetype cpdfremovetext cpdfextracttext \
|
||||
cpdfembed cpdffont cpdftype cpdfaddtext cpdfpad cpdfocg \
|
||||
cpdfsqueeze cpdfdraft cpdfspot cpdfpagelabels cpdfcreate cpdfannot \
|
||||
cpdfxobject cpdfimpose cpdfchop cpdftweak cpdfprinttree cpdfua cpdftexttopdf cpdftoc \
|
||||
cpdfjpeg cpdfjpeg2000 cpdfpng cpdfimage cpdfdraw \
|
||||
cpdfcomposition cpdfshape cpdfcolours cpdfdrawcontrol \
|
||||
cpdfcommand
|
||||
cpdfembed cpdffont cpdftype cpdfaddtext cpdfpad cpdfocg cpdfsqueeze \
|
||||
cpdfdraft cpdfspot cpdfpagelabels cpdfcreate cpdfannot cpdfxobject \
|
||||
cpdfimpose cpdfchop cpdftweak cpdfprinttree cpdfua cpdftexttopdf \
|
||||
cpdftoc cpdfjpeg cpdfjpeg2000 cpdfpng cpdfimage cpdfdraw \
|
||||
cpdfcomposition cpdfshape cpdfcolours cpdfdrawcontrol cpdfcommand
|
||||
|
||||
MODS = $(NONDOC) $(DOC)
|
||||
|
||||
|
@ -4679,11 +4679,11 @@ let go () =
|
||||
(map Pdfpagelabels.string_of_pagelabel (Pdfpagelabels.read pdf))
|
||||
| Some (RemoveDictEntry key) ->
|
||||
let pdf = get_single_pdf args.op true in
|
||||
Cpdftweak.remove_dict_entry pdf key args.dict_entry_search;
|
||||
Cpdfutil.remove_dict_entry pdf key args.dict_entry_search;
|
||||
write_pdf false pdf
|
||||
| Some (ReplaceDictEntry key) ->
|
||||
let pdf = get_single_pdf args.op true in
|
||||
Cpdftweak.replace_dict_entry pdf key args.replace_dict_entry_value args.dict_entry_search;
|
||||
Cpdfutil.replace_dict_entry pdf key args.replace_dict_entry_value args.dict_entry_search;
|
||||
write_pdf false pdf
|
||||
| Some (PrintDictEntry key) ->
|
||||
let pdf = get_single_pdf args.op true in
|
||||
@ -4897,7 +4897,7 @@ let go () =
|
||||
write_pdf false pdf
|
||||
| Some RemoveStructTree ->
|
||||
let pdf = get_single_pdf args.op false in
|
||||
let pdf = Cpdfua.remove_struct_tree pdf in
|
||||
let pdf = Cpdfpage.remove_struct_tree pdf in
|
||||
write_pdf false pdf
|
||||
| Some (SetLanguage s) ->
|
||||
let pdf = get_single_pdf args.op false in
|
||||
|
40
cpdfpage.ml
40
cpdfpage.ml
@ -803,7 +803,47 @@ let do_stamp relative_to_cropbox fast position topline midline scale_to_fit isov
|
||||
Pdfpage.resources =
|
||||
Pdfpage.combine_pdf_resources pdf u.Pdfpage.resources o.Pdfpage.resources}
|
||||
|
||||
(* Remove the structure tree from [pdf]. First every /StructTreeRoot,
   /StructParent and /StructParents dictionary entry is deleted (in place).
   Then each page, from page 1 to the last, has its content re-parsed and
   re-emitted as a single stream without marked-content operators; the same
   operator filter is also handed to Pdfpage.process_xobjects for the page. *)
let remove_struct_tree pdf =
  List.iter
    (fun key -> Cpdfutil.remove_dict_entry pdf key None)
    ["/StructTreeRoot"; "/StructParent"; "/StructParents"];
  (* In fact, all marked content operators are dropped, not just those tied
     to the structure tree. Acceptable in the circumstances. *)
  let is_marked_content = function
    | Pdfops.Op_MP _
    | Pdfops.Op_DP _
    | Pdfops.Op_BMC _
    | Pdfops.Op_BDC _
    | Pdfops.Op_EMC -> true
    | _ -> false
  in
  let strip_ops pdf resources content =
    let kept = lose is_marked_content (Pdfops.parse_operators pdf resources content) in
      [Pdfops.stream_of_ops kept]
  in
  let strip_page _ page =
    (* The page content is rewritten before the xobjects are processed. *)
    let stripped = strip_ops pdf page.Pdfpage.resources page.Pdfpage.content in
      Pdfpage.process_xobjects pdf page strip_ops;
      {page with Pdfpage.content = stripped}
  in
    process_pages (Pdfpage.ppstub strip_page) pdf (ilist 1 (Pdfpage.endpage pdf))
|
||||
|
||||
(* Mark everything in [pdf] as an artifact. For each page, from page 1 to the
   last, the content is parsed, bracketed by BMC /Artifact ... EMC, and
   re-emitted as a single stream. The same wrapping function is also handed
   to Pdfpage.process_xobjects for the page. *)
let mark_all_as_artifact pdf =
  let wrap_ops pdf resources content =
    let body = Pdfops.parse_operators pdf resources content in
      [Pdfops.stream_of_ops ((Pdfops.Op_BMC "/Artifact" :: body) @ [Pdfops.Op_EMC])]
  in
  let wrap_page _ page =
    (* The page content is rewritten before the xobjects are processed. *)
    let wrapped = wrap_ops pdf page.Pdfpage.resources page.Pdfpage.content in
      Pdfpage.process_xobjects pdf page wrap_ops;
      {page with Pdfpage.content = wrapped}
  in
    process_pages (Pdfpage.ppstub wrap_page) pdf (ilist 1 (Pdfpage.endpage pdf))
|
||||
|
||||
let stamp ~process_struct_tree relative_to_cropbox position topline midline fast scale_to_fit isover range over pdf =
|
||||
let over = if process_struct_tree then mark_all_as_artifact (remove_struct_tree over) else over in
|
||||
let prefix = Pdfpage.shortest_unused_prefix pdf in
|
||||
Pdfpage.add_prefix over prefix;
|
||||
let marks = Pdfmarks.read_bookmarks pdf in
|
||||
|
@ -134,3 +134,10 @@ val alluprightonly : int list -> Pdf.t -> bool
|
||||
val change_pattern_matrices_page : Pdf.t -> Pdftransform.transform_matrix -> Pdfpage.t -> Pdfpage.t
|
||||
|
||||
val redact : process_struct_tree:bool -> Pdf.t -> int list -> Pdf.t
|
||||
|
||||
(** Remove a structure tree entirely from a file, including unmarking marked content. *)
|
||||
val remove_struct_tree : Pdf.t -> Pdf.t
|
||||
|
||||
(** Mark a PDF as being entirely artifacts (may be used after running [remove_struct_tree]). *)
|
||||
val mark_all_as_artifact : Pdf.t -> Pdf.t
|
||||
|
||||
|
25
cpdftweak.ml
25
cpdftweak.ml
@ -238,31 +238,6 @@ let rec dict_entry_single_object f pdf = function
|
||||
| Pdf.Array a -> Pdf.recurse_array (dict_entry_single_object f pdf) a
|
||||
| x -> x
|
||||
|
||||
(* Remove [key] from every dictionary (and stream dictionary) reached from the
   objects of [pdf] and from its trailer dictionary. The PDF is modified in
   place. With [search = None] the entry is removed unconditionally; with
   [search = Some s] it is removed only where the value found by
   Pdf.lookup_direct is structurally equal to [s]. *)
(* FIXME are we sure that functional values can never appear in the equality here? *)
let remove_dict_entry pdf key search =
  let wanted d =
    match search with
    | None -> true
    | Some s ->
        (match Pdf.lookup_direct pdf key d with
         | Some v -> v = s
         | None -> false)
  in
  let f d = if wanted d then Pdf.remove_dict_entry d key else d in
    Pdf.objselfmap (dict_entry_single_object f pdf) pdf;
    pdf.Pdf.trailerdict <- dict_entry_single_object f pdf pdf.Pdf.trailerdict
|
||||
|
||||
(* Replace the value of [key] with [value] in every dictionary (and stream
   dictionary) reached from the objects of [pdf] and from its trailer
   dictionary. The PDF is modified in place. With [search = None] a
   replacement is attempted on every dictionary and any failure leaves that
   dictionary untouched; with [search = Some s] only dictionaries whose
   current value (via Pdf.lookup_direct) is structurally equal to [s] are
   changed. *)
let replace_dict_entry pdf key value search =
  let f d =
    match search with
    | None ->
        (* NOTE(review): the catch-all presumably exists for dictionaries
           lacking [key]; confirm which exception Pdf.replace_dict_entry raises. *)
        (try Pdf.replace_dict_entry d key value with _ -> d)
    | Some s ->
        if Pdf.lookup_direct pdf key d = Some s
          then Pdf.replace_dict_entry d key value
          else d
  in
    Pdf.objselfmap (dict_entry_single_object f pdf) pdf;
    pdf.Pdf.trailerdict <- dict_entry_single_object f pdf pdf.Pdf.trailerdict
|
||||
|
||||
let print_dict_entry ~utf8 pdf key =
|
||||
let f d =
|
||||
match Pdf.lookup_direct pdf key d with
|
||||
|
@ -15,12 +15,6 @@ val blackfills : Cpdfaddtext.colour -> int list -> Pdf.t -> Pdf.t
|
||||
(** Append page content. *)
|
||||
val append_page_content : string -> bool -> bool -> int list -> Pdf.t -> Pdf.t
|
||||
|
||||
(** Remove a dictionary entry. *)
|
||||
val remove_dict_entry : Pdf.t -> string -> Pdf.pdfobject option -> unit
|
||||
|
||||
(** Replace a dictionary entry. *)
|
||||
val replace_dict_entry : Pdf.t -> string -> Pdf.pdfobject -> Pdf.pdfobject option -> unit
|
||||
|
||||
(** Print a dictionary entry. *)
|
||||
val print_dict_entry : utf8:bool -> Pdf.t -> string -> unit
|
||||
|
||||
|
26
cpdfua.ml
26
cpdfua.ml
@ -1850,29 +1850,3 @@ let create_pdfua2 title pagesize pages =
|
||||
let pdf = {pdf with Pdf.major = 2; Pdf.minor = 0} in
|
||||
mark2 2024 pdf;
|
||||
pdf
|
||||
|
||||
(* Remove the structure tree from [pdf]. First every /StructTreeRoot,
   /StructParent and /StructParents dictionary entry is deleted (in place).
   Then each page, from page 1 to the last, has its content re-parsed and
   re-emitted as a single stream without marked-content operators; the same
   operator filter is also handed to Pdfpage.process_xobjects for the page. *)
let remove_struct_tree pdf =
  List.iter
    (fun key -> Cpdftweak.remove_dict_entry pdf key None)
    ["/StructTreeRoot"; "/StructParent"; "/StructParents"];
  (* In fact, all marked content operators are dropped, not just those tied
     to the structure tree. Acceptable in the circumstances. *)
  let is_marked_content = function
    | Pdfops.Op_MP _
    | Pdfops.Op_DP _
    | Pdfops.Op_BMC _
    | Pdfops.Op_BDC _
    | Pdfops.Op_EMC -> true
    | _ -> false
  in
  let strip_ops pdf resources content =
    let kept = lose is_marked_content (Pdfops.parse_operators pdf resources content) in
      [Pdfops.stream_of_ops kept]
  in
  let strip_page _ page =
    (* The page content is rewritten before the xobjects are processed. *)
    let stripped = strip_ops pdf page.Pdfpage.resources page.Pdfpage.content in
      Pdfpage.process_xobjects pdf page strip_ops;
      {page with Pdfpage.content = stripped}
  in
    Cpdfpage.process_pages (Pdfpage.ppstub strip_page) pdf (ilist 1 (Pdfpage.endpage pdf))
|
||||
|
@ -31,9 +31,6 @@ val extract_struct_tree : Pdf.t -> Cpdfyojson.Safe.t
|
||||
(** Reapply an edited JSON structure tree to its PDF. *)
|
||||
val replace_struct_tree : Pdf.t -> Cpdfyojson.Safe.t -> unit
|
||||
|
||||
(** Remove a structure tree entirely from a file, including unmarking marked content. *)
|
||||
val remove_struct_tree : Pdf.t -> Pdf.t
|
||||
|
||||
(** Make a blank PDF/UA-1 PDF given a title, paper size, and number of pages. *)
|
||||
val create_pdfua1 : string -> Pdfpaper.t -> int -> Pdf.t
|
||||
|
||||
|
32
cpdfutil.ml
Normal file
32
cpdfutil.ml
Normal file
@ -0,0 +1,32 @@
|
||||
(* Walk a PDF object, applying [f] to dictionaries. A dictionary is first
   passed through Pdf.recurse_dict with this same traversal and the result is
   given to [f]; a stream whose first component is a dictionary is rebuilt
   with its dictionary treated likewise (data untouched) and then given to
   [f]; an array is passed through Pdf.recurse_array. Any other object is
   returned unchanged. *)
let rec dict_entry_single_object f pdf obj =
  let recurse = dict_entry_single_object f pdf in
    match obj with
    | Pdf.Dictionary entries -> f (Pdf.recurse_dict recurse entries)
    | Pdf.Stream {contents = (Pdf.Dictionary entries, data)} ->
        f (Pdf.Stream {contents = (Pdf.recurse_dict recurse entries, data)})
    | Pdf.Array elts -> Pdf.recurse_array recurse elts
    | other -> other
|
||||
|
||||
(* Remove [key] from every dictionary (and stream dictionary) reached from the
   objects of [pdf] and from its trailer dictionary. The PDF is modified in
   place. With [search = None] the entry is removed unconditionally; with
   [search = Some s] it is removed only where the value found by
   Pdf.lookup_direct is structurally equal to [s]. *)
(* FIXME are we sure that functional values can never appear in the equality here? *)
let remove_dict_entry pdf key search =
  let wanted d =
    match search with
    | None -> true
    | Some s ->
        (match Pdf.lookup_direct pdf key d with
         | Some v -> v = s
         | None -> false)
  in
  let f d = if wanted d then Pdf.remove_dict_entry d key else d in
    Pdf.objselfmap (dict_entry_single_object f pdf) pdf;
    pdf.Pdf.trailerdict <- dict_entry_single_object f pdf pdf.Pdf.trailerdict
|
||||
|
||||
(* Replace the value of [key] with [value] in every dictionary (and stream
   dictionary) reached from the objects of [pdf] and from its trailer
   dictionary. The PDF is modified in place. With [search = None] a
   replacement is attempted on every dictionary and any failure leaves that
   dictionary untouched; with [search = Some s] only dictionaries whose
   current value (via Pdf.lookup_direct) is structurally equal to [s] are
   changed. *)
let replace_dict_entry pdf key value search =
  let f d =
    match search with
    | None ->
        (* NOTE(review): the catch-all presumably exists for dictionaries
           lacking [key]; confirm which exception Pdf.replace_dict_entry raises. *)
        (try Pdf.replace_dict_entry d key value with _ -> d)
    | Some s ->
        if Pdf.lookup_direct pdf key d = Some s
          then Pdf.replace_dict_entry d key value
          else d
  in
    Pdf.objselfmap (dict_entry_single_object f pdf) pdf;
    pdf.Pdf.trailerdict <- dict_entry_single_object f pdf pdf.Pdf.trailerdict
|
||||
|
6
cpdfutil.mli
Normal file
6
cpdfutil.mli
Normal file
@ -0,0 +1,6 @@
|
||||
(** Remove a dictionary entry. *)
|
||||
val remove_dict_entry : Pdf.t -> string -> Pdf.pdfobject option -> unit
|
||||
|
||||
(** Replace a dictionary entry. *)
|
||||
val replace_dict_entry : Pdf.t -> string -> Pdf.pdfobject -> Pdf.pdfobject option -> unit
|
||||
|
Loading…
x
Reference in New Issue
Block a user