open Pdfutil let removetext range pdf = (* Could fail on nesting, or other marked content inside our marked content.*) let rec remove_until_last_EMC level = function | [] -> [] | Pdfops.Op_BMC "/CPDFSTAMP"::more -> remove_until_last_EMC (level + 1) more | Pdfops.Op_EMC::more -> if level = 1 then more else remove_until_last_EMC (level - 1) more | _::more -> remove_until_last_EMC level more in let rec remove_stamps prev = function | [] -> rev prev | Pdfops.Op_BMC "/CPDFSTAMP"::more -> let rest = remove_until_last_EMC 1 more in remove_stamps prev rest | h::t -> remove_stamps (h::prev) t in let removetext_page _ page = {page with Pdfpage.content = let ops = Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content in [Pdfops.stream_of_ops (remove_stamps [] ops)]} in Cpdfpage.process_pages (Pdfpage.ppstub removetext_page) pdf range let remove_all_text_ops pdf resources content = let is_textop = function Pdfops.Op_Tj _ | Pdfops.Op_' _ | Pdfops.Op_'' _ | Pdfops.Op_TJ _ -> true | _ -> false in let content' = let ops = Pdfops.parse_operators pdf resources content in Pdfops.stream_of_ops (option_map (function x -> if is_textop x then None else Some x) ops) in [content'] let remove_all_text_page pdf p = let resources = p.Pdfpage.resources in let content = p.Pdfpage.content in Pdfpage.process_xobjects pdf p remove_all_text_ops; {p with Pdfpage.content = remove_all_text_ops pdf resources content}, pdf let remove_all_text range pdf = let pages = Pdfpage.pages_of_pagetree pdf in let pagenums = indx pages in let pdf = ref pdf in let pages' = ref [] in iter2 (fun p pagenum -> let p', pdf' = if mem pagenum range then remove_all_text_page !pdf p else p, !pdf in pdf := pdf'; pages' =| p') pages pagenums; Pdfpage.change_pages true !pdf (rev !pages')