164 lines
5.6 KiB
OCaml
164 lines
5.6 KiB
OCaml
open Pdfutil
|
|
open Pdfio
|
|
|
|
(* Making draft documents *)
|
|
|
|
(* Predicate on an xobject: true if an image xobject. *)
|
|
let isimage pdf (_, xobj) =
|
|
match Pdf.lookup_direct pdf "/Subtype" xobj with
|
|
| Some (Pdf.Name "/Image") -> true
|
|
| _ -> false
|
|
|
|
(* Given a set of resources for a page, and the name of a resource, determine if
|
|
that name refers to an image xobject. *)
|
|
let xobject_isimage pdf resources name =
|
|
match resources with
|
|
| Pdf.Dictionary _ ->
|
|
begin match Pdf.lookup_direct pdf "/XObject" resources with
|
|
| Some xobjects ->
|
|
isimage pdf ("", Pdf.lookup_fail "xobject not there" pdf name xobjects)
|
|
| _ -> false
|
|
end
|
|
| _ -> failwith "bad resources"
|
|
|
|
(* The subsitute for an image. *)
|
|
let substitute boxes =
|
|
if boxes then
|
|
rev
|
|
[Pdfops.Op_q;
|
|
Pdfops.Op_w 0.;
|
|
Pdfops.Op_G 0.;
|
|
Pdfops.Op_re (0., 0., 1., 1.);
|
|
Pdfops.Op_m (0., 0.);
|
|
Pdfops.Op_l (1., 1.);
|
|
Pdfops.Op_m (0., 1.);
|
|
Pdfops.Op_l (1., 0.);
|
|
Pdfops.Op_S;
|
|
Pdfops.Op_Q]
|
|
else
|
|
[]
|
|
|
|
(* Remove references to images from a graphics stream. *)
|
|
let rec remove_images_stream onlyremove boxes pdf resources prev = function
|
|
| [] -> rev prev
|
|
| (Pdfops.Op_Do name) as h::t ->
|
|
if xobject_isimage pdf resources name && (match onlyremove with None -> true | Some x -> x = name)
|
|
then remove_images_stream onlyremove boxes pdf resources (substitute boxes @ prev) t
|
|
else remove_images_stream onlyremove boxes pdf resources (h::prev) t
|
|
| Pdfops.InlineImage _ as h::t ->
|
|
if onlyremove <> None
|
|
then remove_images_stream onlyremove boxes pdf resources (h::prev) t
|
|
else remove_images_stream onlyremove boxes pdf resources (substitute boxes @ prev) t
|
|
| h::t ->
|
|
remove_images_stream onlyremove boxes pdf resources (h::prev) t
|
|
|
|
let rec process_form_xobject onlyremove boxes pdf form =
|
|
let form = Pdf.direct pdf form in
|
|
let page =
|
|
{Pdfpage.content = [form];
|
|
Pdfpage.mediabox = Pdf.Null;
|
|
Pdfpage.resources =
|
|
begin match Pdf.lookup_direct pdf "/Resources" form with
|
|
| Some r -> r
|
|
| None -> Pdf.Dictionary []
|
|
end;
|
|
Pdfpage.rotate = Pdfpage.Rotate0;
|
|
Pdfpage.rest = Pdf.Dictionary []}
|
|
in
|
|
let page', pdf =
|
|
remove_images_page onlyremove boxes pdf page
|
|
in
|
|
let form' =
|
|
match form with
|
|
| Pdf.Stream {contents = (dict, _)} ->
|
|
begin match
|
|
Pdfops.stream_of_ops
|
|
(Pdfops.parse_operators pdf (Pdf.Dictionary []) page'.Pdfpage.content)
|
|
with
|
|
| Pdf.Stream {contents = (_, Pdf.Got data)} ->
|
|
let dict' =
|
|
Pdf.add_dict_entry dict "/Length" (Pdf.Integer (bytes_size data))
|
|
in
|
|
Pdf.Stream {contents = (dict', Pdf.Got data)}
|
|
| _ -> assert false
|
|
end
|
|
| _ -> raise (Pdf.PDFError "not a stream")
|
|
in
|
|
form', pdf
|
|
|
|
(* Remove images from a page. *)
|
|
and remove_images_page onlyremove boxes pdf page =
|
|
let isform pdf xobj =
|
|
match Pdf.lookup_direct pdf "/Subtype" xobj with Some (Pdf.Name "/Form") -> true | _ -> false
|
|
in
|
|
let isimage pdf xobj =
|
|
match Pdf.lookup_direct pdf "/Subtype" xobj with Some (Pdf.Name "/Image") -> true | _ -> false
|
|
in
|
|
(* Remove image xobjects and look into form ones *)
|
|
let form_xobjects, image_xobjects =
|
|
match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with
|
|
| Some (Pdf.Dictionary elts) ->
|
|
keep (function (_, p) -> isform pdf p) elts,
|
|
keep (function (_, p) -> isimage pdf p) elts
|
|
| _ -> [], []
|
|
in
|
|
let resources', pdf =
|
|
let names, pointers = split form_xobjects in
|
|
let form_xobjects', pdf =
|
|
let pdf = ref pdf
|
|
in let outputs = ref [] in
|
|
iter
|
|
(fun p ->
|
|
let p', pdf' = process_form_xobject onlyremove boxes !pdf p in
|
|
pdf := pdf';
|
|
outputs =| p')
|
|
pointers;
|
|
rev !outputs, !pdf
|
|
in
|
|
let nums = ref [] in
|
|
iter
|
|
(fun xobj ->
|
|
let objnum = Pdf.addobj pdf xobj in
|
|
nums =| objnum)
|
|
form_xobjects';
|
|
let image_xobjects' =
|
|
match onlyremove with
|
|
None -> []
|
|
| Some n -> option_map (function (n', _) as xobj -> if n = n' then None else Some xobj) image_xobjects
|
|
in
|
|
let newdict =
|
|
Pdf.Dictionary (image_xobjects' @ combine names (map (fun x -> Pdf.Indirect x) (rev !nums)))
|
|
in
|
|
Pdf.add_dict_entry page.Pdfpage.resources "/XObject" newdict, pdf
|
|
in
|
|
let content' =
|
|
remove_images_stream onlyremove boxes pdf page.Pdfpage.resources []
|
|
(Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content)
|
|
in
|
|
{page with
|
|
Pdfpage.content =
|
|
(let stream = Pdfops.stream_of_ops content' in
|
|
Pdfcodec.encode_pdfstream pdf Pdfcodec.Flate stream;
|
|
[stream]);
|
|
Pdfpage.resources = resources'}, pdf
|
|
|
|
(* Remove images from all pages in a document. *)
|
|
let draft onlyremove boxes range pdf =
|
|
let pages = Pdfpage.pages_of_pagetree pdf in
|
|
let pagenums = indx pages in
|
|
let pdf = ref pdf
|
|
in let pages' = ref [] in
|
|
iter2
|
|
(fun p pagenum ->
|
|
let p', pdf' =
|
|
if mem pagenum range
|
|
then remove_images_page onlyremove boxes !pdf p
|
|
else p, !pdf
|
|
in
|
|
pdf := pdf';
|
|
pages' =| p')
|
|
pages
|
|
pagenums;
|
|
Pdfpage.change_pages true !pdf (rev !pages')
|
|
|