cpdf-source/cpdfdraw.ml

315 lines
11 KiB
OCaml
Raw Normal View History

2022-12-15 13:41:19 +01:00
open Pdfutil
2022-12-22 17:20:00 +01:00
type colspec =
2022-12-15 13:41:19 +01:00
NoCol
| RGB of float * float * float
| Grey of float
| CYMK of float * float * float * float
type drawops =
2022-12-21 17:09:04 +01:00
| Rect of float * float * float * float
| Bezier of float * float * float * float * float * float
2022-12-15 13:41:19 +01:00
| To of float * float
| Line of float * float
2022-12-16 14:13:55 +01:00
| ClosePath
2022-12-22 17:20:00 +01:00
| SetFill of colspec
| SetStroke of colspec
2022-12-15 15:20:41 +01:00
| SetLineThickness of float
| SetLineCap of int
| SetLineJoin of int
| SetMiterLimit of float
| SetDashPattern of float list * float
2022-12-16 13:13:38 +01:00
| Matrix of Pdftransform.transform_matrix
| Push
| Pop
2022-12-16 14:13:55 +01:00
| Fill
| FillEvenOdd
| Stroke
| FillStroke
| FillStrokeEvenOdd
2022-12-21 17:40:13 +01:00
| Clip
| ClipEvenOdd
2023-05-04 14:53:49 +02:00
| FormXObject of float * float * float * float * string * drawops list
2023-05-03 15:19:55 +02:00
| Use of string
2022-12-22 21:42:55 +01:00
| ImageXObject of string * Pdf.pdfobject
2023-05-03 15:19:55 +02:00
| Image of string
2023-04-27 20:14:58 +02:00
| NewPage
| Opacity of float
| SOpacity of float
2023-04-28 17:09:19 +02:00
| Font of Pdftext.standard_font * float
2023-05-01 15:39:42 +02:00
| BT
| ET
2023-04-27 20:14:58 +02:00
| Text of string
2023-05-02 15:47:18 +02:00
| SpecialText of string
2023-04-28 20:03:10 +02:00
| Newline
2023-05-01 15:39:42 +02:00
| Leading of float
| CharSpace of float
| WordSpace of float
| TextScale of float
| RenderMode of int
| Rise of float
2023-04-27 20:14:58 +02:00
| URL of string
| EndURL
2022-12-22 17:20:00 +01:00
2023-05-03 16:49:14 +02:00
(* Per page resources *)
2023-05-04 16:01:12 +02:00
type res =
{images : (string, (string * int)) Hashtbl.t; (* (name, (pdf name, objnum)) *)
2023-05-05 17:17:35 +02:00
extgstates : ((string * float), string) Hashtbl.t; (* (kind, value), name *)
fonts : (Pdftext.font, (string * int)) Hashtbl.t; (* (font, (objnum, pdf name)) *)
2023-05-05 14:42:47 +02:00
form_xobjects : (string, (string * int)) Hashtbl.t; (* (name, (pdf name, objnum)) *)
2023-05-07 17:40:02 +02:00
mutable page_names : string list;
2023-05-04 16:01:12 +02:00
mutable time : Cpdfstrftime.t;
mutable current_url : string option;
mutable current_font : Pdftext.font;
mutable num : int}
2023-05-03 16:49:14 +02:00
2023-05-08 16:13:17 +02:00
let empty_res =
2023-05-04 16:01:12 +02:00
{images = null_hash ();
extgstates = null_hash ();
fonts = null_hash ();
form_xobjects = null_hash ();
2023-05-07 17:40:02 +02:00
page_names = [];
2023-05-04 16:01:12 +02:00
time = Cpdfstrftime.dummy;
current_url = None;
current_font = Pdftext.StandardFont (Pdftext.TimesRoman, Pdftext.WinAnsiEncoding);
num = 0}
2022-12-22 17:20:00 +01:00
2023-05-08 16:13:17 +02:00
let resstack =
ref [empty_res]
let res () =
hd !resstack
2023-05-04 16:01:12 +02:00
let fresh_name s =
2023-05-08 16:13:17 +02:00
(res ()).num <- (res ()).num + 1;
s ^ string_of_int (res ()).num
2022-12-16 13:13:38 +01:00
2023-05-07 17:40:02 +02:00
(* At end of page, we keep things for which we have indirects - but ExtGStates
aren't indirect, so they go. *)
2023-05-03 14:43:57 +02:00
let reset_state () =
2023-05-08 16:13:17 +02:00
Hashtbl.clear (res ()).extgstates;
(res ()).page_names <- []
2023-05-01 17:53:28 +02:00
2023-05-01 20:00:28 +02:00
let process_specials pdf endpage filename bates batespad num page s =
let pairs =
Cpdfaddtext.replace_pairs pdf endpage None filename bates batespad num page
in
2023-05-08 16:13:17 +02:00
Cpdfaddtext.process_text (res ()).time s pairs
2023-05-01 20:00:28 +02:00
2023-05-05 17:17:35 +02:00
let charcodes_of_utf8 s =
2023-05-08 16:13:17 +02:00
implode (map char_of_int (option_map (Pdftext.charcode_extractor_of_font_real (res ()).current_font) (Pdftext.codepoints_of_utf8 s)))
2023-05-05 17:17:35 +02:00
let extgstate kind v =
2023-05-08 16:13:17 +02:00
try Hashtbl.find (res ()).extgstates (kind, v) with
2023-05-05 17:17:35 +02:00
Not_found ->
let n = fresh_name "/gs" in
2023-05-08 16:13:17 +02:00
Hashtbl.add (res ()).extgstates (kind, v) n;
2023-05-05 17:17:35 +02:00
n
2023-05-01 20:00:28 +02:00
let rec ops_of_drawop pdf endpage filename bates batespad num page = function
2022-12-16 14:13:55 +01:00
| Push -> [Pdfops.Op_q]
| Pop -> [Pdfops.Op_Q]
2022-12-16 13:13:38 +01:00
| Matrix m -> [Pdfops.Op_cm m]
2022-12-15 13:41:19 +01:00
| Rect (x, y, w, h) -> [Pdfops.Op_re (x, y, w, h)]
2022-12-21 17:09:04 +01:00
| Bezier (a, b, c, d, e, f) -> [Pdfops.Op_c (a, b, c, d, e, f)]
2022-12-15 13:41:19 +01:00
| To (x, y) -> [Pdfops.Op_m (x, y)]
| Line (x, y) -> [Pdfops.Op_l (x, y)]
2022-12-16 14:13:55 +01:00
| SetFill x ->
2022-12-15 13:41:19 +01:00
begin match x with
| RGB (r, g, b) -> [Op_rg (r, g, b)]
| Grey g -> [Op_g g]
| CYMK (c, y, m, k) -> [Op_k (c, y, m, k)]
| NoCol -> []
end
2022-12-16 14:13:55 +01:00
| SetStroke x ->
2022-12-15 13:41:19 +01:00
begin match x with
| RGB (r, g, b) -> [Op_RG (r, g, b)]
| Grey g -> [Op_G g]
| CYMK (c, y, m, k) -> [Op_K (c, y, m, k)]
| NoCol -> []
end
2022-12-16 14:13:55 +01:00
| ClosePath
| Fill -> [Pdfops.Op_f]
| FillEvenOdd -> [Pdfops.Op_f']
| Stroke -> [Pdfops.Op_S]
| FillStroke -> [Pdfops.Op_B]
| FillStrokeEvenOdd -> [Pdfops.Op_B']
2022-12-21 17:40:13 +01:00
| Clip -> [Pdfops.Op_W; Pdfops.Op_n]
| ClipEvenOdd -> [Pdfops.Op_W']
| SetLineThickness t -> [Pdfops.Op_w t; Pdfops.Op_n]
2022-12-16 14:13:55 +01:00
| SetLineCap c -> [Pdfops.Op_J c]
| SetLineJoin j -> [Pdfops.Op_j j]
| SetMiterLimit m -> [Pdfops.Op_M m]
| SetDashPattern (x, y) -> [Pdfops.Op_d (x, y)]
2023-05-04 14:53:49 +02:00
| FormXObject (a, b, c, d, n, ops) -> create_form_xobject a b c d pdf endpage filename bates batespad num page n ops; []
2023-05-07 17:40:02 +02:00
| Use n ->
2023-05-08 16:13:17 +02:00
let pdfname = try fst (Hashtbl.find (res ()).form_xobjects n) with _ -> Cpdferror.error ("Form XObject not found: " ^ n) in
(res ()).page_names <- pdfname::(res ()).page_names;
2023-05-07 17:40:02 +02:00
[Pdfops.Op_Do pdfname]
| Image s ->
2023-05-08 16:13:17 +02:00
let pdfname = try fst (Hashtbl.find (res ()).images s) with _ -> Cpdferror.error ("Image not found: " ^ s) in
(res ()).page_names <- pdfname::(res ()).page_names;
2023-05-07 17:40:02 +02:00
[Pdfops.Op_Do pdfname]
2022-12-22 21:42:55 +01:00
| ImageXObject (s, obj) ->
2023-05-08 16:13:17 +02:00
Hashtbl.add (res ()).images s (fresh_name "/XObj", Pdf.addobj pdf obj);
2022-12-22 17:20:00 +01:00
[]
2023-04-27 20:14:58 +02:00
| NewPage -> Pdfe.log ("NewPage remaining in graphic stream"); assert false
2023-05-05 17:17:35 +02:00
| Opacity v -> [Pdfops.Op_gs (extgstate "/ca" v)]
| SOpacity v -> [Pdfops.Op_gs (extgstate "/CA" v)]
2023-04-28 16:35:05 +02:00
| URL s ->
2023-05-08 16:13:17 +02:00
(res ()).current_url <- Some s;
2023-04-28 16:35:05 +02:00
[]
| EndURL ->
2023-05-08 16:13:17 +02:00
(res ()).current_url <- None;
2023-04-28 16:35:05 +02:00
[]
| Font (s, f) ->
2023-05-05 17:17:35 +02:00
let font = Pdftext.StandardFont (s, Pdftext.WinAnsiEncoding) in
let (n, _) =
2023-05-08 16:13:17 +02:00
try Hashtbl.find (res ()).fonts font with
2023-05-05 17:17:35 +02:00
Not_found ->
let o = Pdftext.write_font pdf font in
let n = fresh_name "/F" in
2023-05-08 16:13:17 +02:00
Hashtbl.add (res ()).fonts font (n, o);
2023-05-05 17:17:35 +02:00
(n, o)
in
2023-05-08 16:13:17 +02:00
(res ()).current_font <- font;
(res ()).page_names <- n::(res ()).page_names;
2023-04-28 17:09:19 +02:00
[Pdfops.Op_Tf (n, f)]
2023-05-01 15:39:42 +02:00
| BT -> [Pdfops.Op_BT]
| ET -> [Pdfops.Op_ET]
2023-05-05 17:17:35 +02:00
| Text s -> [Pdfops.Op_Tj (charcodes_of_utf8 s)]
2023-05-02 15:47:18 +02:00
| SpecialText s ->
2023-05-01 20:00:28 +02:00
let s = process_specials pdf endpage filename bates batespad num page s in
2023-05-05 17:17:35 +02:00
[Pdfops.Op_Tj (charcodes_of_utf8 s)]
2023-05-01 15:39:42 +02:00
| Leading f -> [Pdfops.Op_TL f]
| CharSpace f -> [Pdfops.Op_Tc f]
| WordSpace f -> [Pdfops.Op_Tw f]
| TextScale f -> [Pdfops.Op_Tz f]
| RenderMode i -> [Pdfops.Op_Tr i]
| Rise f -> [Pdfops.Op_Ts f]
| Newline -> [Pdfops.Op_T']
2022-12-15 13:41:19 +01:00
2023-05-01 20:00:28 +02:00
and ops_of_drawops pdf endpage filename bates batespad num page drawops =
flatten (map (ops_of_drawop pdf endpage filename bates batespad num page) drawops)
2022-12-15 13:41:19 +01:00
2023-05-08 15:39:37 +02:00
(* 1. We want resources to be created locally to the xobject, not on the page so it may be shared between pages.
a. make sure none are added to the page
b. make sure we collect them and add them to the xobject *)
2023-05-04 14:53:49 +02:00
and create_form_xobject a b c d pdf endpage filename bates batespad num page n ops =
2023-05-03 20:01:25 +02:00
let data =
Pdfio.bytes_of_string (Pdfops.string_of_ops (ops_of_drawops pdf endpage filename bates batespad num page ops))
in
let obj =
Pdf.Stream
{contents =
(Pdf.Dictionary
[("/Length", Pdf.Integer (Pdfio.bytes_size data));
("/Subtype", Pdf.Name "/Form");
2023-05-04 14:53:49 +02:00
("/BBox", Pdf.Array [Pdf.Real a; Pdf.Real b; Pdf.Real c; Pdf.Real d])
2023-05-03 20:01:25 +02:00
],
Pdf.Got data)}
in
2023-05-08 16:13:17 +02:00
Hashtbl.add (res ()).form_xobjects n (fresh_name "/Fm", (Pdf.addobj pdf obj))
2023-05-03 20:01:25 +02:00
2023-05-04 16:51:03 +02:00
let read_resource pdf n p =
match Pdf.lookup_direct pdf n p.Pdfpage.resources with
| Some (Pdf.Dictionary d) -> d
| _ -> []
2023-05-04 19:57:08 +02:00
let minimum_resource_number pdf range =
2023-05-05 15:46:51 +02:00
let pages = Pdfpage.pages_of_pagetree pdf in
let pages_in_range =
option_map2 (fun p n -> if mem n range then Some p else None) pages (indx pages) in
let number_of_name s =
match implode (rev (takewhile (function '0'..'9' -> true | _ -> false) (rev (explode s)))) with
| "" -> None
| s -> Some (int_of_string s)
in
let resource_names_page p =
let names n =
match Pdf.lookup_direct pdf n p.Pdfpage.resources with
| Some (Pdf.Dictionary d) -> map fst d
| _ -> []
in
names "/XObject" @ names "/ExtGState" @ names "/Font"
in
match
sort
(fun a b -> compare b a)
(option_map number_of_name (flatten (map resource_names_page pages_in_range)))
with
| [] -> 0
| n::_ -> n + 1
2023-05-04 19:57:08 +02:00
2023-05-05 17:27:41 +02:00
let contains_specials drawops =
List.exists (function SpecialText _ -> true | _ -> false) drawops
2023-05-03 14:43:57 +02:00
let draw_single ~filename ~bates ~batespad fast range pdf drawops =
2023-05-08 16:13:17 +02:00
(res ()).num <- max (res ()).num (minimum_resource_number pdf range);
2023-05-02 15:47:18 +02:00
let endpage = Pdfpage.endpage pdf in
let pages = Pdfpage.pages_of_pagetree pdf in
2023-05-05 17:27:41 +02:00
let str =
if contains_specials drawops then None else Some (Pdfops.string_of_ops (ops_of_drawops pdf endpage filename bates batespad 0 (hd pages) drawops))
in
2023-05-02 15:47:18 +02:00
let ss =
map2
2023-05-05 14:42:47 +02:00
(fun n p ->
2023-05-05 17:27:41 +02:00
if mem n range then (match str with Some x -> x | None -> Pdfops.string_of_ops (ops_of_drawops pdf endpage filename bates batespad n p drawops)) else "")
2023-05-02 15:47:18 +02:00
(ilist 1 endpage)
pages
in
let pdf = ref pdf in
iter2
(fun n s ->
2023-05-03 14:53:48 +02:00
if mem n range then pdf := Cpdftweak.append_page_content s false fast [n] !pdf)
2023-05-02 15:47:18 +02:00
(ilist 1 endpage)
ss;
let pdf = !pdf in
2023-05-08 16:13:17 +02:00
let gss_resources = map (fun ((kind, v), n) -> (kind, Pdf.Real v)) (list_of_hashtbl (res ()).extgstates) in
2023-05-07 17:40:02 +02:00
let select_resources t =
2023-05-08 16:13:17 +02:00
option_map (fun (_, (n, o)) -> if mem n (res ()).page_names then Some (n, Pdf.Indirect o) else None) (list_of_hashtbl t)
2023-05-07 17:40:02 +02:00
in
2023-05-04 16:51:03 +02:00
let pages =
2023-05-08 15:15:03 +02:00
map2
(fun n p ->
if not (mem n range) then p else
2023-05-04 16:51:03 +02:00
let new_resources =
let update = fold_right (fun (k, v) d -> add k v d) in
let new_gss = update gss_resources (read_resource pdf "/ExtGState" p) in
2023-05-08 16:13:17 +02:00
let new_xobjects = update (select_resources (res ()).form_xobjects @ select_resources (res ()).images) (read_resource pdf "/XObject" p) in
let new_fonts = update (select_resources (res ()).fonts) (read_resource pdf "/Font" p) in
2023-05-07 18:09:08 +02:00
let add_if_non_empty dict name newdict =
if newdict = Pdf.Dictionary [] then dict else
Pdf.add_dict_entry dict name newdict
in
add_if_non_empty
(add_if_non_empty
(add_if_non_empty p.Pdfpage.resources "/XObject" (Pdf.Dictionary new_xobjects))
"/ExtGState"
(Pdf.Dictionary new_gss))
"/Font"
(Pdf.Dictionary new_fonts)
2023-05-04 16:51:03 +02:00
in
{p with resources = new_resources})
2023-05-08 15:15:03 +02:00
(ilist 1 endpage)
2023-05-04 16:51:03 +02:00
(Pdfpage.pages_of_pagetree pdf)
in
Pdfpage.change_pages true pdf pages
2023-05-03 14:43:57 +02:00
let draw ~filename ~bates ~batespad fast range pdf drawops =
2023-05-08 16:13:17 +02:00
(res ()).time <- Cpdfstrftime.current_time ();
2023-05-03 14:43:57 +02:00
let pdf = ref pdf in
let range = ref range in
let chunks = ref (split_around (eq NewPage) drawops) in
while !chunks <> [] do
reset_state ();
pdf := draw_single ~filename ~bates ~batespad fast !range !pdf (hd !chunks);
chunks := tl !chunks;
if !chunks <> [] then begin
let endpage = Pdfpage.endpage !pdf in
pdf := Cpdfpad.padafter [endpage] !pdf;
range := [endpage + 1]
end
done;
!pdf