cpdf-source/cpdfdraw.ml

257 lines
8.5 KiB
OCaml
Raw Normal View History

2022-12-15 13:41:19 +01:00
open Pdfutil
2022-12-22 17:20:00 +01:00
(* A colour specification for fill and stroke operations. [NoCol] produces no
   colour operator at all. Constructor names (including the historical
   spelling [CYMK]) are part of the interface and are kept as they are. *)
type colspec =
  | NoCol
  | RGB of float * float * float
  | Grey of float
  | CYMK of float * float * float * float
2022-12-22 17:20:00 +01:00
(* Kinds of image. Only JPEG is listed here. *)
type image =
  | JPEG
2022-12-15 13:41:19 +01:00
(* The drawing operations accepted by [ops_of_drawop]. The constructors, their
   payloads and their order are unchanged; only the grouping comments are
   new. *)
type drawops =
  (* Path construction *)
  | Rect of float * float * float * float
  | Bezier of float * float * float * float * float * float
  | To of float * float
  | Line of float * float
  | ClosePath
  (* Colours *)
  | SetFill of colspec
  | SetStroke of colspec
  (* Line style *)
  | SetLineThickness of float
  | SetLineCap of int
  | SetLineJoin of int
  | SetMiterLimit of float
  | SetDashPattern of float list * float
  (* Transformation matrix and graphics-state stack *)
  | Matrix of Pdftransform.transform_matrix
  | Push
  | Pop
  (* Path painting *)
  | Fill
  | FillEvenOdd
  | Stroke
  | FillStroke
  | FillStrokeEvenOdd
  (* Clipping *)
  | Clip
  | ClipEvenOdd
  (* Form xobjects: four floats for the /BBox, a name, and the contents. *)
  | FormXObject of float * float * float * float * string * drawops list
  (* Invoke an xobject by its PDF name. *)
  | Use of string
  (* Register an image object under a name, then draw it by that name. *)
  | ImageXObject of string * Pdf.pdfobject
  | Image of string
  (* Page separator: [draw] splits the operation list around it, so it must
     never reach [ops_of_drawop]. *)
  | NewPage
  (* Fill (/ca) and stroke (/CA) opacity. *)
  | Opacity of float
  | SOpacity of float
  (* Text *)
  | Font of Pdftext.standard_font * float
  | BT
  | ET
  | Text of string
  | SpecialText of string
  | Newline
  | Leading of float
  | CharSpace of float
  | WordSpace of float
  | TextScale of float
  | RenderMode of int
  | Rise of float
  (* Set and clear the current URL. *)
  | URL of string
  | EndURL
2022-12-22 17:20:00 +01:00
2023-05-03 16:49:14 +02:00
(* Resources and drawing state accumulated while converting drawing
   operations. There is a single global value of this type; between pages
   only [extgstates] is cleared (see [reset_state]). *)
type res =
  { (* user-supplied image name -> (PDF resource name, object number) *)
    images : (string, (string * int)) Hashtbl.t;
    (* PDF resource name -> ExtGState dictionary (stored directly) *)
    extgstates : (string, Pdf.pdfobject) Hashtbl.t;
    (* PDF resource name -> object number of the font *)
    fonts : (string, int) Hashtbl.t;
    (* PDF resource name -> object number of the form xobject *)
    form_xobjects : (string, int) Hashtbl.t;
    (* time passed to [Cpdfaddtext.process_text] for special text *)
    mutable time : Cpdfstrftime.t;
    (* URL set by the [URL] operation and cleared by [EndURL] *)
    mutable current_url : string option;
    (* font used to encode [Text] and [SpecialText] strings *)
    mutable current_font : Pdftext.font;
    (* counter used by [fresh_name] to build unique resource names *)
    mutable num : int }
2023-05-03 16:49:14 +02:00
2023-05-04 16:01:12 +02:00
(* The single global resource/state record. It starts with empty tables, no
   URL, a zero name counter, a dummy time, and Times Roman with WinAnsi
   encoding as the current font. *)
let res =
  let default_font =
    Pdftext.StandardFont (Pdftext.TimesRoman, Pdftext.WinAnsiEncoding)
  in
  { images = null_hash ();
    extgstates = null_hash ();
    fonts = null_hash ();
    form_xobjects = null_hash ();
    time = Cpdfstrftime.dummy;
    current_url = None;
    current_font = default_font;
    num = 0 }
2022-12-22 17:20:00 +01:00
2023-05-04 16:01:12 +02:00
(* [fresh_name s] bumps the global counter [res.num] and returns [s] followed
   by the new counter value, so every call yields a different name for a
   given prefix. *)
let fresh_name s =
  let next = res.num + 1 in
  res.num <- next;
  s ^ string_of_int next
2022-12-16 13:13:38 +01:00
2023-05-04 16:01:12 +02:00
(* Called before each page is drawn. Only the ExtGState table is emptied:
   images, fonts and form xobjects are stored as indirect objects, so they
   are kept because a later page may use them again. *)
let reset_state () =
  let { extgstates; _ } = res in
  Hashtbl.clear extgstates
2023-05-01 17:53:28 +02:00
2023-05-01 20:00:28 +02:00
(* [process_specials pdf endpage filename bates batespad num page s] builds
   the replacement pairs for the given page with
   [Cpdfaddtext.replace_pairs], then runs [Cpdfaddtext.process_text] on [s]
   using those pairs and the time stored in [res.time]. *)
let process_specials pdf endpage filename bates batespad num page s =
  let substitutions =
    Cpdfaddtext.replace_pairs pdf endpage None filename bates batespad num page
  in
  Cpdfaddtext.process_text res.time s substitutions
2023-05-01 20:00:28 +02:00
(* Encode the UTF-8 string [s] as a string of character codes for
   [res.current_font]. Each codepoint goes through the font's charcode
   extractor; presumably codepoints with no code in the font are dropped by
   [option_map] -- confirm against Pdfutil. *)
let charcodes_of_utf8 s =
  let codepoints = Pdftext.codepoints_of_utf8 s in
  let extractor = Pdftext.charcode_extractor_of_font_real res.current_font in
  implode (map char_of_int (option_map extractor codepoints))

(* Register a one-entry ExtGState dictionary [key -> v] under a fresh "/gs"
   name and return the operator selecting it. *)
let extgstate_op key v =
  let n = fresh_name "/gs" in
  Hashtbl.add res.extgstates n (Pdf.Dictionary [(key, Pdf.Real v)]);
  [Pdfops.Op_gs n]

(* [ops_of_drawop pdf endpage filename bates batespad num page op] converts a
   single drawing operation into a (possibly empty) list of PDF operators.

   Some operations emit nothing and only update the global [res] record or
   add objects to [pdf]: [FormXObject], [ImageXObject], [URL], [EndURL].
   [Font], [Opacity] and [SOpacity] both register a resource and emit an
   operator that refers to it.

   Raises via [Cpdferror.error] if [Image] names an image which has not been
   registered. [NewPage] must have been removed by the caller; meeting one
   here is logged and then fails with [assert false]. *)
let rec ops_of_drawop pdf endpage filename bates batespad num page = function
  | Push -> [Pdfops.Op_q]
  | Pop -> [Pdfops.Op_Q]
  | Matrix m -> [Pdfops.Op_cm m]
  | Rect (x, y, w, h) -> [Pdfops.Op_re (x, y, w, h)]
  | Bezier (a, b, c, d, e, f) -> [Pdfops.Op_c (a, b, c, d, e, f)]
  | To (x, y) -> [Pdfops.Op_m (x, y)]
  | Line (x, y) -> [Pdfops.Op_l (x, y)]
  | SetFill x ->
      begin match x with
      | RGB (r, g, b) -> [Pdfops.Op_rg (r, g, b)]
      | Grey g -> [Pdfops.Op_g g]
      | CYMK (c, y, m, k) -> [Pdfops.Op_k (c, y, m, k)]
      | NoCol -> []
      end
  | SetStroke x ->
      begin match x with
      | RGB (r, g, b) -> [Pdfops.Op_RG (r, g, b)]
      | Grey g -> [Pdfops.Op_G g]
      | CYMK (c, y, m, k) -> [Pdfops.Op_K (c, y, m, k)]
      | NoCol -> []
      end
  (* Closing a subpath is [h]. Previously this case had no right-hand side
     and fell through to [Fill], so the path was filled instead. *)
  | ClosePath -> [Pdfops.Op_h]
  | Fill -> [Pdfops.Op_f]
  | FillEvenOdd -> [Pdfops.Op_f']
  | Stroke -> [Pdfops.Op_S]
  | FillStroke -> [Pdfops.Op_B]
  | FillStrokeEvenOdd -> [Pdfops.Op_B']
  (* A clip operator only takes effect at the next path-painting operator,
     so both variants end the path with [n]. *)
  | Clip -> [Pdfops.Op_W; Pdfops.Op_n]
  | ClipEvenOdd -> [Pdfops.Op_W'; Pdfops.Op_n]
  (* NOTE(review): the trailing [n] after setting the line width is kept from
     the original; it looks unnecessary -- confirm before removing. *)
  | SetLineThickness t -> [Pdfops.Op_w t; Pdfops.Op_n]
  | SetLineCap c -> [Pdfops.Op_J c]
  | SetLineJoin j -> [Pdfops.Op_j j]
  | SetMiterLimit m -> [Pdfops.Op_M m]
  | SetDashPattern (x, y) -> [Pdfops.Op_d (x, y)]
  | FormXObject (a, b, c, d, n, ops) ->
      create_form_xobject a b c d pdf endpage filename bates batespad num page n ops;
      []
  | Use n -> [Pdfops.Op_Do n]
  | Image s ->
      (* Only a missing key is turned into the user-facing error; any other
         exception is left to propagate. *)
      let name =
        try fst (Hashtbl.find res.images s) with
        | Not_found -> Cpdferror.error ("Image not found: " ^ s)
      in
      [Pdfops.Op_Do name]
  | ImageXObject (s, obj) ->
      Hashtbl.add res.images s (fresh_name "/XObj", Pdf.addobj pdf obj);
      []
  | NewPage -> Pdfe.log ("NewPage remaining in graphic stream"); assert false
  | Opacity v -> extgstate_op "/ca" v
  | SOpacity v -> extgstate_op "/CA" v
  | URL s ->
      res.current_url <- Some s;
      []
  | EndURL ->
      res.current_url <- None;
      []
  | Font (s, f) ->
      let font = Pdftext.StandardFont (s, Pdftext.WinAnsiEncoding) in
      let o = Pdftext.write_font pdf font in
      let n = fresh_name "/F" in
      Hashtbl.add res.fonts n o;
      (* Remember the font so later text is encoded for it. *)
      res.current_font <- font;
      [Pdfops.Op_Tf (n, f)]
  | BT -> [Pdfops.Op_BT]
  | ET -> [Pdfops.Op_ET]
  | Text s -> [Pdfops.Op_Tj (charcodes_of_utf8 s)]
  | SpecialText s ->
      let s = process_specials pdf endpage filename bates batespad num page s in
      [Pdfops.Op_Tj (charcodes_of_utf8 s)]
  | Leading f -> [Pdfops.Op_TL f]
  | CharSpace f -> [Pdfops.Op_Tc f]
  | WordSpace f -> [Pdfops.Op_Tw f]
  | TextScale f -> [Pdfops.Op_Tz f]
  | RenderMode i -> [Pdfops.Op_Tr i]
  | Rise f -> [Pdfops.Op_Ts f]
  | Newline -> [Pdfops.Op_T']
2022-12-15 13:41:19 +01:00
2023-05-01 20:00:28 +02:00
(* Convert a list of drawing operations to one flat list of PDF operators by
   converting each operation in turn with [ops_of_drawop] and joining the
   results. *)
and ops_of_drawops pdf endpage filename bates batespad num page drawops =
  let convert = ops_of_drawop pdf endpage filename bates batespad num page in
  flatten (map convert drawops)
2022-12-15 13:41:19 +01:00
2023-05-04 14:53:49 +02:00
(* [create_form_xobject a b c d pdf ... n ops] converts [ops] to a content
   stream, wraps it in a form xobject stream whose /BBox is [a b c d], adds
   that object to [pdf], and records its object number under the name [n] in
   [res.form_xobjects]. *)
and create_form_xobject a b c d pdf endpage filename bates batespad num page n ops =
  let content_ops =
    ops_of_drawops pdf endpage filename bates batespad num page ops
  in
  let stream_bytes = Pdfio.bytes_of_string (Pdfops.string_of_ops content_ops) in
  let bbox = Pdf.Array [Pdf.Real a; Pdf.Real b; Pdf.Real c; Pdf.Real d] in
  let dict =
    Pdf.Dictionary
      [("/Length", Pdf.Integer (Pdfio.bytes_size stream_bytes));
       ("/Subtype", Pdf.Name "/Form");
       ("/BBox", bbox)]
  in
  let form = Pdf.Stream {contents = (dict, Pdf.Got stream_bytes)} in
  Hashtbl.add res.form_xobjects n (Pdf.addobj pdf form)
2023-05-03 20:01:25 +02:00
2023-05-04 16:51:03 +02:00
(* [read_resource pdf n p] returns the entries of the dictionary stored under
   key [n] in the resources of page [p], or the empty list when the key is
   missing or is not a dictionary. *)
let read_resource pdf n p =
  match Pdf.lookup_direct pdf n p.Pdfpage.resources with
  | Some (Pdf.Dictionary entries) -> entries
  | Some _ | None -> []
2023-05-03 14:43:57 +02:00
(* [draw_single ~filename ~bates ~batespad fast range pdf drawops] draws one
   chunk of operations (one containing no [NewPage]).

   The operations are converted once per page of [pdf], since special text
   depends on the page. The resulting content is appended only to pages in
   [range]. Afterwards the image, form xobject, ExtGState and font resources
   collected in [res] are merged into the resources of every page, and the
   updated document is returned. *)
let draw_single ~filename ~bates ~batespad fast range pdf drawops =
  let endpage = Pdfpage.endpage pdf in
  let pages = Pdfpage.pages_of_pagetree pdf in
  let content_of_page pagenum page =
    Pdfops.string_of_ops
      (ops_of_drawops pdf endpage filename bates batespad pagenum page drawops)
  in
  let contents = map2 content_of_page (ilist 1 endpage) pages in
  (* Append each page's content, threading the document through. *)
  let pdf =
    List.fold_left2
      (fun doc pagenum content ->
         if mem pagenum range
         then Cpdftweak.append_page_content content false fast [pagenum] doc
         else doc)
      pdf
      (ilist 1 endpage)
      contents
  in
  let indirect (name, objnum) = (name, Pdf.Indirect objnum) in
  let image_resources =
    map (fun (_, named) -> indirect named) (list_of_hashtbl res.images)
  in
  let gss_resources = list_of_hashtbl res.extgstates in
  let font_resources = map indirect (list_of_hashtbl res.fonts) in
  let form_resources = map indirect (list_of_hashtbl res.form_xobjects) in
  (* Add every (key, value) of [additions] to the association list [existing]. *)
  let merge additions existing =
    fold_right (fun (k, v) dict -> add k v dict) additions existing
  in
  let with_resources page =
    let existing key = read_resource pdf key page in
    let xobjects = merge (form_resources @ image_resources) (existing "/XObject") in
    let gss = merge gss_resources (existing "/ExtGState") in
    let fonts = merge font_resources (existing "/Font") in
    let resources =
      Pdf.add_dict_entry page.Pdfpage.resources "/XObject" (Pdf.Dictionary xobjects)
    in
    let resources = Pdf.add_dict_entry resources "/ExtGState" (Pdf.Dictionary gss) in
    let resources = Pdf.add_dict_entry resources "/Font" (Pdf.Dictionary fonts) in
    {page with Pdfpage.resources = resources}
  in
  Pdfpage.change_pages true pdf (map with_resources (Pdfpage.pages_of_pagetree pdf))
2023-05-03 14:43:57 +02:00
(* [draw ~filename ~bates ~batespad fast range pdf drawops] is the entry
   point. It records the current time in [res.time], splits [drawops] around
   each [NewPage], and draws the chunks in order with [draw_single].

   The first chunk is drawn on the pages in [range]. Before each following
   chunk a page is padded in after the current last page, and that new page
   becomes the only page in the range. The per-page state is reset before
   every chunk. *)
let draw ~filename ~bates ~batespad fast range pdf drawops =
  res.time <- Cpdfstrftime.current_time ();
  let rec draw_chunks range pdf = function
    | [] -> pdf
    | chunk :: rest ->
        reset_state ();
        let pdf = draw_single ~filename ~bates ~batespad fast range pdf chunk in
        begin match rest with
        | [] -> pdf
        | _ :: _ ->
            let endpage = Pdfpage.endpage pdf in
            draw_chunks [endpage + 1] (Cpdfpad.padafter [endpage] pdf) rest
        end
  in
  draw_chunks range pdf (split_around (eq NewPage) drawops)