2022-12-15 13:41:19 +01:00
|
|
|
open Pdfutil
|
|
|
|
|
2022-12-22 17:20:00 +01:00
|
|
|
type colspec =
|
2022-12-15 13:41:19 +01:00
|
|
|
NoCol
|
|
|
|
| RGB of float * float * float
|
|
|
|
| Grey of float
|
|
|
|
| CYMK of float * float * float * float
|
|
|
|
|
2022-12-22 17:20:00 +01:00
|
|
|
type image =
|
|
|
|
JPEG
|
|
|
|
|
2022-12-15 13:41:19 +01:00
|
|
|
type drawops =
|
2022-12-21 17:09:04 +01:00
|
|
|
| Rect of float * float * float * float
|
|
|
|
| Bezier of float * float * float * float * float * float
|
2022-12-15 13:41:19 +01:00
|
|
|
| To of float * float
|
|
|
|
| Line of float * float
|
2022-12-16 14:13:55 +01:00
|
|
|
| ClosePath
|
2022-12-22 17:20:00 +01:00
|
|
|
| SetFill of colspec
|
|
|
|
| SetStroke of colspec
|
2022-12-15 15:20:41 +01:00
|
|
|
| SetLineThickness of float
|
|
|
|
| SetLineCap of int
|
|
|
|
| SetLineJoin of int
|
|
|
|
| SetMiterLimit of float
|
|
|
|
| SetDashPattern of float list * float
|
2022-12-16 13:13:38 +01:00
|
|
|
| Matrix of Pdftransform.transform_matrix
|
|
|
|
| Push
|
|
|
|
| Pop
|
2022-12-16 14:13:55 +01:00
|
|
|
| Fill
|
|
|
|
| FillEvenOdd
|
|
|
|
| Stroke
|
|
|
|
| FillStroke
|
|
|
|
| FillStrokeEvenOdd
|
2022-12-21 17:40:13 +01:00
|
|
|
| Clip
|
|
|
|
| ClipEvenOdd
|
2022-12-16 17:49:59 +01:00
|
|
|
| SoftXObject of drawops list
|
|
|
|
| HardXObject of drawops list
|
2022-12-22 17:20:00 +01:00
|
|
|
| Image of string
|
2022-12-22 21:42:55 +01:00
|
|
|
| ImageXObject of string * Pdf.pdfobject
|
2023-04-27 20:14:58 +02:00
|
|
|
| NewPage
|
|
|
|
| Opacity of float
|
|
|
|
| SOpacity of float
|
2023-04-28 17:09:19 +02:00
|
|
|
| Font of Pdftext.standard_font * float
|
2023-05-01 15:39:42 +02:00
|
|
|
| BT
|
|
|
|
| ET
|
2023-04-27 20:14:58 +02:00
|
|
|
| Text of string
|
2023-05-02 15:47:18 +02:00
|
|
|
| SpecialText of string
|
2023-04-28 20:03:10 +02:00
|
|
|
| Newline
|
2023-05-01 15:39:42 +02:00
|
|
|
| Leading of float
|
|
|
|
| CharSpace of float
|
|
|
|
| WordSpace of float
|
|
|
|
| TextScale of float
|
|
|
|
| RenderMode of int
|
|
|
|
| Rise of float
|
2023-04-27 20:14:58 +02:00
|
|
|
| URL of string
|
|
|
|
| EndURL
|
2022-12-22 17:20:00 +01:00
|
|
|
|
2022-12-22 21:42:55 +01:00
|
|
|
(* Hash table of (human name, (resources name, object)) for image xobjects *)
|
2022-12-22 17:20:00 +01:00
|
|
|
let images = null_hash ()
|
2023-04-28 15:31:42 +02:00
|
|
|
let gss = null_hash ()
|
2022-12-22 17:20:00 +01:00
|
|
|
|
|
|
|
(* Fresh XObject names. If we are stamping over another page, manage clashes later. *)
|
|
|
|
let fresh_xobj_name () = "/Img0"
|
2022-12-16 13:13:38 +01:00
|
|
|
|
2023-04-28 15:31:42 +02:00
|
|
|
let gsnum = ref ~-1
|
|
|
|
|
|
|
|
let fresh_gs_name () =
|
|
|
|
gsnum += 1;
|
|
|
|
"/gs" ^ string_of_int !gsnum
|
|
|
|
|
2023-04-28 16:35:05 +02:00
|
|
|
let current_url = ref None
|
|
|
|
|
2023-04-28 17:09:19 +02:00
|
|
|
let fontnum = ref 0
|
|
|
|
|
|
|
|
let fonts = null_hash ()
|
|
|
|
|
2023-04-28 17:56:13 +02:00
|
|
|
let current_font = ref (Pdftext.StandardFont (Pdftext.TimesRoman, Pdftext.WinAnsiEncoding))
|
|
|
|
|
2023-04-28 17:09:19 +02:00
|
|
|
let fresh_font_name pdf f =
|
|
|
|
fontnum += 1;
|
|
|
|
let n = "/F" ^ string_of_int !fontnum in
|
|
|
|
Hashtbl.add fonts n (Pdf.Indirect (Pdftext.write_font pdf f));
|
|
|
|
n
|
2023-04-28 16:35:05 +02:00
|
|
|
|
2023-05-03 14:43:57 +02:00
|
|
|
(* This will remove fonts, images etc, for moving on to the next page *)
|
|
|
|
let reset_state () =
|
|
|
|
()
|
|
|
|
|
2023-05-01 20:00:28 +02:00
|
|
|
let time = ref Cpdfstrftime.dummy
|
2023-05-01 17:53:28 +02:00
|
|
|
|
2023-05-01 20:00:28 +02:00
|
|
|
let process_specials pdf endpage filename bates batespad num page s =
|
|
|
|
let pairs =
|
|
|
|
Cpdfaddtext.replace_pairs pdf endpage None filename bates batespad num page
|
|
|
|
in
|
|
|
|
Cpdfaddtext.process_text !time s pairs
|
|
|
|
|
|
|
|
let rec ops_of_drawop pdf endpage filename bates batespad num page = function
|
2022-12-16 14:13:55 +01:00
|
|
|
| Push -> [Pdfops.Op_q]
|
|
|
|
| Pop -> [Pdfops.Op_Q]
|
2022-12-16 13:13:38 +01:00
|
|
|
| Matrix m -> [Pdfops.Op_cm m]
|
2022-12-15 13:41:19 +01:00
|
|
|
| Rect (x, y, w, h) -> [Pdfops.Op_re (x, y, w, h)]
|
2022-12-21 17:09:04 +01:00
|
|
|
| Bezier (a, b, c, d, e, f) -> [Pdfops.Op_c (a, b, c, d, e, f)]
|
2022-12-15 13:41:19 +01:00
|
|
|
| To (x, y) -> [Pdfops.Op_m (x, y)]
|
|
|
|
| Line (x, y) -> [Pdfops.Op_l (x, y)]
|
2022-12-16 14:13:55 +01:00
|
|
|
| SetFill x ->
|
2022-12-15 13:41:19 +01:00
|
|
|
begin match x with
|
|
|
|
| RGB (r, g, b) -> [Op_rg (r, g, b)]
|
|
|
|
| Grey g -> [Op_g g]
|
|
|
|
| CYMK (c, y, m, k) -> [Op_k (c, y, m, k)]
|
|
|
|
| NoCol -> []
|
|
|
|
end
|
2022-12-16 14:13:55 +01:00
|
|
|
| SetStroke x ->
|
2022-12-15 13:41:19 +01:00
|
|
|
begin match x with
|
|
|
|
| RGB (r, g, b) -> [Op_RG (r, g, b)]
|
|
|
|
| Grey g -> [Op_G g]
|
|
|
|
| CYMK (c, y, m, k) -> [Op_K (c, y, m, k)]
|
|
|
|
| NoCol -> []
|
|
|
|
end
|
2022-12-16 14:13:55 +01:00
|
|
|
| ClosePath
|
|
|
|
| Fill -> [Pdfops.Op_f]
|
|
|
|
| FillEvenOdd -> [Pdfops.Op_f']
|
|
|
|
| Stroke -> [Pdfops.Op_S]
|
|
|
|
| FillStroke -> [Pdfops.Op_B]
|
|
|
|
| FillStrokeEvenOdd -> [Pdfops.Op_B']
|
2022-12-21 17:40:13 +01:00
|
|
|
| Clip -> [Pdfops.Op_W; Pdfops.Op_n]
|
|
|
|
| ClipEvenOdd -> [Pdfops.Op_W']
|
|
|
|
| SetLineThickness t -> [Pdfops.Op_w t; Pdfops.Op_n]
|
2022-12-16 14:13:55 +01:00
|
|
|
| SetLineCap c -> [Pdfops.Op_J c]
|
|
|
|
| SetLineJoin j -> [Pdfops.Op_j j]
|
|
|
|
| SetMiterLimit m -> [Pdfops.Op_M m]
|
|
|
|
| SetDashPattern (x, y) -> [Pdfops.Op_d (x, y)]
|
2022-12-16 17:49:59 +01:00
|
|
|
| SoftXObject l | HardXObject l ->
|
2023-05-01 20:00:28 +02:00
|
|
|
[Pdfops.Op_q] @ ops_of_drawops pdf endpage filename bates batespad num page l @ [Pdfops.Op_Q]
|
2022-12-22 17:20:00 +01:00
|
|
|
| Image s -> [Pdfops.Op_Do (try fst (Hashtbl.find images s) with _ -> Cpdferror.error ("Image not found: " ^ s))]
|
2022-12-22 21:42:55 +01:00
|
|
|
| ImageXObject (s, obj) ->
|
|
|
|
Hashtbl.add images s (fresh_xobj_name (), Pdf.addobj pdf obj);
|
2022-12-22 17:20:00 +01:00
|
|
|
[]
|
2023-04-27 20:14:58 +02:00
|
|
|
| NewPage -> Pdfe.log ("NewPage remaining in graphic stream"); assert false
|
2023-04-28 15:31:42 +02:00
|
|
|
| Opacity v ->
|
|
|
|
let n = fresh_gs_name () in
|
|
|
|
Hashtbl.add gss n (Pdf.Dictionary [("/ca", Pdf.Real v)]);
|
|
|
|
[Pdfops.Op_gs n]
|
|
|
|
| SOpacity v ->
|
|
|
|
let n = fresh_gs_name () in
|
|
|
|
Hashtbl.add gss n (Pdf.Dictionary [("/CA", Pdf.Real v)]);
|
|
|
|
[Pdfops.Op_gs n]
|
2023-04-28 16:35:05 +02:00
|
|
|
| URL s ->
|
|
|
|
current_url := Some s;
|
|
|
|
[]
|
|
|
|
| EndURL ->
|
|
|
|
current_url := None;
|
|
|
|
[]
|
|
|
|
| Font (s, f) ->
|
2023-04-28 17:09:19 +02:00
|
|
|
let n = fresh_font_name pdf (Pdftext.StandardFont (s, Pdftext.WinAnsiEncoding)) in
|
|
|
|
[Pdfops.Op_Tf (n, f)]
|
2023-05-01 15:39:42 +02:00
|
|
|
| BT -> [Pdfops.Op_BT]
|
|
|
|
| ET -> [Pdfops.Op_ET]
|
2023-04-28 16:35:05 +02:00
|
|
|
| Text s ->
|
2023-05-02 15:47:18 +02:00
|
|
|
let charcodes =
|
|
|
|
implode (map char_of_int (option_map (Pdftext.charcode_extractor_of_font_real !current_font) (Pdftext.codepoints_of_utf8 s)))
|
|
|
|
in
|
|
|
|
[Pdfops.Op_Tj charcodes]
|
|
|
|
| SpecialText s ->
|
2023-05-01 20:00:28 +02:00
|
|
|
let s = process_specials pdf endpage filename bates batespad num page s in
|
2023-04-28 17:56:13 +02:00
|
|
|
let charcodes =
|
2023-05-01 20:00:28 +02:00
|
|
|
implode (map char_of_int (option_map (Pdftext.charcode_extractor_of_font_real !current_font) (Pdftext.codepoints_of_utf8 s)))
|
2023-04-28 17:56:13 +02:00
|
|
|
in
|
2023-05-01 15:39:42 +02:00
|
|
|
[Pdfops.Op_Tj charcodes]
|
|
|
|
| Leading f -> [Pdfops.Op_TL f]
|
|
|
|
| CharSpace f -> [Pdfops.Op_Tc f]
|
|
|
|
| WordSpace f -> [Pdfops.Op_Tw f]
|
|
|
|
| TextScale f -> [Pdfops.Op_Tz f]
|
|
|
|
| RenderMode i -> [Pdfops.Op_Tr i]
|
|
|
|
| Rise f -> [Pdfops.Op_Ts f]
|
|
|
|
| Newline -> [Pdfops.Op_T']
|
2022-12-15 13:41:19 +01:00
|
|
|
|
2023-05-01 20:00:28 +02:00
|
|
|
and ops_of_drawops pdf endpage filename bates batespad num page drawops =
|
|
|
|
flatten (map (ops_of_drawop pdf endpage filename bates batespad num page) drawops)
|
2022-12-15 13:41:19 +01:00
|
|
|
|
2023-05-01 20:00:28 +02:00
|
|
|
(* Draw all the accumulated operators. *)
|
2023-05-03 14:43:57 +02:00
|
|
|
let draw_single ~filename ~bates ~batespad fast range pdf drawops =
|
2023-05-02 15:47:18 +02:00
|
|
|
let endpage = Pdfpage.endpage pdf in
|
|
|
|
let pages = Pdfpage.pages_of_pagetree pdf in
|
|
|
|
let ss =
|
|
|
|
map2
|
|
|
|
(fun n p -> Pdfops.string_of_ops (ops_of_drawops pdf endpage filename bates batespad n p drawops))
|
|
|
|
(ilist 1 endpage)
|
|
|
|
pages
|
|
|
|
in
|
|
|
|
let pdf = ref pdf in
|
|
|
|
iter2
|
|
|
|
(fun n s ->
|
|
|
|
if mem n range then (Printf.printf "Adding ops to page %i\n" n; pdf := Cpdftweak.append_page_content s false fast [n] !pdf))
|
|
|
|
(ilist 1 endpage)
|
|
|
|
ss;
|
|
|
|
let pdf = !pdf in
|
2022-12-22 21:42:55 +01:00
|
|
|
let images = list_of_hashtbl images in
|
2023-04-28 15:31:42 +02:00
|
|
|
let image_resources = map (fun (_, (n, o)) -> (n, Pdf.Indirect o)) images in
|
|
|
|
let gss_resources = list_of_hashtbl gss in
|
2023-04-28 17:18:26 +02:00
|
|
|
let font_resources = list_of_hashtbl fonts in
|
|
|
|
match images, gss_resources, font_resources with [], [], [] -> pdf | _ ->
|
2022-12-22 21:42:55 +01:00
|
|
|
let pages = Pdfpage.pages_of_pagetree pdf in
|
|
|
|
let pages =
|
|
|
|
map
|
|
|
|
(fun p ->
|
|
|
|
let new_resources =
|
2023-04-28 15:31:42 +02:00
|
|
|
let existing_xobjects =
|
|
|
|
match Pdf.lookup_direct pdf "/XObject" p.Pdfpage.resources with
|
|
|
|
| Some (Pdf.Dictionary d) -> d
|
|
|
|
| _ -> []
|
|
|
|
in
|
|
|
|
let existing_gss =
|
|
|
|
match Pdf.lookup_direct pdf "/ExtGState" p.Pdfpage.resources with
|
2022-12-22 21:42:55 +01:00
|
|
|
| Some (Pdf.Dictionary d) -> d
|
|
|
|
| _ -> []
|
2023-04-28 17:18:26 +02:00
|
|
|
in
|
|
|
|
let existing_fonts =
|
|
|
|
match Pdf.lookup_direct pdf "/Font" p.Pdfpage.resources with
|
|
|
|
| Some (Pdf.Dictionary d) -> d
|
|
|
|
| _ -> []
|
2022-12-22 21:42:55 +01:00
|
|
|
in
|
2023-04-28 15:31:42 +02:00
|
|
|
let new_xobjects = fold_right (fun (k, v) d -> add k v d) image_resources existing_xobjects in
|
|
|
|
let new_gss = fold_right (fun (k, v) d -> add k v d) gss_resources existing_gss in
|
2023-04-28 17:18:26 +02:00
|
|
|
let new_fonts = fold_right (fun (k, v) d -> add k v d) font_resources existing_fonts in
|
|
|
|
Pdf.add_dict_entry
|
|
|
|
(Pdf.add_dict_entry
|
|
|
|
(Pdf.add_dict_entry p.Pdfpage.resources "/XObject" (Pdf.Dictionary new_xobjects))
|
|
|
|
"/ExtGState"
|
|
|
|
(Pdf.Dictionary new_gss))
|
|
|
|
"/Font"
|
|
|
|
(Pdf.Dictionary new_fonts)
|
2022-12-22 21:42:55 +01:00
|
|
|
in
|
|
|
|
{p with resources = new_resources})
|
|
|
|
pages
|
|
|
|
in
|
|
|
|
Pdfpage.change_pages true pdf pages
|
2023-05-03 14:43:57 +02:00
|
|
|
|
|
|
|
let draw ~filename ~bates ~batespad fast range pdf drawops =
|
|
|
|
time := Cpdfstrftime.current_time ();
|
|
|
|
let pdf = ref pdf in
|
|
|
|
let range = ref range in
|
|
|
|
let chunks = ref (split_around (eq NewPage) drawops) in
|
|
|
|
while !chunks <> [] do
|
|
|
|
reset_state ();
|
|
|
|
pdf := draw_single ~filename ~bates ~batespad fast !range !pdf (hd !chunks);
|
|
|
|
chunks := tl !chunks;
|
|
|
|
if !chunks <> [] then begin
|
|
|
|
let endpage = Pdfpage.endpage !pdf in
|
|
|
|
pdf := Cpdfpad.padafter [endpage] !pdf;
|
|
|
|
range := [endpage + 1]
|
|
|
|
end
|
|
|
|
done;
|
|
|
|
!pdf
|