open Pdfutil type colspec = NoCol | RGB of float * float * float | Grey of float | CYMK of float * float * float * float type drawops = | Rect of float * float * float * float | Bezier of float * float * float * float * float * float | To of float * float | Line of float * float | ClosePath | SetFill of colspec | SetStroke of colspec | SetLineThickness of float | SetLineCap of int | SetLineJoin of int | SetMiterLimit of float | SetDashPattern of float list * float | Matrix of Pdftransform.transform_matrix | Push | Pop | Fill | FillEvenOdd | Stroke | FillStroke | FillStrokeEvenOdd | Clip | ClipEvenOdd | FormXObject of float * float * float * float * string * drawops list | Use of string | ImageXObject of string * Pdf.pdfobject | Image of string | NewPage | Opacity of float | SOpacity of float | Font of Pdftext.standard_font * float | BT | ET | Text of string | SpecialText of string | Newline | Leading of float | CharSpace of float | WordSpace of float | TextScale of float | RenderMode of int | Rise of float | URL of string | EndURL (* Per page resources *) type res = {images : (string, (string * int)) Hashtbl.t; (* (name, (pdf name, objnum)) *) extgstates : ((string * float), string) Hashtbl.t; (* (kind, value), name *) fonts : (Pdftext.font, (string * int)) Hashtbl.t; (* (font, (objnum, pdf name)) *) form_xobjects : (string, (string * int)) Hashtbl.t; (* (name, (pdf name, objnum)) *) mutable page_names : string list; mutable time : Cpdfstrftime.t; mutable current_url : string option; mutable current_font : Pdftext.font; mutable num : int} let empty_res = {images = null_hash (); extgstates = null_hash (); fonts = null_hash (); form_xobjects = null_hash (); page_names = []; time = Cpdfstrftime.dummy; current_url = None; current_font = Pdftext.StandardFont (Pdftext.TimesRoman, Pdftext.WinAnsiEncoding); num = 0} let resstack = ref [empty_res] let res () = hd !resstack let rescopy r = {r with images = Hashtbl.copy r.images; fonts = Hashtbl.copy r.fonts; extgstates = Hashtbl.copy r.extgstates; form_xobjects = Hashtbl.copy r.form_xobjects} let respush () = resstack := (rescopy (res ()))::!resstack let respop () = let n = (hd !resstack).num in resstack := tl !resstack; (* not necessary, since names are isolated in the xobject, but it makes manual debugging of PDF files easier if we don't re-use numbers *) (res ()).num <- max n (res ()).num let fresh_name s = (res ()).num <- (res ()).num + 1; s ^ string_of_int (res ()).num (* At end of page, we keep things for which we have indirects - but ExtGStates aren't indirect, so they go. *) let reset_state () = Hashtbl.clear (res ()).extgstates; (res ()).page_names <- [] let process_specials pdf endpage filename bates batespad num page s = let pairs = Cpdfaddtext.replace_pairs pdf endpage None filename bates batespad num page in Cpdfaddtext.process_text (res ()).time s pairs let charcodes_of_utf8 s = implode (map char_of_int (option_map (Pdftext.charcode_extractor_of_font_real (res ()).current_font) (Pdftext.codepoints_of_utf8 s))) let extgstate kind v = try Hashtbl.find (res ()).extgstates (kind, v) with Not_found -> let n = fresh_name "/G" in Hashtbl.add (res ()).extgstates (kind, v) n; n let read_resource pdf n res = match Pdf.lookup_direct pdf n res with | Some (Pdf.Dictionary d) -> d | _ -> [] let update_resources pdf old_resources = let gss_resources = map (fun ((kind, v), n) -> (kind, Pdf.Real v)) (list_of_hashtbl (res ()).extgstates) in let select_resources t = option_map (fun (_, (n, o)) -> if mem n (res ()).page_names then Some (n, Pdf.Indirect o) else None) (list_of_hashtbl t) in let update = fold_right (fun (k, v) d -> add k v d) in let new_gss = update gss_resources (read_resource pdf "/ExtGState" old_resources) in let new_xobjects = update (select_resources (res ()).form_xobjects @ select_resources (res ()).images) (read_resource pdf "/XObject" old_resources) in let new_fonts = update (select_resources (res ()).fonts) (read_resource pdf "/Font" old_resources) in let add_if_non_empty dict name newdict = if newdict = Pdf.Dictionary [] then dict else Pdf.add_dict_entry dict name newdict in add_if_non_empty (add_if_non_empty (add_if_non_empty old_resources "/XObject" (Pdf.Dictionary new_xobjects)) "/ExtGState" (Pdf.Dictionary new_gss)) "/Font" (Pdf.Dictionary new_fonts) let rec ops_of_drawop pdf endpage filename bates batespad num page = function | Push -> [Pdfops.Op_q] | Pop -> [Pdfops.Op_Q] | Matrix m -> [Pdfops.Op_cm m] | Rect (x, y, w, h) -> [Pdfops.Op_re (x, y, w, h)] | Bezier (a, b, c, d, e, f) -> [Pdfops.Op_c (a, b, c, d, e, f)] | To (x, y) -> [Pdfops.Op_m (x, y)] | Line (x, y) -> [Pdfops.Op_l (x, y)] | SetFill x -> begin match x with | RGB (r, g, b) -> [Op_rg (r, g, b)] | Grey g -> [Op_g g] | CYMK (c, y, m, k) -> [Op_k (c, y, m, k)] | NoCol -> [] end | SetStroke x -> begin match x with | RGB (r, g, b) -> [Op_RG (r, g, b)] | Grey g -> [Op_G g] | CYMK (c, y, m, k) -> [Op_K (c, y, m, k)] | NoCol -> [] end | ClosePath | Fill -> [Pdfops.Op_f] | FillEvenOdd -> [Pdfops.Op_f'] | Stroke -> [Pdfops.Op_S] | FillStroke -> [Pdfops.Op_B] | FillStrokeEvenOdd -> [Pdfops.Op_B'] | Clip -> [Pdfops.Op_W; Pdfops.Op_n] | ClipEvenOdd -> [Pdfops.Op_W'] | SetLineThickness t -> [Pdfops.Op_w t; Pdfops.Op_n] | SetLineCap c -> [Pdfops.Op_J c] | SetLineJoin j -> [Pdfops.Op_j j] | SetMiterLimit m -> [Pdfops.Op_M m] | SetDashPattern (x, y) -> [Pdfops.Op_d (x, y)] | FormXObject (a, b, c, d, n, ops) -> create_form_xobject a b c d pdf endpage filename bates batespad num page n ops; [] | Use n -> let pdfname = try fst (Hashtbl.find (res ()).form_xobjects n) with _ -> Cpdferror.error ("Form XObject not found: " ^ n) in (res ()).page_names <- pdfname::(res ()).page_names; [Pdfops.Op_Do pdfname] | Image s -> let pdfname = try fst (Hashtbl.find (res ()).images s) with _ -> Cpdferror.error ("Image not found: " ^ s) in (res ()).page_names <- pdfname::(res ()).page_names; [Pdfops.Op_Do pdfname] | ImageXObject (s, obj) -> Hashtbl.add (res ()).images s (fresh_name "/I", Pdf.addobj pdf obj); [] | NewPage -> Pdfe.log ("NewPage remaining in graphic stream"); assert false | Opacity v -> [Pdfops.Op_gs (extgstate "/ca" v)] | SOpacity v -> [Pdfops.Op_gs (extgstate "/CA" v)] | URL s -> (res ()).current_url <- Some s; [] | EndURL -> (res ()).current_url <- None; [] | Font (s, f) -> let font = Pdftext.StandardFont (s, Pdftext.WinAnsiEncoding) in let (n, _) = try Hashtbl.find (res ()).fonts font with Not_found -> let o = Pdftext.write_font pdf font in let n = fresh_name "/F" in Hashtbl.add (res ()).fonts font (n, o); (n, o) in (res ()).current_font <- font; (res ()).page_names <- n::(res ()).page_names; [Pdfops.Op_Tf (n, f)] | BT -> [Pdfops.Op_BT] | ET -> [Pdfops.Op_ET] | Text s -> [Pdfops.Op_Tj (charcodes_of_utf8 s)] | SpecialText s -> let s = process_specials pdf endpage filename bates batespad num page s in [Pdfops.Op_Tj (charcodes_of_utf8 s)] | Leading f -> [Pdfops.Op_TL f] | CharSpace f -> [Pdfops.Op_Tc f] | WordSpace f -> [Pdfops.Op_Tw f] | TextScale f -> [Pdfops.Op_Tz f] | RenderMode i -> [Pdfops.Op_Tr i] | Rise f -> [Pdfops.Op_Ts f] | Newline -> [Pdfops.Op_T'] and ops_of_drawops pdf endpage filename bates batespad num page drawops = flatten (map (ops_of_drawop pdf endpage filename bates batespad num page) drawops) and create_form_xobject a b c d pdf endpage filename bates batespad num page n ops = respush (); reset_state (); let data = Pdfio.bytes_of_string (Pdfops.string_of_ops (ops_of_drawops pdf endpage filename bates batespad num page ops)) in let obj = Pdf.Stream {contents = (Pdf.Dictionary [("/Length", Pdf.Integer (Pdfio.bytes_size data)); ("/Subtype", Pdf.Name "/Form"); ("/Resources", update_resources pdf (Pdf.Dictionary [])); ("/BBox", Pdf.Array [Pdf.Real a; Pdf.Real b; Pdf.Real c; Pdf.Real d]) ], Pdf.Got data)} in respop (); Hashtbl.add (res ()).form_xobjects n (fresh_name "/X", (Pdf.addobj pdf obj)) let minimum_resource_number pdf range = let pages = Pdfpage.pages_of_pagetree pdf in let pages_in_range = option_map2 (fun p n -> if mem n range then Some p else None) pages (indx pages) in let number_of_name s = match implode (rev (takewhile (function '0'..'9' -> true | _ -> false) (rev (explode s)))) with | "" -> None | s -> Some (int_of_string s) in let resource_names_page p = let names n = match Pdf.lookup_direct pdf n p.Pdfpage.resources with | Some (Pdf.Dictionary d) -> map fst d | _ -> [] in names "/XObject" @ names "/ExtGState" @ names "/Font" in match sort (fun a b -> compare b a) (option_map number_of_name (flatten (map resource_names_page pages_in_range))) with | [] -> 0 | n::_ -> n + 1 let contains_specials drawops = List.exists (function SpecialText _ -> true | _ -> false) drawops let draw_single ~filename ~bates ~batespad fast range pdf drawops = (res ()).num <- max (res ()).num (minimum_resource_number pdf range); let endpage = Pdfpage.endpage pdf in let pages = Pdfpage.pages_of_pagetree pdf in let str = if contains_specials drawops then None else Some (Pdfops.string_of_ops (ops_of_drawops pdf endpage filename bates batespad 0 (hd pages) drawops)) in let ss = map2 (fun n p -> if mem n range then (match str with Some x -> x | None -> Pdfops.string_of_ops (ops_of_drawops pdf endpage filename bates batespad n p drawops)) else "") (ilist 1 endpage) pages in let pdf = ref pdf in iter2 (fun n s -> if mem n range then pdf := Cpdftweak.append_page_content s false fast [n] !pdf) (ilist 1 endpage) ss; let pdf = !pdf in let pages = map2 (fun n p -> if not (mem n range) then p else {p with Pdfpage.resources = update_resources pdf p.Pdfpage.resources}) (ilist 1 endpage) (Pdfpage.pages_of_pagetree pdf) in Pdfpage.change_pages true pdf pages let draw ~filename ~bates ~batespad fast range pdf drawops = (res ()).time <- Cpdfstrftime.current_time (); let pdf = ref pdf in let range = ref range in let chunks = ref (split_around (eq NewPage) drawops) in while !chunks <> [] do reset_state (); pdf := draw_single ~filename ~bates ~batespad fast !range !pdf (hd !chunks); chunks := tl !chunks; if !chunks <> [] then begin let endpage = Pdfpage.endpage !pdf in pdf := Cpdfpad.padafter [endpage] !pdf; range := [endpage + 1] end done; !pdf