mirror of
https://github.com/johnwhitington/cpdf-source.git
synced 2024-12-22 15:05:46 +01:00
Scaffolding for struct trees in typesetter
This commit is contained in:
parent
a7d0fcad56
commit
c827f7b410
@ -4549,7 +4549,8 @@ let go () =
|
||||
| Some (Typeset filename) ->
|
||||
let text = Pdfio.bytes_of_input_channel (open_in_bin filename) in
|
||||
let cpdffont = embed_font () in
|
||||
let pdf = Cpdftexttopdf.typeset ~subformat:args.subformat ~title:args.title ~font:cpdffont ~papersize:args.createpdf_pagesize ~fontsize:args.fontsize text in
|
||||
let pdf = Cpdftexttopdf.typeset ~process_struct_tree:args.process_struct_trees
|
||||
?subformat:args.subformat ?title:args.title ~font:cpdffont ~papersize:args.createpdf_pagesize ~fontsize:args.fontsize text in
|
||||
write_pdf false pdf
|
||||
| Some (TextWidth s) ->
|
||||
let rawwidth =
|
||||
|
35
cpdfdraw.ml
35
cpdfdraw.ml
@ -559,39 +559,6 @@ let save_whole_stack () =
|
||||
let restore_whole_stack r =
|
||||
resstack := r
|
||||
|
||||
(* Mark as an artifact anything not already marked. *)
|
||||
let add_artifacts ops =
|
||||
let content = ref false in
|
||||
let artifact = ref false in
|
||||
let rec loop a = function
|
||||
| [] ->
|
||||
(* The end. Must end artifact if in artifact. *)
|
||||
if !artifact then rev (Pdfops.Op_EMC::a) else rev a
|
||||
| Pdfops.Op_BMC "/BeginArtifact"::t ->
|
||||
(* Convert back-channel artifact beginning. *)
|
||||
set artifact;
|
||||
loop (Pdfops.Op_BMC "/Artifact"::a) t
|
||||
| Pdfops.Op_BMC "/EndArtifact"::t ->
|
||||
(* Convert back-channel artifact ending. *)
|
||||
clear artifact;
|
||||
loop (Pdfops.Op_EMC::a) t
|
||||
| Pdfops.Op_BDC _ as h::t ->
|
||||
(* Entering content. If in artifact, must end artifact. *)
|
||||
let a' = if !artifact then h::Pdfops.Op_EMC::a else h::a in
|
||||
set content; clear artifact; loop a' t
|
||||
| Pdfops.Op_EMC as h::t ->
|
||||
(* Exiting content. *)
|
||||
clear content;
|
||||
loop (h::a) t
|
||||
| h::t ->
|
||||
(* A normal operation. If not in content or artifact must start artifact. *)
|
||||
let a' =
|
||||
if not (!content || !artifact) then (set artifact; h::Pdfops.Op_BMC "/Artifact"::a) else h::a
|
||||
in
|
||||
loop a' t
|
||||
in
|
||||
loop [] ops
|
||||
|
||||
(* When no automatic artifacting, we still need to fix our backchannel manual artifacts. *)
|
||||
let fixup_manual_artifacts =
|
||||
map (function Pdfops.Op_BMC "/BeginArtifact" -> Pdfops.Op_BMC "/Artifact"
|
||||
@ -623,7 +590,7 @@ let draw_single ~struct_tree ~fast ~underneath ~filename ~bates ~batespad range
|
||||
map3
|
||||
(fun n p ops ->
|
||||
if not (mem n range) then p else
|
||||
let ops = if struct_tree && !do_add_artifacts then add_artifacts ops else fixup_manual_artifacts ops in
|
||||
let ops = if struct_tree && !do_add_artifacts then Cpdftype.add_artifacts ops else fixup_manual_artifacts ops in
|
||||
let page = {p with Pdfpage.resources = update_resources pdf p.Pdfpage.resources} in
|
||||
(if underneath then Pdfpage.prepend_operators else Pdfpage.postpend_operators) pdf ops ~fast page)
|
||||
(ilist 1 endpage)
|
||||
|
@ -43,7 +43,10 @@ let of_utf8_with_newlines fontpack fontsize t =
|
||||
if c <> [] then process_codepoints c;
|
||||
rev !items
|
||||
|
||||
let typeset ?subformat ?title ~papersize ~font ~fontsize text =
|
||||
let typeset ~process_struct_tree ?subformat ?title ~papersize ~font ~fontsize text =
|
||||
let process_struct_tree =
|
||||
process_struct_tree || subformat = Some Cpdfua.PDFUA1 || subformat = Some Cpdfua.PDFUA2
|
||||
in
|
||||
let pdf = Pdf.empty () in
|
||||
let codepoints = setify (Pdftext.codepoints_of_utf8 (Pdfio.string_of_bytes text)) in
|
||||
let fontpack =
|
||||
@ -61,6 +64,6 @@ let typeset ?subformat ?title ~papersize ~font ~fontsize text =
|
||||
let firstfont = hd (keep (function Cpdftype.Font _ -> true | _ -> false) instrs) in
|
||||
[firstfont; Cpdftype.BeginDocument] @ instrs
|
||||
in
|
||||
let pages = Cpdftype.typeset margin margin margin margin papersize pdf instrs in
|
||||
let pages = Cpdftype.typeset ~process_struct_tree margin margin margin margin papersize pdf instrs in
|
||||
let pdf, pageroot = Pdfpage.add_pagetree pages pdf in
|
||||
Pdfpage.add_root pageroot [] pdf
|
||||
|
@ -1,4 +1,4 @@
|
||||
(** Text to PDF *)
|
||||
|
||||
(** Typeset a text file as a PDF. *)
|
||||
val typeset : ?subformat:Cpdfua.subformat option -> ?title:string option -> papersize:Pdfpaper.t -> font:Cpdfembed.cpdffont -> fontsize:float -> Pdfio.bytes -> Pdf.t
|
||||
val typeset : process_struct_tree:bool -> ?subformat:Cpdfua.subformat -> ?title:string -> papersize:Pdfpaper.t -> font:Cpdfembed.cpdffont -> fontsize:float -> Pdfio.bytes -> Pdf.t
|
||||
|
@ -175,7 +175,7 @@ let typeset_table_of_contents ~font ~fontsize ~title ~bookmark pdf =
|
||||
let firstfont =
|
||||
hd (keep (function Cpdftype.Font _ -> true | _ -> false) (title @ flatten lines))
|
||||
in
|
||||
Cpdftype.typeset lm rm tm bm firstpage_papersize pdf
|
||||
Cpdftype.typeset ~process_struct_tree:false lm rm tm bm firstpage_papersize pdf
|
||||
([firstfont; Cpdftype.BeginDocument] @ title @ flatten lines)
|
||||
in
|
||||
let toc_pages =
|
||||
|
41
cpdftype.ml
41
cpdftype.ml
@ -18,6 +18,8 @@ type element =
|
||||
| BeginDest of Pdfdest.t
|
||||
| EndDest
|
||||
| BeginDocument
|
||||
| Tag of string
|
||||
| EndTag
|
||||
|
||||
let to_string_elt = function
|
||||
| Text t -> implode t
|
||||
@ -29,6 +31,8 @@ let to_string_elt = function
|
||||
| BeginDest _ -> "BeginDest"
|
||||
| EndDest -> "EndDest"
|
||||
| BeginDocument -> "BeginDocument"
|
||||
| Tag s -> "Tag " ^ s
|
||||
| EndTag -> "EndTag"
|
||||
|
||||
let to_string es = fold_left (fun a b -> a ^ "\n" ^ b) "" (map to_string_elt es)
|
||||
|
||||
@ -54,6 +58,39 @@ let initial_state () =
|
||||
ypos = 0.;
|
||||
dest = None}
|
||||
|
||||
(* Mark as an artifact anything not already marked. *)
|
||||
let add_artifacts ops =
|
||||
let content = ref false in
|
||||
let artifact = ref false in
|
||||
let rec loop a = function
|
||||
| [] ->
|
||||
(* The end. Must end artifact if in artifact. *)
|
||||
if !artifact then rev (Pdfops.Op_EMC::a) else rev a
|
||||
| Pdfops.Op_BMC "/BeginArtifact"::t ->
|
||||
(* Convert back-channel artifact beginning. *)
|
||||
set artifact;
|
||||
loop (Pdfops.Op_BMC "/Artifact"::a) t
|
||||
| Pdfops.Op_BMC "/EndArtifact"::t ->
|
||||
(* Convert back-channel artifact ending. *)
|
||||
clear artifact;
|
||||
loop (Pdfops.Op_EMC::a) t
|
||||
| Pdfops.Op_BDC _ as h::t ->
|
||||
(* Entering content. If in artifact, must end artifact. *)
|
||||
let a' = if !artifact then h::Pdfops.Op_EMC::a else h::a in
|
||||
set content; clear artifact; loop a' t
|
||||
| Pdfops.Op_EMC as h::t ->
|
||||
(* Exiting content. *)
|
||||
clear content;
|
||||
loop (h::a) t
|
||||
| h::t ->
|
||||
(* A normal operation. If not in content or artifact must start artifact. *)
|
||||
let a' =
|
||||
if not (!content || !artifact) then (set artifact; h::Pdfops.Op_BMC "/Artifact"::a) else h::a
|
||||
in
|
||||
loop a' t
|
||||
in
|
||||
loop [] ops
|
||||
|
||||
let font_widths id f fontsize =
|
||||
match Hashtbl.find width_table_cache (id, fontsize) with
|
||||
| x -> x
|
||||
@ -189,7 +226,7 @@ let make_annotations pdf annots =
|
||||
(* At this stage, just Font and Text and HGlue 0. and VGlue 0. and Newline and
|
||||
NewPage elements. Split on NewPages, typeset each page, add font
|
||||
dictionaries. New page only creates a page when that page has content. *)
|
||||
let typeset lmargin rmargin tmargin bmargin papersize pdf i =
|
||||
let typeset ~process_struct_tree lmargin rmargin tmargin bmargin papersize pdf i =
|
||||
Hashtbl.clear width_table_cache;
|
||||
let debug = false in
|
||||
if debug then (print_endline "***input:\n\n"; print_endline (to_string i));
|
||||
@ -286,3 +323,5 @@ let typeset lmargin rmargin tmargin bmargin papersize pdf i =
|
||||
iter typeset_element i;
|
||||
write_page ();
|
||||
rev !pages
|
||||
|
||||
|
||||
|
@ -9,6 +9,8 @@ type element =
|
||||
| BeginDest of Pdfdest.t
|
||||
| EndDest
|
||||
| BeginDocument
|
||||
| Tag of string
|
||||
| EndTag
|
||||
|
||||
type t = element list
|
||||
|
||||
@ -22,4 +24,6 @@ val font_widths : string -> Pdftext.font -> float -> float array
|
||||
val width_of_string : float array -> char list -> float
|
||||
|
||||
(** [typeset lmargin rmargin tmargin bmargin papersize pdf contents] builds a list of pages of typset content. *)
|
||||
val typeset : float -> float -> float -> float -> Pdfpaper.t -> Pdf.t -> t -> Pdfpage.t list
|
||||
val typeset : process_struct_tree:bool -> float -> float -> float -> float -> Pdfpaper.t -> Pdf.t -> t -> Pdfpage.t list
|
||||
|
||||
val add_artifacts : Pdfops.t list -> Pdfops.t list
|
||||
|
Loading…
Reference in New Issue
Block a user