Scaffolding for struct trees in typesetter
This commit is contained in:
parent
a7d0fcad56
commit
c827f7b410
|
@ -4549,7 +4549,8 @@ let go () =
|
||||||
| Some (Typeset filename) ->
|
| Some (Typeset filename) ->
|
||||||
let text = Pdfio.bytes_of_input_channel (open_in_bin filename) in
|
let text = Pdfio.bytes_of_input_channel (open_in_bin filename) in
|
||||||
let cpdffont = embed_font () in
|
let cpdffont = embed_font () in
|
||||||
let pdf = Cpdftexttopdf.typeset ~subformat:args.subformat ~title:args.title ~font:cpdffont ~papersize:args.createpdf_pagesize ~fontsize:args.fontsize text in
|
let pdf = Cpdftexttopdf.typeset ~process_struct_tree:args.process_struct_trees
|
||||||
|
?subformat:args.subformat ?title:args.title ~font:cpdffont ~papersize:args.createpdf_pagesize ~fontsize:args.fontsize text in
|
||||||
write_pdf false pdf
|
write_pdf false pdf
|
||||||
| Some (TextWidth s) ->
|
| Some (TextWidth s) ->
|
||||||
let rawwidth =
|
let rawwidth =
|
||||||
|
|
35
cpdfdraw.ml
35
cpdfdraw.ml
|
@ -559,39 +559,6 @@ let save_whole_stack () =
|
||||||
let restore_whole_stack r =
|
let restore_whole_stack r =
|
||||||
resstack := r
|
resstack := r
|
||||||
|
|
||||||
(* Mark as an artifact anything not already marked. *)
|
|
||||||
let add_artifacts ops =
|
|
||||||
let content = ref false in
|
|
||||||
let artifact = ref false in
|
|
||||||
let rec loop a = function
|
|
||||||
| [] ->
|
|
||||||
(* The end. Must end artifact if in artifact. *)
|
|
||||||
if !artifact then rev (Pdfops.Op_EMC::a) else rev a
|
|
||||||
| Pdfops.Op_BMC "/BeginArtifact"::t ->
|
|
||||||
(* Convert back-channel artifact beginning. *)
|
|
||||||
set artifact;
|
|
||||||
loop (Pdfops.Op_BMC "/Artifact"::a) t
|
|
||||||
| Pdfops.Op_BMC "/EndArtifact"::t ->
|
|
||||||
(* Convert back-channel artifact ending. *)
|
|
||||||
clear artifact;
|
|
||||||
loop (Pdfops.Op_EMC::a) t
|
|
||||||
| Pdfops.Op_BDC _ as h::t ->
|
|
||||||
(* Entering content. If in artifact, must end artifact. *)
|
|
||||||
let a' = if !artifact then h::Pdfops.Op_EMC::a else h::a in
|
|
||||||
set content; clear artifact; loop a' t
|
|
||||||
| Pdfops.Op_EMC as h::t ->
|
|
||||||
(* Exiting content. *)
|
|
||||||
clear content;
|
|
||||||
loop (h::a) t
|
|
||||||
| h::t ->
|
|
||||||
(* A normal operation. If not in content or artifact must start artifact. *)
|
|
||||||
let a' =
|
|
||||||
if not (!content || !artifact) then (set artifact; h::Pdfops.Op_BMC "/Artifact"::a) else h::a
|
|
||||||
in
|
|
||||||
loop a' t
|
|
||||||
in
|
|
||||||
loop [] ops
|
|
||||||
|
|
||||||
(* When no automatic artifacting, we still need to fix our backchannel manual artifacts. *)
|
(* When no automatic artifacting, we still need to fix our backchannel manual artifacts. *)
|
||||||
let fixup_manual_artifacts =
|
let fixup_manual_artifacts =
|
||||||
map (function Pdfops.Op_BMC "/BeginArtifact" -> Pdfops.Op_BMC "/Artifact"
|
map (function Pdfops.Op_BMC "/BeginArtifact" -> Pdfops.Op_BMC "/Artifact"
|
||||||
|
@ -623,7 +590,7 @@ let draw_single ~struct_tree ~fast ~underneath ~filename ~bates ~batespad range
|
||||||
map3
|
map3
|
||||||
(fun n p ops ->
|
(fun n p ops ->
|
||||||
if not (mem n range) then p else
|
if not (mem n range) then p else
|
||||||
let ops = if struct_tree && !do_add_artifacts then add_artifacts ops else fixup_manual_artifacts ops in
|
let ops = if struct_tree && !do_add_artifacts then Cpdftype.add_artifacts ops else fixup_manual_artifacts ops in
|
||||||
let page = {p with Pdfpage.resources = update_resources pdf p.Pdfpage.resources} in
|
let page = {p with Pdfpage.resources = update_resources pdf p.Pdfpage.resources} in
|
||||||
(if underneath then Pdfpage.prepend_operators else Pdfpage.postpend_operators) pdf ops ~fast page)
|
(if underneath then Pdfpage.prepend_operators else Pdfpage.postpend_operators) pdf ops ~fast page)
|
||||||
(ilist 1 endpage)
|
(ilist 1 endpage)
|
||||||
|
|
|
@ -43,7 +43,10 @@ let of_utf8_with_newlines fontpack fontsize t =
|
||||||
if c <> [] then process_codepoints c;
|
if c <> [] then process_codepoints c;
|
||||||
rev !items
|
rev !items
|
||||||
|
|
||||||
let typeset ?subformat ?title ~papersize ~font ~fontsize text =
|
let typeset ~process_struct_tree ?subformat ?title ~papersize ~font ~fontsize text =
|
||||||
|
let process_struct_tree =
|
||||||
|
process_struct_tree || subformat = Some Cpdfua.PDFUA1 || subformat = Some Cpdfua.PDFUA2
|
||||||
|
in
|
||||||
let pdf = Pdf.empty () in
|
let pdf = Pdf.empty () in
|
||||||
let codepoints = setify (Pdftext.codepoints_of_utf8 (Pdfio.string_of_bytes text)) in
|
let codepoints = setify (Pdftext.codepoints_of_utf8 (Pdfio.string_of_bytes text)) in
|
||||||
let fontpack =
|
let fontpack =
|
||||||
|
@ -61,6 +64,6 @@ let typeset ?subformat ?title ~papersize ~font ~fontsize text =
|
||||||
let firstfont = hd (keep (function Cpdftype.Font _ -> true | _ -> false) instrs) in
|
let firstfont = hd (keep (function Cpdftype.Font _ -> true | _ -> false) instrs) in
|
||||||
[firstfont; Cpdftype.BeginDocument] @ instrs
|
[firstfont; Cpdftype.BeginDocument] @ instrs
|
||||||
in
|
in
|
||||||
let pages = Cpdftype.typeset margin margin margin margin papersize pdf instrs in
|
let pages = Cpdftype.typeset ~process_struct_tree margin margin margin margin papersize pdf instrs in
|
||||||
let pdf, pageroot = Pdfpage.add_pagetree pages pdf in
|
let pdf, pageroot = Pdfpage.add_pagetree pages pdf in
|
||||||
Pdfpage.add_root pageroot [] pdf
|
Pdfpage.add_root pageroot [] pdf
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
(** Text to PDF *)
|
(** Text to PDF *)
|
||||||
|
|
||||||
(** Typeset a text file as a PDF. *)
|
(** Typeset a text file as a PDF. *)
|
||||||
val typeset : ?subformat:Cpdfua.subformat option -> ?title:string option -> papersize:Pdfpaper.t -> font:Cpdfembed.cpdffont -> fontsize:float -> Pdfio.bytes -> Pdf.t
|
val typeset : process_struct_tree:bool -> ?subformat:Cpdfua.subformat -> ?title:string -> papersize:Pdfpaper.t -> font:Cpdfembed.cpdffont -> fontsize:float -> Pdfio.bytes -> Pdf.t
|
||||||
|
|
|
@ -175,7 +175,7 @@ let typeset_table_of_contents ~font ~fontsize ~title ~bookmark pdf =
|
||||||
let firstfont =
|
let firstfont =
|
||||||
hd (keep (function Cpdftype.Font _ -> true | _ -> false) (title @ flatten lines))
|
hd (keep (function Cpdftype.Font _ -> true | _ -> false) (title @ flatten lines))
|
||||||
in
|
in
|
||||||
Cpdftype.typeset lm rm tm bm firstpage_papersize pdf
|
Cpdftype.typeset ~process_struct_tree:false lm rm tm bm firstpage_papersize pdf
|
||||||
([firstfont; Cpdftype.BeginDocument] @ title @ flatten lines)
|
([firstfont; Cpdftype.BeginDocument] @ title @ flatten lines)
|
||||||
in
|
in
|
||||||
let toc_pages =
|
let toc_pages =
|
||||||
|
|
41
cpdftype.ml
41
cpdftype.ml
|
@ -18,6 +18,8 @@ type element =
|
||||||
| BeginDest of Pdfdest.t
|
| BeginDest of Pdfdest.t
|
||||||
| EndDest
|
| EndDest
|
||||||
| BeginDocument
|
| BeginDocument
|
||||||
|
| Tag of string
|
||||||
|
| EndTag
|
||||||
|
|
||||||
let to_string_elt = function
|
let to_string_elt = function
|
||||||
| Text t -> implode t
|
| Text t -> implode t
|
||||||
|
@ -29,6 +31,8 @@ let to_string_elt = function
|
||||||
| BeginDest _ -> "BeginDest"
|
| BeginDest _ -> "BeginDest"
|
||||||
| EndDest -> "EndDest"
|
| EndDest -> "EndDest"
|
||||||
| BeginDocument -> "BeginDocument"
|
| BeginDocument -> "BeginDocument"
|
||||||
|
| Tag s -> "Tag " ^ s
|
||||||
|
| EndTag -> "EndTag"
|
||||||
|
|
||||||
let to_string es = fold_left (fun a b -> a ^ "\n" ^ b) "" (map to_string_elt es)
|
let to_string es = fold_left (fun a b -> a ^ "\n" ^ b) "" (map to_string_elt es)
|
||||||
|
|
||||||
|
@ -54,6 +58,39 @@ let initial_state () =
|
||||||
ypos = 0.;
|
ypos = 0.;
|
||||||
dest = None}
|
dest = None}
|
||||||
|
|
||||||
|
(* Mark as an artifact anything not already marked. *)
|
||||||
|
let add_artifacts ops =
|
||||||
|
let content = ref false in
|
||||||
|
let artifact = ref false in
|
||||||
|
let rec loop a = function
|
||||||
|
| [] ->
|
||||||
|
(* The end. Must end artifact if in artifact. *)
|
||||||
|
if !artifact then rev (Pdfops.Op_EMC::a) else rev a
|
||||||
|
| Pdfops.Op_BMC "/BeginArtifact"::t ->
|
||||||
|
(* Convert back-channel artifact beginning. *)
|
||||||
|
set artifact;
|
||||||
|
loop (Pdfops.Op_BMC "/Artifact"::a) t
|
||||||
|
| Pdfops.Op_BMC "/EndArtifact"::t ->
|
||||||
|
(* Convert back-channel artifact ending. *)
|
||||||
|
clear artifact;
|
||||||
|
loop (Pdfops.Op_EMC::a) t
|
||||||
|
| Pdfops.Op_BDC _ as h::t ->
|
||||||
|
(* Entering content. If in artifact, must end artifact. *)
|
||||||
|
let a' = if !artifact then h::Pdfops.Op_EMC::a else h::a in
|
||||||
|
set content; clear artifact; loop a' t
|
||||||
|
| Pdfops.Op_EMC as h::t ->
|
||||||
|
(* Exiting content. *)
|
||||||
|
clear content;
|
||||||
|
loop (h::a) t
|
||||||
|
| h::t ->
|
||||||
|
(* A normal operation. If not in content or artifact must start artifact. *)
|
||||||
|
let a' =
|
||||||
|
if not (!content || !artifact) then (set artifact; h::Pdfops.Op_BMC "/Artifact"::a) else h::a
|
||||||
|
in
|
||||||
|
loop a' t
|
||||||
|
in
|
||||||
|
loop [] ops
|
||||||
|
|
||||||
let font_widths id f fontsize =
|
let font_widths id f fontsize =
|
||||||
match Hashtbl.find width_table_cache (id, fontsize) with
|
match Hashtbl.find width_table_cache (id, fontsize) with
|
||||||
| x -> x
|
| x -> x
|
||||||
|
@ -189,7 +226,7 @@ let make_annotations pdf annots =
|
||||||
(* At this stage, just Font and Text and HGlue 0. and VGlue 0. and Newline and
|
(* At this stage, just Font and Text and HGlue 0. and VGlue 0. and Newline and
|
||||||
NewPage elements. Split on NewPages, typeset each page, add font
|
NewPage elements. Split on NewPages, typeset each page, add font
|
||||||
dictionaries. New page only creates a page when that page has content. *)
|
dictionaries. New page only creates a page when that page has content. *)
|
||||||
let typeset lmargin rmargin tmargin bmargin papersize pdf i =
|
let typeset ~process_struct_tree lmargin rmargin tmargin bmargin papersize pdf i =
|
||||||
Hashtbl.clear width_table_cache;
|
Hashtbl.clear width_table_cache;
|
||||||
let debug = false in
|
let debug = false in
|
||||||
if debug then (print_endline "***input:\n\n"; print_endline (to_string i));
|
if debug then (print_endline "***input:\n\n"; print_endline (to_string i));
|
||||||
|
@ -286,3 +323,5 @@ let typeset lmargin rmargin tmargin bmargin papersize pdf i =
|
||||||
iter typeset_element i;
|
iter typeset_element i;
|
||||||
write_page ();
|
write_page ();
|
||||||
rev !pages
|
rev !pages
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,8 @@ type element =
|
||||||
| BeginDest of Pdfdest.t
|
| BeginDest of Pdfdest.t
|
||||||
| EndDest
|
| EndDest
|
||||||
| BeginDocument
|
| BeginDocument
|
||||||
|
| Tag of string
|
||||||
|
| EndTag
|
||||||
|
|
||||||
type t = element list
|
type t = element list
|
||||||
|
|
||||||
|
@ -22,4 +24,6 @@ val font_widths : string -> Pdftext.font -> float -> float array
|
||||||
val width_of_string : float array -> char list -> float
|
val width_of_string : float array -> char list -> float
|
||||||
|
|
||||||
(** [typeset lmargin rmargin tmargin bmargin papersize pdf contents] builds a list of pages of typset content. *)
|
(** [typeset lmargin rmargin tmargin bmargin papersize pdf contents] builds a list of pages of typset content. *)
|
||||||
val typeset : float -> float -> float -> float -> Pdfpaper.t -> Pdf.t -> t -> Pdfpage.t list
|
val typeset : process_struct_tree:bool -> float -> float -> float -> float -> Pdfpaper.t -> Pdf.t -> t -> Pdfpage.t list
|
||||||
|
|
||||||
|
val add_artifacts : Pdfops.t list -> Pdfops.t list
|
||||||
|
|
Loading…
Reference in New Issue