Rework autotagging
This commit is contained in:
parent
505969d6cb
commit
d0ef176c68
19
cpdfdraw.ml
19
cpdfdraw.ml
|
@ -69,8 +69,9 @@ type drawops =
|
||||||
| Namespace of string
|
| Namespace of string
|
||||||
| EltInfo of string * Pdf.pdfobject
|
| EltInfo of string * Pdf.pdfobject
|
||||||
| EndEltInfo of string
|
| EndEltInfo of string
|
||||||
|
| AutoTag of bool
|
||||||
|
|
||||||
(*let rec string_of_drawop = function
|
let rec string_of_drawop = function
|
||||||
| Qq o -> "Qq (" ^ string_of_drawops o ^ ")"
|
| Qq o -> "Qq (" ^ string_of_drawops o ^ ")"
|
||||||
| FormXObject (_, _, _, _, _, o) -> "FormXObject (" ^ string_of_drawops o ^ ")"
|
| FormXObject (_, _, _, _, _, o) -> "FormXObject (" ^ string_of_drawops o ^ ")"
|
||||||
| TextSection o -> "TextSection (" ^ string_of_drawops o ^ ")"
|
| TextSection o -> "TextSection (" ^ string_of_drawops o ^ ")"
|
||||||
|
@ -89,9 +90,14 @@ type drawops =
|
||||||
| Newline -> "Newline" | Leading _ -> "Leading" | CharSpace _ -> "CharSpace"
|
| Newline -> "Newline" | Leading _ -> "Leading" | CharSpace _ -> "CharSpace"
|
||||||
| WordSpace _ -> "WordSpace" | TextScale _ -> "TextScale"
|
| WordSpace _ -> "WordSpace" | TextScale _ -> "TextScale"
|
||||||
| RenderMode _ -> "RenderMode" | Rise _ -> "Rise"
|
| RenderMode _ -> "RenderMode" | Rise _ -> "Rise"
|
||||||
|
| EndTag -> "EndTag" | Tag s -> "Tag " ^ s | EndSTag -> "EndSTag" | STag s -> "Tag " ^ s
|
||||||
|
| BeginArtifact -> "BeginArtifact" | EndArtifact -> "EndArtifact"
|
||||||
|
| Para (_, _, _, _) -> "Para" | Namespace s -> "Namespace " ^ s
|
||||||
|
| EltInfo (_, _) -> "EltInfo" | EndEltInfo _ -> "EndEltInfo"
|
||||||
|
| AutoTag _ -> "AutoTag"
|
||||||
|
|
||||||
and string_of_drawops l =
|
and string_of_drawops l =
|
||||||
fold_left (fun x y -> x ^ " " ^ y) "" (map string_of_drawop l)*)
|
fold_left (fun x y -> x ^ " " ^ y) "" (map string_of_drawop l)
|
||||||
|
|
||||||
(* Per page / xobject resources *)
|
(* Per page / xobject resources *)
|
||||||
type res =
|
type res =
|
||||||
|
@ -437,7 +443,7 @@ let rec ops_of_drawop struct_tree dryrun pdf endpage filename bates batespad num
|
||||||
[]
|
[]
|
||||||
| TextSection ops ->
|
| TextSection ops ->
|
||||||
let m = mcid () in
|
let m = mcid () in
|
||||||
if not dryrun then structdata := StDataMCID ("/P", m)::!structdata;
|
if not dryrun && !do_auto_tag then structdata := StDataMCID ("/P", m)::!structdata;
|
||||||
(if struct_tree && !do_auto_tag then [Pdfops.Op_BDC ("/P", Pdf.Dictionary ["/MCID", Pdf.Integer m])] else [])
|
(if struct_tree && !do_auto_tag then [Pdfops.Op_BDC ("/P", Pdf.Dictionary ["/MCID", Pdf.Integer m])] else [])
|
||||||
@ [Pdfops.Op_BT]
|
@ [Pdfops.Op_BT]
|
||||||
@ ops_of_drawops struct_tree dryrun pdf endpage filename bates batespad num page ops
|
@ ops_of_drawops struct_tree dryrun pdf endpage filename bates batespad num page ops
|
||||||
|
@ -487,6 +493,9 @@ let rec ops_of_drawop struct_tree dryrun pdf endpage filename bates batespad num
|
||||||
| EndEltInfo s ->
|
| EndEltInfo s ->
|
||||||
if not dryrun then structdata =| StEndEltInfo s;
|
if not dryrun then structdata =| StEndEltInfo s;
|
||||||
[]
|
[]
|
||||||
|
| AutoTag b ->
|
||||||
|
do_auto_tag := b;
|
||||||
|
[]
|
||||||
|
|
||||||
and ops_of_drawops struct_tree dryrun pdf endpage filename bates batespad num page drawops =
|
and ops_of_drawops struct_tree dryrun pdf endpage filename bates batespad num page drawops =
|
||||||
flatten (map (ops_of_drawop struct_tree dryrun pdf endpage filename bates batespad num page) drawops)
|
flatten (map (ops_of_drawop struct_tree dryrun pdf endpage filename bates batespad num page) drawops)
|
||||||
|
@ -653,12 +662,12 @@ let rec find_tree_contents a level = function
|
||||||
if level = 1 then (rev a, t) else find_tree_contents a (level - 1) t
|
if level = 1 then (rev a, t) else find_tree_contents a (level - 1) t
|
||||||
| h::t -> find_tree_contents (h::a) level t
|
| h::t -> find_tree_contents (h::a) level t
|
||||||
|
|
||||||
let mstdebug = ref false
|
let mstdebug = ref true
|
||||||
|
|
||||||
let rec make_structure_tree pageobjnums (pn, ns, ei) pdf = function
|
let rec make_structure_tree pageobjnums (pn, ns, ei) pdf = function
|
||||||
| [] -> []
|
| [] -> []
|
||||||
| StDataMCID (n, mcid)::t ->
|
| StDataMCID (n, mcid)::t ->
|
||||||
if !mstdebug then Printf.printf "StDataMCID, pagenum = %i, pageobjnum = %i\n" !pn (unopt (lookup !pn pageobjnums));
|
if !mstdebug then Printf.printf "StDataMCID, type = %s pagenum = %i, pageobjnum = %i\n" n !pn (unopt (lookup !pn pageobjnums));
|
||||||
let item =
|
let item =
|
||||||
StItem {kind = n; namespace = !ns; alt = list_of_hashtbl ei; pageobjnum = lookup !pn pageobjnums; children = [StMCID mcid]}
|
StItem {kind = n; namespace = !ns; alt = list_of_hashtbl ei; pageobjnum = lookup !pn pageobjnums; children = [StMCID mcid]}
|
||||||
in
|
in
|
||||||
|
|
|
@ -62,11 +62,10 @@ type drawops =
|
||||||
| Namespace of string
|
| Namespace of string
|
||||||
| EltInfo of string * Pdf.pdfobject
|
| EltInfo of string * Pdf.pdfobject
|
||||||
| EndEltInfo of string
|
| EndEltInfo of string
|
||||||
|
| AutoTag of bool
|
||||||
|
|
||||||
val do_add_artifacts : bool ref
|
val do_add_artifacts : bool ref
|
||||||
|
|
||||||
val do_auto_tag : bool ref
|
|
||||||
|
|
||||||
val rolemap : string ref
|
val rolemap : string ref
|
||||||
|
|
||||||
(** Calling [draw fast underneath filename bates batespad range pdf drawops] draws on
|
(** Calling [draw fast underneath filename bates batespad range pdf drawops] draws on
|
||||||
|
|
|
@ -109,7 +109,7 @@ let endstag () =
|
||||||
addop Cpdfdraw.EndSTag
|
addop Cpdfdraw.EndSTag
|
||||||
|
|
||||||
let autotags b =
|
let autotags b =
|
||||||
Cpdfdraw.do_auto_tag := b
|
addop (Cpdfdraw.AutoTag b)
|
||||||
|
|
||||||
let autoartifacts b =
|
let autoartifacts b =
|
||||||
Cpdfdraw.do_add_artifacts := b
|
Cpdfdraw.do_add_artifacts := b
|
||||||
|
|
Loading…
Reference in New Issue