Apply rolemapping when reading structure tree
This commit is contained in:
parent
52b4d0d189
commit
dcb4dfdadd
35
cpdfua.ml
35
cpdfua.ml
|
@ -1,6 +1,11 @@
|
|||
open Pdfutil
|
||||
open Cpdferror
|
||||
|
||||
(* Implements all Matterhorn checks except for:
|
||||
|
||||
a) Those which require looking deep inside font files; and
|
||||
b) Those which require reading inside the graphics stream. *)
|
||||
|
||||
exception MatterhornError of Cpdfyojson.Safe.t
|
||||
|
||||
exception MatterhornUnimplemented
|
||||
|
@ -32,7 +37,7 @@ let rec read_st_inner pdf stnode =
|
|||
and read_st_inner_array pdf nodes =
|
||||
map (read_st_inner pdf) nodes
|
||||
|
||||
let read_st pdf =
|
||||
let read_st_basic pdf =
|
||||
match Pdf.lookup_obj pdf pdf.Pdf.root with
|
||||
| Pdf.Dictionary d ->
|
||||
begin match lookup "/StructTreeRoot" d with
|
||||
|
@ -41,6 +46,29 @@ let read_st pdf =
|
|||
end
|
||||
| _ -> error "read_st no root"
|
||||
|
||||
(* Rewrite a tree according to a rolemap. FIXME Better do the non-circularity
|
||||
check when reading a rolemap, or this won't terminate... *)
|
||||
let rec rewrite_st rolemap (E (n, cs)) =
|
||||
let rec rewrite_st_name rolemap n =
|
||||
match List.assoc_opt n rolemap with
|
||||
| Some n' -> rewrite_st_name rolemap n'
|
||||
| None -> n
|
||||
in
|
||||
E (rewrite_st_name rolemap n, map (rewrite_st rolemap) cs)
|
||||
|
||||
let read_rolemap pdf = function
|
||||
| Pdf.Dictionary d ->
|
||||
option_map (function (k, Pdf.Name v) -> Some (k, v) | _ -> None) d
|
||||
| _ -> error "read_rolemap: not a rolemap"
|
||||
|
||||
let read_st pdf =
|
||||
let rolemap =
|
||||
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/StructTreeRoot"; "/RoleMap"] with
|
||||
| Some rm -> read_rolemap pdf rm
|
||||
| None -> []
|
||||
in
|
||||
rewrite_st rolemap (read_st_basic pdf)
|
||||
|
||||
let rec st_mem p = function
|
||||
| E (s, _) when p s -> true
|
||||
| E (_, cs) -> List.exists (st_mem p) cs
|
||||
|
@ -77,11 +105,6 @@ let standard_structure_types =
|
|||
"/Art"; "/BlockQuote"; "/TOC"; "/TOCI"; "/Index"; "/Private"; "/Quote";
|
||||
"/Note"; "/Reference"; "/Code"]
|
||||
|
||||
let read_rolemap pdf = function
|
||||
| Pdf.Dictionary d ->
|
||||
option_map (function (k, Pdf.Name v) -> Some (k, v) | _ -> None) d
|
||||
| _ -> error "read_rolemap: not a rolemap"
|
||||
|
||||
let rec follow_standard rm n =
|
||||
match List.assoc_opt n rm with
|
||||
| None -> raise Exit
|
||||
|
|
Loading…
Reference in New Issue