Apply rolemapping when reading structure tree
This commit is contained in:
parent
52b4d0d189
commit
dcb4dfdadd
35
cpdfua.ml
35
cpdfua.ml
|
@ -1,6 +1,11 @@
|
||||||
open Pdfutil
|
open Pdfutil
|
||||||
open Cpdferror
|
open Cpdferror
|
||||||
|
|
||||||
|
(* Implements all Matterhorn checks except for:
|
||||||
|
|
||||||
|
a) Those which require looking deep inside font files; and
|
||||||
|
b) Those which require reading inside the graphics stream. *)
|
||||||
|
|
||||||
exception MatterhornError of Cpdfyojson.Safe.t
|
exception MatterhornError of Cpdfyojson.Safe.t
|
||||||
|
|
||||||
exception MatterhornUnimplemented
|
exception MatterhornUnimplemented
|
||||||
|
@ -32,7 +37,7 @@ let rec read_st_inner pdf stnode =
|
||||||
and read_st_inner_array pdf nodes =
|
and read_st_inner_array pdf nodes =
|
||||||
map (read_st_inner pdf) nodes
|
map (read_st_inner pdf) nodes
|
||||||
|
|
||||||
let read_st pdf =
|
let read_st_basic pdf =
|
||||||
match Pdf.lookup_obj pdf pdf.Pdf.root with
|
match Pdf.lookup_obj pdf pdf.Pdf.root with
|
||||||
| Pdf.Dictionary d ->
|
| Pdf.Dictionary d ->
|
||||||
begin match lookup "/StructTreeRoot" d with
|
begin match lookup "/StructTreeRoot" d with
|
||||||
|
@ -41,6 +46,29 @@ let read_st pdf =
|
||||||
end
|
end
|
||||||
| _ -> error "read_st no root"
|
| _ -> error "read_st no root"
|
||||||
|
|
||||||
|
(* Rewrite a tree according to a rolemap. FIXME Better do the non-circularity
|
||||||
|
check when reading a rolemap, or this won't terminate... *)
|
||||||
|
let rec rewrite_st rolemap (E (n, cs)) =
|
||||||
|
let rec rewrite_st_name rolemap n =
|
||||||
|
match List.assoc_opt n rolemap with
|
||||||
|
| Some n' -> rewrite_st_name rolemap n'
|
||||||
|
| None -> n
|
||||||
|
in
|
||||||
|
E (rewrite_st_name rolemap n, map (rewrite_st rolemap) cs)
|
||||||
|
|
||||||
|
let read_rolemap pdf = function
|
||||||
|
| Pdf.Dictionary d ->
|
||||||
|
option_map (function (k, Pdf.Name v) -> Some (k, v) | _ -> None) d
|
||||||
|
| _ -> error "read_rolemap: not a rolemap"
|
||||||
|
|
||||||
|
let read_st pdf =
|
||||||
|
let rolemap =
|
||||||
|
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/StructTreeRoot"; "/RoleMap"] with
|
||||||
|
| Some rm -> read_rolemap pdf rm
|
||||||
|
| None -> []
|
||||||
|
in
|
||||||
|
rewrite_st rolemap (read_st_basic pdf)
|
||||||
|
|
||||||
let rec st_mem p = function
|
let rec st_mem p = function
|
||||||
| E (s, _) when p s -> true
|
| E (s, _) when p s -> true
|
||||||
| E (_, cs) -> List.exists (st_mem p) cs
|
| E (_, cs) -> List.exists (st_mem p) cs
|
||||||
|
@ -77,11 +105,6 @@ let standard_structure_types =
|
||||||
"/Art"; "/BlockQuote"; "/TOC"; "/TOCI"; "/Index"; "/Private"; "/Quote";
|
"/Art"; "/BlockQuote"; "/TOC"; "/TOCI"; "/Index"; "/Private"; "/Quote";
|
||||||
"/Note"; "/Reference"; "/Code"]
|
"/Note"; "/Reference"; "/Code"]
|
||||||
|
|
||||||
let read_rolemap pdf = function
|
|
||||||
| Pdf.Dictionary d ->
|
|
||||||
option_map (function (k, Pdf.Name v) -> Some (k, v) | _ -> None) d
|
|
||||||
| _ -> error "read_rolemap: not a rolemap"
|
|
||||||
|
|
||||||
let rec follow_standard rm n =
|
let rec follow_standard rm n =
|
||||||
match List.assoc_opt n rm with
|
match List.assoc_opt n rm with
|
||||||
| None -> raise Exit
|
| None -> raise Exit
|
||||||
|
|
Loading…
Reference in New Issue