/Reconcile /C and /A
This commit is contained in:
parent
facb9d3c75
commit
cda49a1cd8
26
cpdfua.ml
26
cpdfua.ml
|
@ -27,18 +27,15 @@ let print_children (E (n, cs)) =
|
|||
iter (fun (E (n, _)) -> Printf.printf "%S " n) cs;
|
||||
flprint "\n"
|
||||
|
||||
(* FIXME What about /C? *)
|
||||
(* FIXME What about class map? *)
|
||||
(* Read attributes. *)
|
||||
let read_a pdf stnode =
|
||||
let rec read_single d =
|
||||
match d with
|
||||
| Pdf.Dictionary d -> map fst d
|
||||
| Pdf.Stream s -> read_single (fst !s)
|
||||
| _ -> error "read_single"
|
||||
in
|
||||
let from_a =
|
||||
match Pdf.lookup_direct pdf "/A" stnode with
|
||||
|
||||
let read_a pdf n stnode =
|
||||
match Pdf.lookup_direct pdf n stnode with
|
||||
| Some (Pdf.Array attrs) ->
|
||||
let attrs = keep (function Pdf.Integer _ -> false | _ -> true) attrs in
|
||||
flatten (map read_single attrs)
|
||||
|
@ -48,7 +45,12 @@ let read_a pdf stnode =
|
|||
read_single (Pdf.Stream s)
|
||||
| Some _ -> []
|
||||
| None -> []
|
||||
in
|
||||
|
||||
let read_attributes pdf stnode =
|
||||
let from_a = read_a pdf "/A" stnode in
|
||||
let from_c = read_a pdf "/C" stnode in
|
||||
(* Where an attribute appears in both, the /A entry should take precedence over /C. But we are only testing for presence, so merely setify the union. *)
|
||||
let attrs = setify (from_a @ from_c) in
|
||||
(* For now, stick /ID, /Alt, /ActualText in here too. Eventually, move them elsewhere to prevent crashes. *)
|
||||
let alt =
|
||||
match Pdf.lookup_direct pdf "/Alt" stnode with | Some _ -> ["/Alt"] | None -> []
|
||||
|
@ -69,7 +71,7 @@ let read_a pdf stnode =
|
|||
end
|
||||
| _ -> []
|
||||
in
|
||||
from_a @ id @ at @ alt @ pageref
|
||||
attrs @ id @ at @ alt @ pageref
|
||||
|
||||
let rec read_st_inner pdf stnode =
|
||||
let s =
|
||||
|
@ -78,10 +80,10 @@ let rec read_st_inner pdf stnode =
|
|||
| _ -> ""
|
||||
in
|
||||
match Pdf.lookup_direct pdf "/K" stnode with
|
||||
| None -> E2 (s, read_a pdf stnode, [])
|
||||
| Some (Pdf.Dictionary d) -> E2 (s, read_a pdf stnode, [read_st_inner pdf (Pdf.Dictionary d)])
|
||||
| Some (Pdf.Integer mcd) -> E2 (s, read_a pdf stnode, []) (* marked content identifier, we drop. *)
|
||||
| Some (Pdf.Array a) -> E2 (s, read_a pdf stnode, read_st_inner_array pdf a)
|
||||
| None -> E2 (s, read_attributes pdf stnode, [])
|
||||
| Some (Pdf.Dictionary d) -> E2 (s, read_attributes pdf stnode, [read_st_inner pdf (Pdf.Dictionary d)])
|
||||
| Some (Pdf.Integer mcd) -> E2 (s, read_attributes pdf stnode, []) (* marked content identifier, we drop. *)
|
||||
| Some (Pdf.Array a) -> E2 (s, read_attributes pdf stnode, read_st_inner_array pdf a)
|
||||
| _ -> error "malformed st node"
|
||||
|
||||
and read_st_inner_array pdf nodes =
|
||||
|
|
Loading…
Reference in New Issue