Fix for reading malformed bookmarks

This commit is contained in:
John Whitington 2019-07-06 15:09:25 +01:00
parent fd954c1b5d
commit b6eeb6efaa
1 changed file with 23 additions and 13 deletions

18
cpdf.ml
View File

@ -1230,13 +1230,13 @@ let remove_metadata pdf =
(* List bookmarks *) (* List bookmarks *)
let output_string_of_target pdf fastrefnums x = let output_string_of_target pdf fastrefnums x =
match Pdfdest.pdfobject_of_destination x with match Pdfdest.pdfobject_of_destination x with
| Pdf.Array [_; Pdf.Name "/Fit"] -> "" (*| Pdf.Array [_; Pdf.Name "/Fit"] -> ""*)
| Pdf.Array (Pdf.Indirect targetobjnum::more) -> | Pdf.Array (_::more) ->
let a = let a =
Pdf.Array (Pdf.Integer (Pdfpage.pagenumber_of_target ~fastrefnums pdf x)::more) Pdf.Array (Pdf.Integer (Pdfpage.pagenumber_of_target ~fastrefnums pdf x)::more)
in in
"\"" ^ Pdfwrite.string_of_pdf a ^ "\"" "\"" ^ Pdfwrite.string_of_pdf a ^ "\""
| _ -> "" | _ -> Printf.eprintf "Warning: could not read target for bookmark\n"; ""
(* List the bookmarks, optionally deunicoding the text, in the given range to the given output *) (* List the bookmarks, optionally deunicoding the text, in the given range to the given output *)
let list_bookmarks encoding range pdf output = let list_bookmarks encoding range pdf output =
@ -1283,6 +1283,16 @@ let list_bookmarks encoding range pdf output =
x.Pdfmarks.target = Pdfdest.NullDestination || x.Pdfmarks.target = Pdfdest.NullDestination ||
(match x.Pdfmarks.target with Pdfdest.NamedDestinationElsewhere _ -> true | _ -> false) || (match x.Pdfmarks.target with Pdfdest.NamedDestinationElsewhere _ -> true | _ -> false) ||
Hashtbl.mem rangetable (Pdfpage.pagenumber_of_target ~fastrefnums pdf x.Pdfmarks.target)) bookmarks Hashtbl.mem rangetable (Pdfpage.pagenumber_of_target ~fastrefnums pdf x.Pdfmarks.target)) bookmarks
in
let calculate_page_number mark =
(* Some buggy PDFs use integers for page numbers instead of page
* object references. Adobe Reader and Preview seem to support
* this, presumably for historical reasons. So if we see an
* OtherDocPageNumber (which is what Pdfdest parses these as,
* because that's what they legitimately are), we use this as the
* page number. It is zero-based, though, and we are one-based, so
* we add one. Pdfpage.pagenumber_of_target has been modified to support this. *)
Pdfpage.pagenumber_of_target ~fastrefnums pdf mark.Pdfmarks.target
in in
iter iter
(function mark -> (function mark ->
@ -1290,7 +1300,7 @@ let list_bookmarks encoding range pdf output =
(Printf.sprintf "%i \"%s\" %i %s %s\n" (Printf.sprintf "%i \"%s\" %i %s %s\n"
mark.Pdfmarks.level mark.Pdfmarks.level
(process_string mark.Pdfmarks.text) (process_string mark.Pdfmarks.text)
(Pdfpage.pagenumber_of_target ~fastrefnums pdf mark.Pdfmarks.target) (calculate_page_number mark)
(if mark.Pdfmarks.isopen then "open" else "") (if mark.Pdfmarks.isopen then "open" else "")
(output_string_of_target pdf fastrefnums mark.Pdfmarks.target))) (output_string_of_target pdf fastrefnums mark.Pdfmarks.target)))
inrange inrange