Fix for reading malformed bookmarks

This commit is contained in:
John Whitington 2019-07-06 15:09:25 +01:00
parent fd954c1b5d
commit b6eeb6efaa
1 changed files with 23 additions and 13 deletions

36
cpdf.ml
View File

@ -1230,13 +1230,13 @@ let remove_metadata pdf =
(* List bookmarks *) (* List bookmarks *)
let output_string_of_target pdf fastrefnums x = let output_string_of_target pdf fastrefnums x =
match Pdfdest.pdfobject_of_destination x with match Pdfdest.pdfobject_of_destination x with
| Pdf.Array [_; Pdf.Name "/Fit"] -> "" (*| Pdf.Array [_; Pdf.Name "/Fit"] -> ""*)
| Pdf.Array (Pdf.Indirect targetobjnum::more) -> | Pdf.Array (_::more) ->
let a = let a =
Pdf.Array (Pdf.Integer (Pdfpage.pagenumber_of_target ~fastrefnums pdf x)::more) Pdf.Array (Pdf.Integer (Pdfpage.pagenumber_of_target ~fastrefnums pdf x)::more)
in in
"\"" ^ Pdfwrite.string_of_pdf a ^ "\"" "\"" ^ Pdfwrite.string_of_pdf a ^ "\""
| _ -> "" | _ -> Printf.eprintf "Warning: could not read target for bookmark\n"; ""
(* List the bookmarks, optionally deunicoding the text, in the given range to the given output *) (* List the bookmarks, optionally deunicoding the text, in the given range to the given output *)
let list_bookmarks encoding range pdf output = let list_bookmarks encoding range pdf output =
@ -1284,16 +1284,26 @@ let list_bookmarks encoding range pdf output =
(match x.Pdfmarks.target with Pdfdest.NamedDestinationElsewhere _ -> true | _ -> false) || (match x.Pdfmarks.target with Pdfdest.NamedDestinationElsewhere _ -> true | _ -> false) ||
Hashtbl.mem rangetable (Pdfpage.pagenumber_of_target ~fastrefnums pdf x.Pdfmarks.target)) bookmarks Hashtbl.mem rangetable (Pdfpage.pagenumber_of_target ~fastrefnums pdf x.Pdfmarks.target)) bookmarks
in in
iter let calculate_page_number mark =
(function mark -> (* Some buggy PDFs use integers for page numbers instead of page
output.Pdfio.output_string * object references. Adobe Reader and Preview seem to support
(Printf.sprintf "%i \"%s\" %i %s %s\n" * this, for presumably historical reasons. So if we see an
mark.Pdfmarks.level * OtherDocPageNumber (which is what Pdfdest parses these as,
(process_string mark.Pdfmarks.text) * because that's what they are legitimately), we use this as the
(Pdfpage.pagenumber_of_target ~fastrefnums pdf mark.Pdfmarks.target) * page number. It is zero based, though, and we are one-based, so
(if mark.Pdfmarks.isopen then "open" else "") * we add one. Pdfpage.pagenumber_of_target has been modified to support this.*)
(output_string_of_target pdf fastrefnums mark.Pdfmarks.target))) Pdfpage.pagenumber_of_target ~fastrefnums pdf mark.Pdfmarks.target
inrange in
iter
(function mark ->
output.Pdfio.output_string
(Printf.sprintf "%i \"%s\" %i %s %s\n"
mark.Pdfmarks.level
(process_string mark.Pdfmarks.text)
(calculate_page_number mark)
(if mark.Pdfmarks.isopen then "open" else "")
(output_string_of_target pdf fastrefnums mark.Pdfmarks.target)))
inrange
(* o is the stamp, u is the main pdf page *) (* o is the stamp, u is the main pdf page *)