mirror of
https://github.com/johnwhitington/cpdf-source.git
synced 2025-04-15 18:57:31 +02:00
more
This commit is contained in:
parent
974025f7ac
commit
83d484a15c
132
cpdfbookmarks.ml
132
cpdfbookmarks.ml
@ -192,75 +192,75 @@ let output_json_marks ch calculate_page_number pdf fastrefnums marks =
|
|||||||
let json = `List (map json_of_mark marks) in
|
let json = `List (map json_of_mark marks) in
|
||||||
J.pretty_to_channel ch json
|
J.pretty_to_channel ch json
|
||||||
|
|
||||||
|
let process_string encoding s =
|
||||||
|
let rec replace c x y = function
|
||||||
|
| [] -> []
|
||||||
|
| h::t when h = c -> x::y::replace c x y t
|
||||||
|
| h::t -> h::replace c x y t
|
||||||
|
in
|
||||||
|
(* Convert to UTF8, raw, or stripped, and escape backslashes and quotation marks *)
|
||||||
|
let codepoints = Pdftext.codepoints_of_pdfdocstring s in
|
||||||
|
let escaped =
|
||||||
|
let bs = int_of_char '\\'
|
||||||
|
and nl = int_of_char '\n'
|
||||||
|
and n = int_of_char 'n'
|
||||||
|
and q = int_of_char '\"' in
|
||||||
|
replace q bs q (replace nl bs n (replace bs bs bs codepoints))
|
||||||
|
in
|
||||||
|
let process_stripped escaped =
|
||||||
|
let b = Buffer.create 200 in
|
||||||
|
iter
|
||||||
|
(fun x ->
|
||||||
|
if x <= 127 then Buffer.add_char b (char_of_int x))
|
||||||
|
escaped;
|
||||||
|
Buffer.contents b
|
||||||
|
in
|
||||||
|
match encoding with
|
||||||
|
| Cpdfmetadata.UTF8 -> Pdftext.utf8_of_codepoints escaped
|
||||||
|
| Cpdfmetadata.Stripped -> process_stripped escaped
|
||||||
|
| Cpdfmetadata.Raw -> s
|
||||||
|
|
||||||
(* List the bookmarks in the given range to the given output *)
|
(* List the bookmarks in the given range to the given output *)
|
||||||
let list_bookmarks ~json encoding range pdf output =
|
let list_bookmarks ~json encoding range pdf output =
|
||||||
let process_stripped escaped =
|
let bookmarks = Pdfmarks.read_bookmarks pdf in
|
||||||
let b = Buffer.create 200 in
|
let refnums = Pdf.page_reference_numbers pdf in
|
||||||
iter
|
let rangetable = hashset_of_list range in
|
||||||
(fun x ->
|
let range_is_all = range = ilist 1 (Pdfpage.endpage pdf) in
|
||||||
if x <= 127 then Buffer.add_char b (char_of_int x))
|
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
|
||||||
escaped;
|
(* Find the pagenumber of each bookmark target. If it is in the range,
|
||||||
Buffer.contents b
|
* keep that bookmark. Also keep the bookmark if its target is the null
|
||||||
in
|
* destination. *)
|
||||||
let process_string s =
|
let inrange =
|
||||||
let rec replace c x y = function
|
keep
|
||||||
| [] -> []
|
(function x ->
|
||||||
| h::t when h = c -> x::y::replace c x y t
|
range_is_all ||
|
||||||
| h::t -> h::replace c x y t
|
x.Pdfmarks.target = Pdfdest.NullDestination ||
|
||||||
|
(match x.Pdfmarks.target with Pdfdest.NamedDestinationElsewhere _ -> true | _ -> false) ||
|
||||||
|
Hashtbl.mem rangetable (Pdfpage.pagenumber_of_target ~fastrefnums pdf x.Pdfmarks.target)) bookmarks
|
||||||
in
|
in
|
||||||
(* Convert to UTF8, raw, or stripped, and escape backslashes and quotation marks *)
|
let calculate_page_number mark =
|
||||||
let codepoints = Pdftext.codepoints_of_pdfdocstring s in
|
(* Some buggy PDFs use integers for page numbers instead of page
|
||||||
let escaped =
|
* object references. Adobe Reader and Preview seem to support
|
||||||
let bs = int_of_char '\\'
|
* this, for presumably historical reasons. So if we see a
|
||||||
and nl = int_of_char '\n'
|
* OtherDocPageNumber (which is what Pdfdest parses these as,
|
||||||
and n = int_of_char 'n'
|
* because that's what they are legitimately), we use this as the
|
||||||
and q = int_of_char '\"' in
|
* page number. It is zero based, though, and we are one-based, so
|
||||||
replace q bs q (replace nl bs n (replace bs bs bs codepoints))
|
* we add one. Pdfpage.pagenumber_of_target has been modified to support this.*)
|
||||||
in
|
Pdfpage.pagenumber_of_target ~fastrefnums pdf mark.Pdfmarks.target
|
||||||
match encoding with
|
in
|
||||||
| Cpdfmetadata.UTF8 -> Pdftext.utf8_of_codepoints escaped
|
if json then
|
||||||
| Cpdfmetadata.Stripped -> process_stripped escaped
|
output_json_marks stdout calculate_page_number pdf fastrefnums inrange
|
||||||
| Cpdfmetadata.Raw -> s
|
else
|
||||||
in
|
iter
|
||||||
let bookmarks = Pdfmarks.read_bookmarks pdf in
|
(function mark ->
|
||||||
let refnums = Pdf.page_reference_numbers pdf in
|
output.Pdfio.output_string
|
||||||
let rangetable = hashset_of_list range in
|
(Printf.sprintf "%i \"%s\" %i%s %s\n"
|
||||||
let range_is_all = range = ilist 1 (Pdfpage.endpage pdf) in
|
mark.Pdfmarks.level
|
||||||
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
|
(process_string encoding mark.Pdfmarks.text)
|
||||||
(* Find the pagenumber of each bookmark target. If it is in the range,
|
(calculate_page_number mark)
|
||||||
* keep that bookmark. Also keep the bookmark if its target is the null
|
(if mark.Pdfmarks.isopen then " open" else "")
|
||||||
* destination. *)
|
(output_string_of_target pdf fastrefnums mark.Pdfmarks.target)))
|
||||||
let inrange =
|
inrange
|
||||||
keep
|
|
||||||
(function x ->
|
|
||||||
range_is_all ||
|
|
||||||
x.Pdfmarks.target = Pdfdest.NullDestination ||
|
|
||||||
(match x.Pdfmarks.target with Pdfdest.NamedDestinationElsewhere _ -> true | _ -> false) ||
|
|
||||||
Hashtbl.mem rangetable (Pdfpage.pagenumber_of_target ~fastrefnums pdf x.Pdfmarks.target)) bookmarks
|
|
||||||
in
|
|
||||||
let calculate_page_number mark =
|
|
||||||
(* Some buggy PDFs use integers for page numbers instead of page
|
|
||||||
* object references. Adobe Reader and Preview seem to support
|
|
||||||
* this, for presumably historical reasons. So if we see a
|
|
||||||
* OtherDocPageNumber (which is what Pdfdest parses these as,
|
|
||||||
* because that's what they are legitimately), we use this as the
|
|
||||||
* page number. It is zero based, though, and we are one-based, so
|
|
||||||
* we add one. Pdfpage.pagenumber_of_target has been modified to support this.*)
|
|
||||||
Pdfpage.pagenumber_of_target ~fastrefnums pdf mark.Pdfmarks.target
|
|
||||||
in
|
|
||||||
if json then
|
|
||||||
output_json_marks stdout calculate_page_number pdf fastrefnums inrange
|
|
||||||
else
|
|
||||||
iter
|
|
||||||
(function mark ->
|
|
||||||
output.Pdfio.output_string
|
|
||||||
(Printf.sprintf "%i \"%s\" %i%s %s\n"
|
|
||||||
mark.Pdfmarks.level
|
|
||||||
(process_string mark.Pdfmarks.text)
|
|
||||||
(calculate_page_number mark)
|
|
||||||
(if mark.Pdfmarks.isopen then " open" else "")
|
|
||||||
(output_string_of_target pdf fastrefnums mark.Pdfmarks.target)))
|
|
||||||
inrange
|
|
||||||
|
|
||||||
(* o is the stamp, u is the main pdf page *)
|
(* o is the stamp, u is the main pdf page *)
|
||||||
|
|
||||||
|
BIN
cpdfmanual.pdf
BIN
cpdfmanual.pdf
Binary file not shown.
@ -1562,6 +1562,9 @@ There are two options which turn off parts of the squeezer. They are \texttt{-sq
|
|||||||
\begin{framed}
|
\begin{framed}
|
||||||
\small\noindent\verb!cpdf -list-bookmarks [-utf8 | -raw] in.pdf!
|
\small\noindent\verb!cpdf -list-bookmarks [-utf8 | -raw] in.pdf!
|
||||||
|
|
||||||
|
\vspace{1.5mm}
|
||||||
|
\small\noindent\verb!cpdf -list-bookmarks-json in.pdf!
|
||||||
|
|
||||||
\vspace{1.5mm}
|
\vspace{1.5mm}
|
||||||
\small\noindent\verb!cpdf -remove-bookmarks in.pdf -o out.pdf!
|
\small\noindent\verb!cpdf -remove-bookmarks in.pdf -o out.pdf!
|
||||||
|
|
||||||
@ -1608,6 +1611,24 @@ the ASCII range. To prevent this, and return unicode UTF8 output, add the
|
|||||||
\texttt{-utf8} option to the command. To prevent any processing, use the
|
\texttt{-utf8} option to the command. To prevent any processing, use the
|
||||||
\texttt{-raw} option. See Section \ref{textencodings} for more information. A newline in a bookmark is represented as \texttt{"\textbackslash n"}.
|
\texttt{-raw} option. See Section \ref{textencodings} for more information. A newline in a bookmark is represented as \texttt{"\textbackslash n"}.
|
||||||
|
|
||||||
|
By using \texttt{-list-bookmarks-json} instead, the bookmarks are formatted as a JSON array, in order, of dictionaries formatted thus:
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
{ "level": 0,
|
||||||
|
"text": "1 Basic Usage",
|
||||||
|
"page": 17,
|
||||||
|
"open": false,
|
||||||
|
"target":
|
||||||
|
[ { "I": 17 },
|
||||||
|
{ "N": "/XYZ" },
|
||||||
|
{ "F": 85.039 },
|
||||||
|
{ "F": 609.307 },
|
||||||
|
null ]
|
||||||
|
}
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
See chapter 15 for more details of cpdf's JSON formatting. Bookmark text in JSON bookmarks, however, is in UTF8 for ease of use.
|
||||||
|
|
||||||
\subsection{Destinations}
|
\subsection{Destinations}
|
||||||
|
|
||||||
The destination is an extended description of where the bookmark should point to (i.e. it can be more detailed than just giving the page). For example, it may point to a section heading halfway down a page. Here are the possibilities:
|
The destination is an extended description of where the bookmark should point to (i.e. it can be more detailed than just giving the page). For example, it may point to a section heading halfway down a page. Here are the possibilities:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user