Modernize @B bookmark namer
This commit is contained in:
parent
697ee3d412
commit
8119e6eb3f
|
@ -2,22 +2,31 @@ open Pdfutil
|
||||||
open Pdfio
|
open Pdfio
|
||||||
open Cpdferror
|
open Cpdferror
|
||||||
|
|
||||||
(* Remove characters which might not make good filenames. *)
|
(* Remove characters which might not make good filenames. UTF8 in, UTF8 out. *)
|
||||||
let remove_unsafe_characters encoding s =
|
let remove_unsafe_characters s =
|
||||||
if encoding = Cpdfmetadata.UTF8 then Pdftext.utf8_of_pdfdocstring s else (* For @B bookmarks splitting. *)
|
let codepoints = Pdftext.codepoints_of_utf8 s in
|
||||||
if encoding = Cpdfmetadata.Raw then s else
|
let codepoints =
|
||||||
let chars =
|
lose
|
||||||
lose
|
(function x ->
|
||||||
(function x ->
|
x = int_of_char '/'
|
||||||
match x with
|
|| x = int_of_char '?'
|
||||||
'/' | '?' | '<' | '>' | '\\' | ':' | '*' | '|' | '\"' | '^' | '+' | '=' -> true
|
|| x = int_of_char '<'
|
||||||
| x when int_of_char x < 32 || (int_of_char x > 126 && encoding <> Cpdfmetadata.Stripped) -> true
|
|| x = int_of_char '>'
|
||||||
| _ -> false)
|
|| x = int_of_char '\\'
|
||||||
(explode s)
|
|| x = int_of_char ':'
|
||||||
in
|
|| x = int_of_char '*'
|
||||||
match chars with
|
|| x = int_of_char '|'
|
||||||
| '.'::more -> implode more
|
|| x = int_of_char '\"'
|
||||||
| chars -> implode chars
|
|| x = int_of_char '^'
|
||||||
|
|| x = int_of_char '+'
|
||||||
|
|| x = int_of_char '='
|
||||||
|
|| x < 32
|
||||||
|
|| x = 127)
|
||||||
|
codepoints
|
||||||
|
in
|
||||||
|
match codepoints with
|
||||||
|
| 46::more -> Pdftext.utf8_of_codepoints more (* 46 = '.': drop the leading dot so we don't produce a dotfile *)
|
||||||
|
| chars -> Pdftext.utf8_of_codepoints codepoints
|
||||||
|
|
||||||
(* Attaching files *)
|
(* Attaching files *)
|
||||||
let attach_file ?memory keepversion topage pdf file =
|
let attach_file ?memory keepversion topage pdf file =
|
||||||
|
@ -257,7 +266,7 @@ let dump_attachment out pdf (_, embeddedfile) =
|
||||||
| _ -> error "Bad embedded file stream"
|
| _ -> error "Bad embedded file stream"
|
||||||
end
|
end
|
||||||
in
|
in
|
||||||
let s = remove_unsafe_characters Cpdfmetadata.UTF8 s in
|
let s = remove_unsafe_characters (Pdftext.utf8_of_pdfdocstring s) in
|
||||||
let filename = if out = "" then s else out ^ Filename.dir_sep ^ s in
|
let filename = if out = "" then s else out ^ Filename.dir_sep ^ s in
|
||||||
begin try
|
begin try
|
||||||
let fh = open_out_bin filename in
|
let fh = open_out_bin filename in
|
||||||
|
|
|
@ -5,9 +5,8 @@ type attachment =
|
||||||
pagenumber : int;
|
pagenumber : int;
|
||||||
data : unit -> Pdfio.bytes}
|
data : unit -> Pdfio.bytes}
|
||||||
|
|
||||||
(** Remove characters which might not make good filenames. If the encoding is
|
(** Remove characters which might not make good filenames from a UTF8 string. *)
|
||||||
[Cpdfmetadata.Stripped] we in addition lose any character > 126. *)
|
val remove_unsafe_characters : string -> string
|
||||||
val remove_unsafe_characters : Cpdfmetadata.encoding -> string -> string
|
|
||||||
|
|
||||||
(** [attach_file keepversion topage pdf filename] attaches the file in
|
(** [attach_file keepversion topage pdf filename] attaches the file in
|
||||||
[filename] to the pdf, optionally to a page (rather than document-level).
|
[filename] to the pdf, optionally to a page (rather than document-level).
|
||||||
|
|
|
@ -265,7 +265,7 @@ let get_bookmark_name encoding pdf marks splitlevel n _ =
|
||||||
let refnums = Pdf.page_reference_numbers pdf in
|
let refnums = Pdf.page_reference_numbers pdf in
|
||||||
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
|
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
|
||||||
match keep (function m -> n = Pdfpage.pagenumber_of_target ~fastrefnums pdf m.Pdfmarks.target && m.Pdfmarks.level <= splitlevel) marks with
|
match keep (function m -> n = Pdfpage.pagenumber_of_target ~fastrefnums pdf m.Pdfmarks.target && m.Pdfmarks.level <= splitlevel) marks with
|
||||||
| {Pdfmarks.text = title}::_ -> Cpdfattach.remove_unsafe_characters encoding title
|
| {Pdfmarks.text = title}::_ -> Cpdfattach.remove_unsafe_characters (Pdftext.utf8_of_pdfdocstring title)
|
||||||
| _ -> ""
|
| _ -> ""
|
||||||
|
|
||||||
(* @F means filename without extension *)
|
(* @F means filename without extension *)
|
||||||
|
|
BIN
cpdfmanual.pdf
BIN
cpdfmanual.pdf
Binary file not shown.
|
@ -1303,7 +1303,7 @@ the result is unspecified. The following format operators may be used:
|
||||||
\texttt{@N} & Sequence number without padding zeroes \\
|
\texttt{@N} & Sequence number without padding zeroes \\
|
||||||
\texttt{@S} & Start page of this chunk \\
|
\texttt{@S} & Start page of this chunk \\
|
||||||
\texttt{@E} & End page of this chunk \\
|
\texttt{@E} & End page of this chunk \\
|
||||||
\texttt{@B} & Bookmark name at this page \\
|
\texttt{@B} & Bookmark name at this page, if any. \\
|
||||||
\end{tabular}
|
\end{tabular}
|
||||||
\end{center}
|
\end{center}
|
||||||
|
|
||||||
|
@ -1340,14 +1340,12 @@ one of the output files.
|
||||||
\noindent Split \texttt{a.pdf} on bookmark boundaries, using the bookmark text as the filename.
|
\noindent Split \texttt{a.pdf} on bookmark boundaries, using the bookmark text as the filename.
|
||||||
|
|
||||||
\end{framed}
|
\end{framed}
|
||||||
\noindent The bookmark text used for a name is converted from unicode to 7 bit ASCII, and the following characters are removed, in addition to any character with ASCII code less than 32:
|
\noindent The bookmark text used for a name has the following characters removed, in addition to any character with ASCII code less than 32 or equal to 127. Also, names beginning with \texttt{.} are not produced.
|
||||||
\begin{framed}
|
\begin{framed}
|
||||||
\centering
|
\centering
|
||||||
\verb! / ? < > \ : * | " ^ + =!
|
\verb! / ? < > \ : * | " ^ + =!
|
||||||
\end{framed}
|
\end{framed}
|
||||||
|
|
||||||
\noindent To prevent this process, and convert bookmark names to UTF8 instead, add \texttt{-utf8} to the command.
|
|
||||||
|
|
||||||
\section{Splitting to Maximum Size}
|
\section{Splitting to Maximum Size}
|
||||||
|
|
||||||
The \texttt{-split-max} operation splits a file into chunks of no more than the given size, starting at the beginning. The suffixes kB, KiB, MB, MiB, GB, and GiB may be used to give the size. For example:
|
The \texttt{-split-max} operation splits a file into chunks of no more than the given size, starting at the beginning. The suffixes kB, KiB, MB, MiB, GB, and GiB may be used to give the size. For example:
|
||||||
|
|
Loading…
Reference in New Issue