Initial implementation of page labels in ranges

This commit is contained in:
John Whitington 2023-03-13 21:28:03 +00:00
parent 4394399bc7
commit b5ed462fc9
2 changed files with 23 additions and 0 deletions

View File

@ -95,8 +95,30 @@ let invert_range endpage r =
(* [duplicate_range n r] replaces every element [x] of the range [r] with the
   result of [many x n] and flattens the pieces back into a single list.
   NOTE(review): presumably [many x n] yields [n] copies of [x], so each page
   ends up repeated [n] times in place -- confirm against [many]. *)
let duplicate_range n r =
  flatten (map (fun x -> many x n) r)
(* [resolve_pagelabels pdf spec] rewrites every page label written in angle
   brackets inside [spec] (a page specification, given as a list of
   characters) into the decimal page number of the page in [pdf] which carries
   that label. For example, <1> becomes the number of the page labelled "1",
   and <iii> the number of the page labelled "iii".

   A backslash before < or > yields the literal bracket: outside a label it is
   copied to the output, inside a label it becomes part of the label text. All
   other characters are copied through unchanged. A label which is still open
   at the end of [spec] is treated as if it had been closed there.

   Raises [PageSpecBadSyntax] on an unescaped > outside a label, or when no
   page carries the requested label. *)
let resolve_pagelabels pdf spec =
  (* Association list from label text to page number. It is built lazily so
     that a spec containing no labels never reads the page labels at all.
     NOTE(review): this presumably also keeps documents with no pages on the
     "has no pages" error path of parse_pagespec_inner instead of failing
     inside [ilist 1 0] -- confirm against ilist. *)
  let labels =
    lazy
      (let labs = Pdfpagelabels.read pdf in
         map
           (fun pnum -> (Pdfpagelabels.pagelabeltext_of_pagenumber pnum labs, pnum))
           (ilist 1 (Pdfpage.endpage pdf)))
  in
  (* Read label text up to the closing, unescaped '>'. Returns the label
     characters and whatever follows the closing bracket. *)
  let rec read_label acc = function
    | '>'::t -> (List.rev acc, t)
    | '\\'::('<' | '>' as c)::t -> read_label (c::acc) t
    | c::t -> read_label (c::acc) t
    | [] -> (List.rev acc, [])
  in
  (* An unknown label is a user error, not page 0. *)
  let page_of_label label =
    match lookup label (Lazy.force labels) with
    | Some pnum -> pnum
    | None -> raise PageSpecBadSyntax
  in
  let rec resolve_pagelabels_inner = function
    | '<'::t ->
        let pagelabel, rest = read_label [] t in
        let resolved = explode (string_of_int (page_of_label (implode pagelabel))) in
          resolved @ resolve_pagelabels_inner rest
    | '\\'::('<' | '>' as c)::t -> c::resolve_pagelabels_inner t
    | '>'::_ -> raise PageSpecBadSyntax
    | h::t -> h::resolve_pagelabels_inner t
    | [] -> []
  in
    resolve_pagelabels_inner spec
let rec parse_pagespec_inner endpage pdf spec = let rec parse_pagespec_inner endpage pdf spec =
let spec = if spec = "" then "all" else spec in let spec = if spec = "" then "all" else spec in
let spec = implode (resolve_pagelabels pdf (explode spec)) in
let spec = space_string spec in let spec = space_string spec in
if endpage < 1 then raise (Pdf.PDFError "This PDF file has no pages and is therefore malformed") else if endpage < 1 then raise (Pdf.PDFError "This PDF file has no pages and is therefore malformed") else
let numbers = let numbers =

View File

@ -3,6 +3,7 @@
(** Here are the rules for building input ranges:
{ul
{- Page numbers can be given plain, like 1 or 2, or as page labels in angle brackets, like <1> or <iii>. Use a backslash to escape a literal angle bracket.}
{- A dash (-) defines ranges e.g. 1-5 or 6-3.}
{- A comma (,) allows one to specify several ranges, e.g. 1-2,4-5.}
{- The word end represents the last page number.}