From b5ed462fc9296430fe080be6e6636c43da87399e Mon Sep 17 00:00:00 2001 From: John Whitington Date: Mon, 13 Mar 2023 21:28:03 +0000 Subject: [PATCH] Initial implementation of page labels in ranges --- cpdfpagespec.ml | 22 ++++++++++++++++++++++ cpdfpagespec.mli | 1 + 2 files changed, 23 insertions(+) diff --git a/cpdfpagespec.ml b/cpdfpagespec.ml index a325d01..27a785a 100644 --- a/cpdfpagespec.ml +++ b/cpdfpagespec.ml @@ -95,8 +95,30 @@ let invert_range endpage r = let duplicate_range n r = flatten (map (fun x -> many x n) r) +(* Resolve page labels to page numbers, e.g. <iii> -> 3 when page 3 is labelled iii (0 if no page has that label); \< -> < and \> -> >. A bare > is a syntax error. *) +let resolve_pagelabels pdf spec = + let labels = + let labs = Pdfpagelabels.read pdf in + map + (fun pnum -> (Pdfpagelabels.pagelabeltext_of_pagenumber pnum labs, pnum)) + (ilist 1 (Pdfpage.endpage pdf)) + in + (*iter (fun (s, l) -> Printf.printf "%s = %i\n" s l) labels;*) + let rec resolve_pagelabels_inner = function + | '<'::t -> + let pagelabel, rest = cleavewhile (neq '>') t in + let resolved = explode (string_of_int (begin match lookup (implode pagelabel) labels with Some x -> x | None -> 0 end)) in + if rest = [] then resolved else resolved @ resolve_pagelabels_inner (tl rest) + | '\\'::('<' | '>' as c)::t -> c::resolve_pagelabels_inner t + | '>'::t -> raise PageSpecBadSyntax + | h::t -> h::resolve_pagelabels_inner t + | [] -> [] + in + resolve_pagelabels_inner spec + let rec parse_pagespec_inner endpage pdf spec = let spec = if spec = "" then "all" else spec in + let spec = implode (resolve_pagelabels pdf (explode spec)) in let spec = space_string spec in if endpage < 1 then raise (Pdf.PDFError "This PDF file has no pages and is therefore malformed") else let numbers = diff --git a/cpdfpagespec.mli b/cpdfpagespec.mli index 5c4e158..1a68feb 100644 --- a/cpdfpagespec.mli +++ b/cpdfpagespec.mli @@ -3,6 +3,7 @@ (** Here are the rules for building input ranges: {ul +{- Page numbers can be given plain, like 1 or 2, or as page labels in angle brackets, like <1> or <iii> (use a backslash to escape a literal angle bracket).} {- A dash (-) 
defines ranges e.g 1-5 or 6-3.} {- A comma (,) allows one to specify several ranges, e.g. 1-2,4-5.} {- The word end represents the last page number.}