Splitting page spec into separate module
This commit is contained in:
parent
53d0a08422
commit
afe80205dd
2
Makefile
2
Makefile
|
@ -1,6 +1,6 @@
|
||||||
# Build the cpdf command line tools and top level
|
# Build the cpdf command line tools and top level
|
||||||
MODS = tjutil tjutf16 tjllist tjparserMonad tjjson xmlm \
|
MODS = tjutil tjutf16 tjllist tjparserMonad tjjson xmlm \
|
||||||
cpdfwriteJSON cpdfstrftime cpdfcoord cpdf cpdfcommand
|
cpdfwriteJSON cpdfstrftime cpdfcoord cpdfpagespec cpdf cpdfcommand
|
||||||
|
|
||||||
SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml
|
SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml
|
||||||
|
|
||||||
|
|
217
cpdf.ml
217
cpdf.ml
|
@ -346,220 +346,7 @@ let endpage_io ?revision i user_pw owner_pw =
|
||||||
let pdf = Pdfread.pdf_of_input_lazy ?revision user_pw owner_pw i in
|
let pdf = Pdfread.pdf_of_input_lazy ?revision user_pw owner_pw i in
|
||||||
Pdfpage.endpage pdf
|
Pdfpage.endpage pdf
|
||||||
|
|
||||||
(* Raised when syntax is ok, but endpage is too low. Caught by validator.
|
|
||||||
Caught and reraised as normal failure by parse_pagespec. *)
|
|
||||||
exception PageSpecUnknownPage of int
|
|
||||||
|
|
||||||
(* There would be no pages *)
|
|
||||||
exception PageSpecWouldBeNoPages
|
|
||||||
|
|
||||||
(* Raised when syntax is wrong. Caught and reraised by parse_pagespec and
|
|
||||||
validator. *)
|
|
||||||
exception PageSpecBadSyntax
|
|
||||||
|
|
||||||
(* Parsing range specifications *)
|
|
||||||
let rec splitat_commas toks =
|
|
||||||
match cleavewhile (neq (Pdfgenlex.LexName ",")) toks with
|
|
||||||
| [], [] -> []
|
|
||||||
| [], _ -> raise PageSpecBadSyntax
|
|
||||||
| some, [] -> [some]
|
|
||||||
| _::_ as before, _::rest -> before::splitat_commas rest
|
|
||||||
|
|
||||||
let is_dimension comparison {Pdfpage.mediabox = box} =
|
|
||||||
let minx, miny, maxx, maxy = Pdf.parse_rectangle box in
|
|
||||||
comparison (maxx -. minx) (maxy -. miny)
|
|
||||||
|
|
||||||
let select_dimensions comparison pdf candidates =
|
|
||||||
let pages = Pdfpage.pages_of_pagetree pdf in
|
|
||||||
let pagenums, kept_pages =
|
|
||||||
split
|
|
||||||
(option_map
|
|
||||||
(fun (index, page) ->
|
|
||||||
if mem index candidates then Some (index, page) else None)
|
|
||||||
(combine (indx pages) pages))
|
|
||||||
in
|
|
||||||
option_map2
|
|
||||||
(fun pagenum page ->
|
|
||||||
if is_dimension comparison page then Some pagenum else None)
|
|
||||||
pagenums
|
|
||||||
kept_pages
|
|
||||||
|
|
||||||
let select_portrait = select_dimensions ( < )
|
|
||||||
|
|
||||||
let select_landscape = select_dimensions ( > )
|
|
||||||
|
|
||||||
let rec mk_numbers pdf endpage lexemes =
|
|
||||||
match lexemes with
|
|
||||||
| [Pdfgenlex.LexInt n] -> [n]
|
|
||||||
| [Pdfgenlex.LexName "end"] -> [endpage]
|
|
||||||
| [Pdfgenlex.LexInt n; Pdfgenlex.LexName "-"; Pdfgenlex.LexInt n'] ->
|
|
||||||
if n > n' then rev (ilist n' n) else ilist n n'
|
|
||||||
| [Pdfgenlex.LexName "end"; Pdfgenlex.LexName "-"; Pdfgenlex.LexInt n] ->
|
|
||||||
if n <= endpage
|
|
||||||
then rev (ilist n endpage)
|
|
||||||
else raise (PageSpecUnknownPage n)
|
|
||||||
| [Pdfgenlex.LexInt n; Pdfgenlex.LexName "-"; Pdfgenlex.LexName "end"] ->
|
|
||||||
if n <= endpage
|
|
||||||
then ilist n endpage
|
|
||||||
else raise (PageSpecUnknownPage n)
|
|
||||||
| [Pdfgenlex.LexName "end"; Pdfgenlex.LexName "-"; Pdfgenlex.LexName "end"] ->
|
|
||||||
[endpage]
|
|
||||||
| [Pdfgenlex.LexName "even"] ->
|
|
||||||
drop_odds (ilist 1 endpage)
|
|
||||||
| [Pdfgenlex.LexName "portrait"] ->
|
|
||||||
select_portrait pdf (ilist 1 endpage)
|
|
||||||
| [Pdfgenlex.LexName "landscape"] ->
|
|
||||||
select_landscape pdf (ilist 1 endpage)
|
|
||||||
| [Pdfgenlex.LexName "odd"] ->
|
|
||||||
really_drop_evens (ilist 1 endpage)
|
|
||||||
| [Pdfgenlex.LexName "all"] ->
|
|
||||||
ilist 1 endpage
|
|
||||||
| [Pdfgenlex.LexName "reverse"] ->
|
|
||||||
rev (ilist 1 endpage)
|
|
||||||
| toks ->
|
|
||||||
let ranges = splitat_commas toks in
|
|
||||||
if ranges = [toks] then raise PageSpecBadSyntax else
|
|
||||||
flatten (map (mk_numbers pdf endpage) ranges)
|
|
||||||
|
|
||||||
(* Space dashes and commas *)
|
|
||||||
let rec add_spaces = function
|
|
||||||
| [] -> []
|
|
||||||
| ('-' | ',') as h::t -> ' '::h::' '::add_spaces t
|
|
||||||
| h::t -> h::add_spaces t
|
|
||||||
|
|
||||||
let space_string s =
|
|
||||||
implode (add_spaces (explode s))
|
|
||||||
|
|
||||||
let fixup_negatives endpage = function
|
|
||||||
| Pdfgenlex.LexName s when String.length s > 1 && s.[0] = '~' ->
|
|
||||||
Pdfgenlex.LexInt (endpage + 1 + ~-(int_of_string (implode (tl (explode s)))))
|
|
||||||
| x -> x
|
|
||||||
|
|
||||||
let invert_range endpage r =
|
|
||||||
option_map (fun p -> if mem p r then None else Some p) (ilist 1 endpage)
|
|
||||||
|
|
||||||
let rec parse_pagespec_inner endpage pdf spec =
|
|
||||||
let spec = space_string spec in
|
|
||||||
if endpage < 1 then raise (Pdf.PDFError "This PDF file has no pages and is therefore malformed") else
|
|
||||||
let numbers =
|
|
||||||
try
|
|
||||||
match explode spec with
|
|
||||||
| 'N'::'O'::'T'::r ->
|
|
||||||
invert_range endpage (parse_pagespec_inner endpage pdf (implode r))
|
|
||||||
| _ ->
|
|
||||||
match rev (explode spec) with
|
|
||||||
| ['n'; 'e'; 'v'; 'e'] ->
|
|
||||||
keep even (ilist 1 endpage)
|
|
||||||
| ['d'; 'd'; 'o'] ->
|
|
||||||
keep odd (ilist 1 endpage)
|
|
||||||
| ['t'; 'i'; 'a'; 'r'; 't'; 'r'; 'o'; 'p'] ->
|
|
||||||
select_portrait pdf (ilist 1 endpage)
|
|
||||||
| ['e'; 'p'; 'a'; 'c'; 's'; 'd'; 'n'; 'a'; 'l'] ->
|
|
||||||
select_landscape pdf (ilist 1 endpage)
|
|
||||||
| 't'::'i'::'a'::'r'::'t'::'r'::'o'::'p'::more ->
|
|
||||||
select_portrait
|
|
||||||
pdf
|
|
||||||
(mk_numbers pdf endpage (map (fixup_negatives endpage) (Pdfgenlex.lex_string (implode (rev more)))))
|
|
||||||
| 'e'::'p'::'a'::'c'::'s'::'d'::'n'::'a'::'l'::more ->
|
|
||||||
select_landscape
|
|
||||||
pdf
|
|
||||||
(mk_numbers pdf endpage (map (fixup_negatives endpage) (Pdfgenlex.lex_string (implode (rev more)))))
|
|
||||||
| 'd'::'d'::'o'::more ->
|
|
||||||
keep
|
|
||||||
odd
|
|
||||||
(mk_numbers pdf endpage (map (fixup_negatives endpage) (Pdfgenlex.lex_string (implode (rev more)))))
|
|
||||||
| 'n'::'e'::'v'::'e'::more ->
|
|
||||||
keep
|
|
||||||
even
|
|
||||||
(mk_numbers pdf endpage (map (fixup_negatives endpage) (Pdfgenlex.lex_string (implode (rev more)))))
|
|
||||||
| _ ->
|
|
||||||
mk_numbers pdf endpage (map (fixup_negatives endpage) (Pdfgenlex.lex_string spec))
|
|
||||||
with
|
|
||||||
e -> raise PageSpecBadSyntax
|
|
||||||
in
|
|
||||||
if numbers = [] then raise PageSpecWouldBeNoPages else
|
|
||||||
iter
|
|
||||||
(fun n ->
|
|
||||||
if n <= 0 || n > endpage then raise (PageSpecUnknownPage n))
|
|
||||||
numbers;
|
|
||||||
numbers
|
|
||||||
|
|
||||||
let parse_pagespec pdf spec =
|
|
||||||
try parse_pagespec_inner (Pdfpage.endpage pdf) pdf spec with
|
|
||||||
| PageSpecUnknownPage n ->
|
|
||||||
raise (Pdf.PDFError ("Page " ^ string_of_int n ^ " does not exist."))
|
|
||||||
| PageSpecWouldBeNoPages ->
|
|
||||||
raise (Pdf.PDFError ("Page range specifies no pages"))
|
|
||||||
| e ->
|
|
||||||
raise
|
|
||||||
(Pdf.PDFError
|
|
||||||
("Bad page specification " ^ spec ^
|
|
||||||
". Raw error was " ^ Printexc.to_string e ^
|
|
||||||
". Last page was " ^ string_of_int (Pdfpage.endpage pdf)))
|
|
||||||
|
|
||||||
(* To validate a pagespec as being syntactically correct without the PDF in
|
|
||||||
question. This is nasty, since the parser above includes checking based on the
|
|
||||||
endpage of the PDF (which we don't have). Pass 100 as the endpage, doubling on
|
|
||||||
page range exception, bailng out above 500000. *)
|
|
||||||
let rec validate_pagespec_inner n spec =
|
|
||||||
try
|
|
||||||
ignore (parse_pagespec_inner n (Pdf.empty ()) spec); true
|
|
||||||
with
|
|
||||||
| PageSpecUnknownPage _ -> if n < 500000 then validate_pagespec_inner (n * 2) spec else false
|
|
||||||
| PageSpecBadSyntax | _ -> false
|
|
||||||
|
|
||||||
let validate_pagespec spec =
|
|
||||||
validate_pagespec_inner 100 spec
|
|
||||||
|
|
||||||
let rec parse_pagespec_without_pdf_inner n spec =
|
|
||||||
try
|
|
||||||
parse_pagespec_inner n (Pdf.empty ()) spec
|
|
||||||
with
|
|
||||||
PageSpecUnknownPage _ ->
|
|
||||||
if n < 500000
|
|
||||||
then parse_pagespec_without_pdf_inner (n * 2) spec
|
|
||||||
else raise (Pdf.PDFError "PageSpecUnknownPage")
|
|
||||||
|
|
||||||
let parse_pagespec_without_pdf spec =
|
|
||||||
parse_pagespec_without_pdf_inner 100 spec
|
|
||||||
|
|
||||||
(* Convert an integer list representing a set to a page specification, in order. *)
|
|
||||||
let string_of_pagespec pdf = function
|
|
||||||
| [] -> ""
|
|
||||||
| is ->
|
|
||||||
let iseven len is =
|
|
||||||
drop_odds (ilist 1 len) = is
|
|
||||||
in let isodd len is =
|
|
||||||
really_drop_evens (ilist 1 len) = is
|
|
||||||
in let isall len is =
|
|
||||||
ilist 1 len = is
|
|
||||||
in let is = sort compare is
|
|
||||||
in let len = Pdfpage.endpage pdf in
|
|
||||||
let rec mkranges prev = function
|
|
||||||
| [] -> map extremes (rev (map rev prev))
|
|
||||||
| h::t ->
|
|
||||||
match prev with
|
|
||||||
| (ph::pht)::pt when h = ph + 1 -> mkranges ((h::ph::pht)::pt) t
|
|
||||||
| (_::_)::_ -> mkranges ([h]::prev) t
|
|
||||||
| []::_ -> assert false
|
|
||||||
| [] -> mkranges [[h]] t
|
|
||||||
in
|
|
||||||
if iseven len is && len > 3 then "even" else
|
|
||||||
if isodd len is && len > 2 then "odd" else
|
|
||||||
if isall len is then "all" else
|
|
||||||
let ranges = mkranges [] is in
|
|
||||||
let rangestrings =
|
|
||||||
map
|
|
||||||
(function (s, e) ->
|
|
||||||
if s = e
|
|
||||||
then string_of_int s
|
|
||||||
else string_of_int s ^ "-" ^ string_of_int e)
|
|
||||||
ranges
|
|
||||||
in
|
|
||||||
fold_left ( ^ ) "" (interleave "," rangestrings)
|
|
||||||
|
|
||||||
let string_of_range r =
|
|
||||||
fold_left (fun a b -> a ^ " " ^ b) "" (map string_of_int r)
|
|
||||||
|
|
||||||
let print_pdf_objs pdf =
|
let print_pdf_objs pdf =
|
||||||
Printf.printf "Trailerdict: %s\n" (Pdfwrite.string_of_pdf pdf.Pdf.trailerdict);
|
Printf.printf "Trailerdict: %s\n" (Pdfwrite.string_of_pdf pdf.Pdf.trailerdict);
|
||||||
|
@ -3114,7 +2901,7 @@ let list_page_annotations encoding pdf num page =
|
||||||
| _ -> ()
|
| _ -> ()
|
||||||
|
|
||||||
let list_annotations encoding pdf =
|
let list_annotations encoding pdf =
|
||||||
let range = parse_pagespec pdf "all" in
|
let range = Cpdfpagespec.parse_pagespec pdf "all" in
|
||||||
iter_pages (list_page_annotations encoding pdf) pdf range
|
iter_pages (list_page_annotations encoding pdf) pdf range
|
||||||
|
|
||||||
let get_annotations encoding pdf =
|
let get_annotations encoding pdf =
|
||||||
|
|
27
cpdf.mli
27
cpdf.mli
|
@ -53,33 +53,6 @@ val iter_pages : (int -> Pdfpage.t -> unit) -> Pdf.t -> int list -> unit
|
||||||
(** Same as [process_pages] but return the list of outputs of the map function. *)
|
(** Same as [process_pages] but return the list of outputs of the map function. *)
|
||||||
val map_pages : (int -> Pdfpage.t -> 'a) -> Pdf.t -> int list -> 'a list
|
val map_pages : (int -> Pdfpage.t -> 'a) -> Pdf.t -> int list -> 'a list
|
||||||
|
|
||||||
(** {2 Page specifications and ranges } *)
|
|
||||||
|
|
||||||
(** Here are the rules for building input ranges:
|
|
||||||
|
|
||||||
{ul
|
|
||||||
{- A comma (,) allows one to specify several ranges, e.g. 1-2,4-5.}
|
|
||||||
{- The word end represents the last page number. }
|
|
||||||
{- The words odd and even can be used in place of or at the end of a page range to restrict to just the odd or even pages. }
|
|
||||||
{- The word reverse is the same as end-1.}
|
|
||||||
{- The word all is the same as 1-end.}
|
|
||||||
{- A range must contain no spaces.}
|
|
||||||
{- A tilde (~) defines a page number counting from the end of the document rather than the beginning. Page ~1 is the last page, ~2 the penultimate page etc.}
|
|
||||||
}
|
|
||||||
*)
|
|
||||||
|
|
||||||
(** Parse a (valid) page specification to a page range *)
|
|
||||||
val parse_pagespec : Pdf.t -> string -> int list
|
|
||||||
|
|
||||||
(** Return a string for the given range. Knows how to identify all, odd, even,
|
|
||||||
x-y ranges etc. *)
|
|
||||||
val string_of_pagespec : Pdf.t -> int list -> string
|
|
||||||
|
|
||||||
(** Is a page specification, in theory, valid? This is the most we can find out
|
|
||||||
without supplying a PDF, and thus knowing how many pages there are in it. *)
|
|
||||||
val validate_pagespec : string -> bool
|
|
||||||
|
|
||||||
val parse_pagespec_without_pdf : string -> int list
|
|
||||||
|
|
||||||
(** Compresses all streams in the PDF document which are uncompressed, using
|
(** Compresses all streams in the PDF document which are uncompressed, using
|
||||||
/FlateDecode, leaving out metadata. If the PDF is encrypted, does nothing. *)
|
/FlateDecode, leaving out metadata. If the PDF is encrypted, does nothing. *)
|
||||||
|
|
|
@ -62,7 +62,7 @@ let soft_error s =
|
||||||
if not !stay_on_error then exit 1 else raise StayOnError
|
if not !stay_on_error then exit 1 else raise StayOnError
|
||||||
|
|
||||||
let parse_pagespec pdf spec =
|
let parse_pagespec pdf spec =
|
||||||
try Cpdf.parse_pagespec pdf spec with
|
try Cpdfpagespec.parse_pagespec pdf spec with
|
||||||
Failure x -> error x
|
Failure x -> error x
|
||||||
|
|
||||||
(* We allow an operation such as ScaleToFit on a range such as 'portrait' to be silently null to allow, for example:
|
(* We allow an operation such as ScaleToFit on a range such as 'portrait' to be silently null to allow, for example:
|
||||||
|
@ -70,7 +70,7 @@ let parse_pagespec pdf spec =
|
||||||
cpdf -scale-to-fit a4portrait in.pdf portrait AND -scale-to-fit a4landscape landscape -o out.pdf
|
cpdf -scale-to-fit a4portrait in.pdf portrait AND -scale-to-fit a4landscape landscape -o out.pdf
|
||||||
*)
|
*)
|
||||||
let parse_pagespec_allow_empty pdf spec =
|
let parse_pagespec_allow_empty pdf spec =
|
||||||
try Cpdf.parse_pagespec pdf spec with
|
try Cpdfpagespec.parse_pagespec pdf spec with
|
||||||
Pdf.PDFError ("Page range specifies no pages") -> []
|
Pdf.PDFError ("Page range specifies no pages") -> []
|
||||||
|
|
||||||
(* Operations. *)
|
(* Operations. *)
|
||||||
|
@ -1124,7 +1124,7 @@ let setbates n =
|
||||||
(* Calculate -bates automatically so that n is applied to the first page in the range *)
|
(* Calculate -bates automatically so that n is applied to the first page in the range *)
|
||||||
let setbatesrange n =
|
let setbatesrange n =
|
||||||
let first_page =
|
let first_page =
|
||||||
let range = Cpdf.parse_pagespec_without_pdf (get_pagespec ()) in
|
let range = Cpdfpagespec.parse_pagespec_without_pdf (get_pagespec ()) in
|
||||||
fold_left min max_int range
|
fold_left min max_int range
|
||||||
in
|
in
|
||||||
args.bates <- n + 1 - first_page
|
args.bates <- n + 1 - first_page
|
||||||
|
|
|
@ -0,0 +1,216 @@
|
||||||
|
open Pdfutil
|
||||||
|
|
||||||
|
(* Raised when syntax is ok, but endpage is too low. Caught by validator.
|
||||||
|
Caught and reraised as normal failure by parse_pagespec. *)
|
||||||
|
exception PageSpecUnknownPage of int
|
||||||
|
|
||||||
|
(* There would be no pages *)
|
||||||
|
exception PageSpecWouldBeNoPages
|
||||||
|
|
||||||
|
(* Raised when syntax is wrong. Caught and reraised by parse_pagespec and
|
||||||
|
validator. *)
|
||||||
|
exception PageSpecBadSyntax
|
||||||
|
|
||||||
|
(* Parsing range specifications *)
|
||||||
|
let rec splitat_commas toks =
|
||||||
|
match cleavewhile (neq (Pdfgenlex.LexName ",")) toks with
|
||||||
|
| [], [] -> []
|
||||||
|
| [], _ -> raise PageSpecBadSyntax
|
||||||
|
| some, [] -> [some]
|
||||||
|
| _::_ as before, _::rest -> before::splitat_commas rest
|
||||||
|
|
||||||
|
let is_dimension comparison {Pdfpage.mediabox = box} =
|
||||||
|
let minx, miny, maxx, maxy = Pdf.parse_rectangle box in
|
||||||
|
comparison (maxx -. minx) (maxy -. miny)
|
||||||
|
|
||||||
|
let select_dimensions comparison pdf candidates =
|
||||||
|
let pages = Pdfpage.pages_of_pagetree pdf in
|
||||||
|
let pagenums, kept_pages =
|
||||||
|
split
|
||||||
|
(option_map
|
||||||
|
(fun (index, page) ->
|
||||||
|
if mem index candidates then Some (index, page) else None)
|
||||||
|
(combine (indx pages) pages))
|
||||||
|
in
|
||||||
|
option_map2
|
||||||
|
(fun pagenum page ->
|
||||||
|
if is_dimension comparison page then Some pagenum else None)
|
||||||
|
pagenums
|
||||||
|
kept_pages
|
||||||
|
|
||||||
|
let select_portrait = select_dimensions ( < )
|
||||||
|
|
||||||
|
let select_landscape = select_dimensions ( > )
|
||||||
|
|
||||||
|
let rec mk_numbers pdf endpage lexemes =
|
||||||
|
match lexemes with
|
||||||
|
| [Pdfgenlex.LexInt n] -> [n]
|
||||||
|
| [Pdfgenlex.LexName "end"] -> [endpage]
|
||||||
|
| [Pdfgenlex.LexInt n; Pdfgenlex.LexName "-"; Pdfgenlex.LexInt n'] ->
|
||||||
|
if n > n' then rev (ilist n' n) else ilist n n'
|
||||||
|
| [Pdfgenlex.LexName "end"; Pdfgenlex.LexName "-"; Pdfgenlex.LexInt n] ->
|
||||||
|
if n <= endpage
|
||||||
|
then rev (ilist n endpage)
|
||||||
|
else raise (PageSpecUnknownPage n)
|
||||||
|
| [Pdfgenlex.LexInt n; Pdfgenlex.LexName "-"; Pdfgenlex.LexName "end"] ->
|
||||||
|
if n <= endpage
|
||||||
|
then ilist n endpage
|
||||||
|
else raise (PageSpecUnknownPage n)
|
||||||
|
| [Pdfgenlex.LexName "end"; Pdfgenlex.LexName "-"; Pdfgenlex.LexName "end"] ->
|
||||||
|
[endpage]
|
||||||
|
| [Pdfgenlex.LexName "even"] ->
|
||||||
|
drop_odds (ilist 1 endpage)
|
||||||
|
| [Pdfgenlex.LexName "portrait"] ->
|
||||||
|
select_portrait pdf (ilist 1 endpage)
|
||||||
|
| [Pdfgenlex.LexName "landscape"] ->
|
||||||
|
select_landscape pdf (ilist 1 endpage)
|
||||||
|
| [Pdfgenlex.LexName "odd"] ->
|
||||||
|
really_drop_evens (ilist 1 endpage)
|
||||||
|
| [Pdfgenlex.LexName "all"] ->
|
||||||
|
ilist 1 endpage
|
||||||
|
| [Pdfgenlex.LexName "reverse"] ->
|
||||||
|
rev (ilist 1 endpage)
|
||||||
|
| toks ->
|
||||||
|
let ranges = splitat_commas toks in
|
||||||
|
if ranges = [toks] then raise PageSpecBadSyntax else
|
||||||
|
flatten (map (mk_numbers pdf endpage) ranges)
|
||||||
|
|
||||||
|
(* Space dashes and commas *)
|
||||||
|
let rec add_spaces = function
|
||||||
|
| [] -> []
|
||||||
|
| ('-' | ',') as h::t -> ' '::h::' '::add_spaces t
|
||||||
|
| h::t -> h::add_spaces t
|
||||||
|
|
||||||
|
let space_string s =
|
||||||
|
implode (add_spaces (explode s))
|
||||||
|
|
||||||
|
let fixup_negatives endpage = function
|
||||||
|
| Pdfgenlex.LexName s when String.length s > 1 && s.[0] = '~' ->
|
||||||
|
Pdfgenlex.LexInt (endpage + 1 + ~-(int_of_string (implode (tl (explode s)))))
|
||||||
|
| x -> x
|
||||||
|
|
||||||
|
let invert_range endpage r =
|
||||||
|
option_map (fun p -> if mem p r then None else Some p) (ilist 1 endpage)
|
||||||
|
|
||||||
|
let rec parse_pagespec_inner endpage pdf spec =
|
||||||
|
let spec = space_string spec in
|
||||||
|
if endpage < 1 then raise (Pdf.PDFError "This PDF file has no pages and is therefore malformed") else
|
||||||
|
let numbers =
|
||||||
|
try
|
||||||
|
match explode spec with
|
||||||
|
| 'N'::'O'::'T'::r ->
|
||||||
|
invert_range endpage (parse_pagespec_inner endpage pdf (implode r))
|
||||||
|
| _ ->
|
||||||
|
match rev (explode spec) with
|
||||||
|
| ['n'; 'e'; 'v'; 'e'] ->
|
||||||
|
keep even (ilist 1 endpage)
|
||||||
|
| ['d'; 'd'; 'o'] ->
|
||||||
|
keep odd (ilist 1 endpage)
|
||||||
|
| ['t'; 'i'; 'a'; 'r'; 't'; 'r'; 'o'; 'p'] ->
|
||||||
|
select_portrait pdf (ilist 1 endpage)
|
||||||
|
| ['e'; 'p'; 'a'; 'c'; 's'; 'd'; 'n'; 'a'; 'l'] ->
|
||||||
|
select_landscape pdf (ilist 1 endpage)
|
||||||
|
| 't'::'i'::'a'::'r'::'t'::'r'::'o'::'p'::more ->
|
||||||
|
select_portrait
|
||||||
|
pdf
|
||||||
|
(mk_numbers pdf endpage (map (fixup_negatives endpage) (Pdfgenlex.lex_string (implode (rev more)))))
|
||||||
|
| 'e'::'p'::'a'::'c'::'s'::'d'::'n'::'a'::'l'::more ->
|
||||||
|
select_landscape
|
||||||
|
pdf
|
||||||
|
(mk_numbers pdf endpage (map (fixup_negatives endpage) (Pdfgenlex.lex_string (implode (rev more)))))
|
||||||
|
| 'd'::'d'::'o'::more ->
|
||||||
|
keep
|
||||||
|
odd
|
||||||
|
(mk_numbers pdf endpage (map (fixup_negatives endpage) (Pdfgenlex.lex_string (implode (rev more)))))
|
||||||
|
| 'n'::'e'::'v'::'e'::more ->
|
||||||
|
keep
|
||||||
|
even
|
||||||
|
(mk_numbers pdf endpage (map (fixup_negatives endpage) (Pdfgenlex.lex_string (implode (rev more)))))
|
||||||
|
| _ ->
|
||||||
|
mk_numbers pdf endpage (map (fixup_negatives endpage) (Pdfgenlex.lex_string spec))
|
||||||
|
with
|
||||||
|
e -> raise PageSpecBadSyntax
|
||||||
|
in
|
||||||
|
if numbers = [] then raise PageSpecWouldBeNoPages else
|
||||||
|
iter
|
||||||
|
(fun n ->
|
||||||
|
if n <= 0 || n > endpage then raise (PageSpecUnknownPage n))
|
||||||
|
numbers;
|
||||||
|
numbers
|
||||||
|
|
||||||
|
let parse_pagespec pdf spec =
|
||||||
|
try parse_pagespec_inner (Pdfpage.endpage pdf) pdf spec with
|
||||||
|
| PageSpecUnknownPage n ->
|
||||||
|
raise (Pdf.PDFError ("Page " ^ string_of_int n ^ " does not exist."))
|
||||||
|
| PageSpecWouldBeNoPages ->
|
||||||
|
raise (Pdf.PDFError ("Page range specifies no pages"))
|
||||||
|
| e ->
|
||||||
|
raise
|
||||||
|
(Pdf.PDFError
|
||||||
|
("Bad page specification " ^ spec ^
|
||||||
|
". Raw error was " ^ Printexc.to_string e ^
|
||||||
|
". Last page was " ^ string_of_int (Pdfpage.endpage pdf)))
|
||||||
|
|
||||||
|
(* To validate a pagespec as being syntactically correct without the PDF in
|
||||||
|
question. This is nasty, since the parser above includes checking based on the
|
||||||
|
endpage of the PDF (which we don't have). Pass 100 as the endpage, doubling on
|
||||||
|
page range exception, bailng out above 500000. *)
|
||||||
|
let rec validate_pagespec_inner n spec =
|
||||||
|
try
|
||||||
|
ignore (parse_pagespec_inner n (Pdf.empty ()) spec); true
|
||||||
|
with
|
||||||
|
| PageSpecUnknownPage _ -> if n < 500000 then validate_pagespec_inner (n * 2) spec else false
|
||||||
|
| PageSpecBadSyntax | _ -> false
|
||||||
|
|
||||||
|
let validate_pagespec spec =
|
||||||
|
validate_pagespec_inner 100 spec
|
||||||
|
|
||||||
|
let rec parse_pagespec_without_pdf_inner n spec =
|
||||||
|
try
|
||||||
|
parse_pagespec_inner n (Pdf.empty ()) spec
|
||||||
|
with
|
||||||
|
PageSpecUnknownPage _ ->
|
||||||
|
if n < 500000
|
||||||
|
then parse_pagespec_without_pdf_inner (n * 2) spec
|
||||||
|
else raise (Pdf.PDFError "PageSpecUnknownPage")
|
||||||
|
|
||||||
|
let parse_pagespec_without_pdf spec =
|
||||||
|
parse_pagespec_without_pdf_inner 100 spec
|
||||||
|
|
||||||
|
(* Convert an integer list representing a set to a page specification, in order. *)
|
||||||
|
let string_of_pagespec pdf = function
|
||||||
|
| [] -> ""
|
||||||
|
| is ->
|
||||||
|
let iseven len is =
|
||||||
|
drop_odds (ilist 1 len) = is
|
||||||
|
in let isodd len is =
|
||||||
|
really_drop_evens (ilist 1 len) = is
|
||||||
|
in let isall len is =
|
||||||
|
ilist 1 len = is
|
||||||
|
in let is = sort compare is
|
||||||
|
in let len = Pdfpage.endpage pdf in
|
||||||
|
let rec mkranges prev = function
|
||||||
|
| [] -> map extremes (rev (map rev prev))
|
||||||
|
| h::t ->
|
||||||
|
match prev with
|
||||||
|
| (ph::pht)::pt when h = ph + 1 -> mkranges ((h::ph::pht)::pt) t
|
||||||
|
| (_::_)::_ -> mkranges ([h]::prev) t
|
||||||
|
| []::_ -> assert false
|
||||||
|
| [] -> mkranges [[h]] t
|
||||||
|
in
|
||||||
|
if iseven len is && len > 3 then "even" else
|
||||||
|
if isodd len is && len > 2 then "odd" else
|
||||||
|
if isall len is then "all" else
|
||||||
|
let ranges = mkranges [] is in
|
||||||
|
let rangestrings =
|
||||||
|
map
|
||||||
|
(function (s, e) ->
|
||||||
|
if s = e
|
||||||
|
then string_of_int s
|
||||||
|
else string_of_int s ^ "-" ^ string_of_int e)
|
||||||
|
ranges
|
||||||
|
in
|
||||||
|
fold_left ( ^ ) "" (interleave "," rangestrings)
|
||||||
|
|
||||||
|
let string_of_range r =
|
||||||
|
fold_left (fun a b -> a ^ " " ^ b) "" (map string_of_int r)
|
|
@ -0,0 +1,31 @@
|
||||||
|
(** {2 Page specifications and ranges } *)
|
||||||
|
|
||||||
|
(** Here are the rules for building input ranges:
|
||||||
|
|
||||||
|
{ul
|
||||||
|
{- A dash (-) defines ranges e.g 1-5 or 6-3.}
|
||||||
|
{- A comma (,) allows one to specify several ranges, e.g. 1-2,4-5.}
|
||||||
|
{- The word end represents the last page number.}
|
||||||
|
{- The words odd and even can be used in place of or at the end of a page range to restrict to just the odd or even pages. }
|
||||||
|
{- The words portrait and landscape can be used in place of or at the end of a page range to restrict to just those pages which are portrait or landscape. Note that the meaning of portrait and landscape does not take account of any viewing rotation in place. A page with equal width and height is considered neither portrait nor landscape.}
|
||||||
|
{- The word reverse is the same as end-1.}
|
||||||
|
{- The word all is the same as 1-end.}
|
||||||
|
{- A range must contain no spaces.}
|
||||||
|
{- A tilde (~) defines a page number counting from the end of the document rather than the beginning. Page ~1 is the last page, ~2 the penultimate page etc.}
|
||||||
|
{- Prepending NOT to a whole page range inverts it.}
|
||||||
|
}
|
||||||
|
*)
|
||||||
|
|
||||||
|
(** Parse a (valid) page specification to a page range *)
|
||||||
|
val parse_pagespec : Pdf.t -> string -> int list
|
||||||
|
|
||||||
|
(** Same, but without a PDF. Thus 'end' etc. don't work *)
|
||||||
|
val parse_pagespec_without_pdf : string -> int list
|
||||||
|
|
||||||
|
(** Is a page specification, in theory, valid? This is the most we can find out
|
||||||
|
without supplying a PDF, and thus knowing how many pages there are in it. *)
|
||||||
|
val validate_pagespec : string -> bool
|
||||||
|
|
||||||
|
(** Return a string for the given range. Knows how to identify all, odd, even,
|
||||||
|
x-y ranges etc. *)
|
||||||
|
val string_of_pagespec : Pdf.t -> int list -> string
|
Loading…
Reference in New Issue