more
This commit is contained in:
parent
a9bfd16142
commit
c6d606136b
4
Makefile
4
Makefile
|
@ -1,8 +1,8 @@
|
||||||
# Build the cpdf command line tools and top level
|
# Build the cpdf command line tools and top level
|
||||||
MODS = cpdfyojson cpdfxmlm \
|
MODS = cpdfyojson cpdfxmlm \
|
||||||
cpdfunicodedata cpdferror cpdfdebug cpdfjson cpdfstrftime cpdfcoord cpdfattach \
|
cpdfunicodedata cpdferror cpdfdebug cpdfjson cpdfstrftime cpdfcoord cpdfattach \
|
||||||
cpdfpagespec cpdfposition cpdfpresent cpdfmetadata cpdf cpdffont cpdftype \
|
cpdfpagespec cpdfposition cpdfpresent cpdfmetadata cpdfbookmarks cpdfpage cpdf cpdffont cpdftype \
|
||||||
cpdftexttopdf cpdftoc cpdfpad cpdfocg cpdfsqueeze cpdfspot cpdfpagelabels cpdfcreate cpdfcommand
|
cpdftexttopdf cpdftoc cpdfpad cpdfocg cpdfsqueeze cpdfspot cpdfpagelabels cpdfcreate cpdfannot cpdfcommand
|
||||||
|
|
||||||
SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml
|
SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml
|
||||||
|
|
||||||
|
|
650
cpdf.ml
650
cpdf.ml
|
@ -23,30 +23,6 @@ let rec process_text time text m =
|
||||||
function transforms into one which returns the identity matrix *)
|
function transforms into one which returns the identity matrix *)
|
||||||
let ppstub f n p = (f n p, n, Pdftransform.i_matrix)
|
let ppstub f n p = (f n p, n, Pdftransform.i_matrix)
|
||||||
|
|
||||||
let process_pages f pdf range =
|
|
||||||
let pages = Pdfpage.pages_of_pagetree pdf in
|
|
||||||
let pages', pagenumbers, matrices = (* new page objects, page number, matrix *)
|
|
||||||
split3
|
|
||||||
(map2
|
|
||||||
(fun n p -> if mem n range then f n p else (p, n, Pdftransform.i_matrix))
|
|
||||||
(ilist 1 (length pages))
|
|
||||||
pages)
|
|
||||||
in
|
|
||||||
Pdfpage.change_pages ~matrices:(combine pagenumbers matrices) true pdf pages'
|
|
||||||
|
|
||||||
let iter_pages f pdf range =
|
|
||||||
let pages = Pdfpage.pages_of_pagetree pdf in
|
|
||||||
iter2
|
|
||||||
(fun n p -> if mem n range then f n p)
|
|
||||||
(ilist 1 (length pages))
|
|
||||||
pages
|
|
||||||
|
|
||||||
let map_pages f pdf range =
|
|
||||||
let pages = Pdfpage.pages_of_pagetree pdf in
|
|
||||||
option_map2
|
|
||||||
(fun n p -> if mem n range then Some (f n p) else None)
|
|
||||||
(ilist 1 (length pages))
|
|
||||||
pages
|
|
||||||
|
|
||||||
(* Add stack operators to a content stream to ensure it is composeable. On
|
(* Add stack operators to a content stream to ensure it is composeable. On
|
||||||
-fast, we don't check for Q deficit, assuming PDF is ISO. *)
|
-fast, we don't check for Q deficit, assuming PDF is ISO. *)
|
||||||
|
@ -67,7 +43,7 @@ let protect fast pdf resources content =
|
||||||
|
|
||||||
(* If a cropbox exists, make it the mediabox. If not, change nothing. *)
|
(* If a cropbox exists, make it the mediabox. If not, change nothing. *)
|
||||||
let copy_cropbox_to_mediabox pdf range =
|
let copy_cropbox_to_mediabox pdf range =
|
||||||
process_pages
|
Cpdfpage.process_pages
|
||||||
(ppstub (fun _ page ->
|
(ppstub (fun _ page ->
|
||||||
match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with
|
match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with
|
||||||
| Some pdfobject -> {page with Pdfpage.mediabox = Pdf.direct pdf pdfobject}
|
| Some pdfobject -> {page with Pdfpage.mediabox = Pdf.direct pdf pdfobject}
|
||||||
|
@ -113,333 +89,6 @@ let combine_pdf_resources pdf a b =
|
||||||
(Pdf.Dictionary [])
|
(Pdf.Dictionary [])
|
||||||
(unknown_keys_a @ unknown_keys_b @ combined_known_entries)
|
(unknown_keys_a @ unknown_keys_b @ combined_known_entries)
|
||||||
|
|
||||||
(* \section{Remove bookmarks} *)
|
|
||||||
|
|
||||||
(* \section{Add bookmarks} *)
|
|
||||||
let read_lines input =
|
|
||||||
let lines = ref [] in
|
|
||||||
try
|
|
||||||
while true do
|
|
||||||
let c = read_line input in
|
|
||||||
lines =| c
|
|
||||||
done; []
|
|
||||||
with
|
|
||||||
_ -> rev !lines
|
|
||||||
|
|
||||||
(* Verify a list of bookmarks. Positive jumps of > 1 not allowed, no numbers
|
|
||||||
smaller than 0. *)
|
|
||||||
let rec verify_bookmarks pdf lastlevel fastrefnums endpage = function
|
|
||||||
| [] -> true
|
|
||||||
| {Pdfmarks.level = level; Pdfmarks.target = target}::more ->
|
|
||||||
let page = Pdfpage.pagenumber_of_target pdf ~fastrefnums target in
|
|
||||||
level < lastlevel + 2 &&
|
|
||||||
level >= 0 &&
|
|
||||||
page <= endpage &&
|
|
||||||
page >= 0 &&
|
|
||||||
verify_bookmarks pdf level fastrefnums endpage more
|
|
||||||
|
|
||||||
let verify_bookmarks pdf lastlevel endpage marks =
|
|
||||||
let refnums = Pdf.page_reference_numbers pdf in
|
|
||||||
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
|
|
||||||
match marks with
|
|
||||||
| [] -> true
|
|
||||||
| m::more -> m.Pdfmarks.level = 0 && verify_bookmarks pdf lastlevel fastrefnums endpage more
|
|
||||||
|
|
||||||
(* Parse a line of the bookmarks file. *)
|
|
||||||
|
|
||||||
(* Un-escape things which are escaped. Quotes, newlines and backslashes *)
|
|
||||||
let rec fixup_characters prev = function
|
|
||||||
| [] -> rev prev
|
|
||||||
| '\\'::'\\'::t -> fixup_characters ('\\'::prev) t
|
|
||||||
| '\\'::'"'::t -> fixup_characters ('"'::prev) t
|
|
||||||
| '\\'::'n'::t -> fixup_characters ('\n'::prev) t
|
|
||||||
| h::t -> fixup_characters (h::prev) t
|
|
||||||
|
|
||||||
let debug_bookmark_string s =
|
|
||||||
Printf.printf "STR: %s\n" s
|
|
||||||
|
|
||||||
(* If optionaldest = [Pdfgenlex.LexString s], we parse the string, convert the
|
|
||||||
* integer to an indirect of the real page target, and then put it in. *)
|
|
||||||
let target_of_markfile_obj pdf i' pdfobj =
|
|
||||||
(*Printf.printf "Parsed %s\n" (Pdfwrite.string_of_pdf pdfobj);*)
|
|
||||||
match pdfobj with
|
|
||||||
Pdf.Array (Pdf.Integer x::more) ->
|
|
||||||
let pageobjnum = Pdfpage.page_object_number pdf i' in
|
|
||||||
begin match pageobjnum with
|
|
||||||
None ->
|
|
||||||
raise (Pdf.PDFError "bookmark_of_data: page obj num not found")
|
|
||||||
| Some p ->
|
|
||||||
Pdfdest.read_destination pdf (Pdf.Array (Pdf.Indirect p::more))
|
|
||||||
end
|
|
||||||
(* Need to deal with "null", "(string)", and "<<other thing like action" *)
|
|
||||||
| Pdf.Null -> Pdfdest.NullDestination
|
|
||||||
| Pdf.String s -> Pdfdest.read_destination pdf (Pdf.String s)
|
|
||||||
| x -> Pdfdest.Action x
|
|
||||||
|
|
||||||
let target_of_markfile_target pdf i' = function
|
|
||||||
| [Pdfgenlex.LexString s] ->
|
|
||||||
let pdfobj = Pdfread.parse_single_object s in
|
|
||||||
target_of_markfile_obj pdf i' pdfobj
|
|
||||||
| _ -> Pdfpage.target_of_pagenumber pdf i'
|
|
||||||
|
|
||||||
let bookmark_of_data pdf i s i' isopen optionaldest =
|
|
||||||
(*debug_bookmark_string s;
|
|
||||||
debug_bookmark_string (implode (fixup_characters [] (explode s)));
|
|
||||||
debug_bookmark_string (Pdftext.pdfdocstring_of_utf8 (implode (fixup_characters [] (explode s))));*)
|
|
||||||
{Pdfmarks.level = i;
|
|
||||||
Pdfmarks.text = Pdftext.pdfdocstring_of_utf8 (implode (fixup_characters [] (explode s)));
|
|
||||||
Pdfmarks.target = target_of_markfile_target pdf i' optionaldest;
|
|
||||||
Pdfmarks.isopen = isopen}
|
|
||||||
|
|
||||||
let target_of_json_target pdf pagenumber target =
|
|
||||||
target_of_markfile_obj pdf pagenumber (Cpdfjson.object_of_json target)
|
|
||||||
|
|
||||||
let mark_of_json pdf = function
|
|
||||||
| `Assoc [("level", `Int level);
|
|
||||||
("text", `String text);
|
|
||||||
("page", `Int pagenumber);
|
|
||||||
("open", `Bool openstatus);
|
|
||||||
("target", target)] ->
|
|
||||||
{Pdfmarks.level = level;
|
|
||||||
Pdfmarks.text = Pdftext.pdfdocstring_of_utf8 text;
|
|
||||||
Pdfmarks.target = target_of_json_target pdf pagenumber target;
|
|
||||||
Pdfmarks.isopen = openstatus}
|
|
||||||
| _ -> error "malformed mark in mark_of_json"
|
|
||||||
|
|
||||||
let marks_of_json pdf = function
|
|
||||||
| `List ms -> map (mark_of_json pdf) ms
|
|
||||||
| _ -> error "top level of JSON boomark file not a list"
|
|
||||||
|
|
||||||
let parse_bookmark_file_json verify pdf i =
|
|
||||||
let module J = Cpdfyojson.Safe in
|
|
||||||
try
|
|
||||||
let json =
|
|
||||||
match i.Pdfio.caml_channel with
|
|
||||||
| Some ch -> J.from_channel ch
|
|
||||||
| None ->
|
|
||||||
let content = Pdfio.string_of_bytes (Pdfio.bytes_of_input i 0 i.Pdfio.in_channel_length) in
|
|
||||||
J.from_string content
|
|
||||||
in
|
|
||||||
let marks = marks_of_json pdf json in
|
|
||||||
if verify then
|
|
||||||
if verify_bookmarks pdf 0 (Pdfpage.endpage pdf) marks then marks else
|
|
||||||
error "Bad bookmark file (References non-existant pages or is malformed)"
|
|
||||||
else
|
|
||||||
marks
|
|
||||||
with
|
|
||||||
e ->
|
|
||||||
error (Printf.sprintf "Malformed JSON bookmark file (%s)" (Printexc.to_string e))
|
|
||||||
|
|
||||||
let parse_bookmark_file verify pdf input =
|
|
||||||
let currline = ref 0 in
|
|
||||||
try
|
|
||||||
let lines = Pdfio.read_lines input in
|
|
||||||
let currline = ref 0 in
|
|
||||||
let bookmarks = ref [] in
|
|
||||||
iter
|
|
||||||
(function line ->
|
|
||||||
match
|
|
||||||
incr currline;
|
|
||||||
Pdfgenlex.lex_string line
|
|
||||||
with
|
|
||||||
| Pdfgenlex.LexInt i::Pdfgenlex.LexString s::Pdfgenlex.LexInt i'::Pdfgenlex.LexName "open"::optionaldest ->
|
|
||||||
bookmarks =| bookmark_of_data pdf i s i' true optionaldest
|
|
||||||
| Pdfgenlex.LexInt i::Pdfgenlex.LexString s::Pdfgenlex.LexInt i'::optionaldest ->
|
|
||||||
bookmarks =| bookmark_of_data pdf i s i' false optionaldest
|
|
||||||
| [] -> () (* ignore blank lines *)
|
|
||||||
| _ ->
|
|
||||||
error ("Bad bookmark file, line " ^ (string_of_int !currline)))
|
|
||||||
lines;
|
|
||||||
let bookmarks = rev !bookmarks in
|
|
||||||
if verify then
|
|
||||||
if verify_bookmarks pdf 0 (Pdfpage.endpage pdf) bookmarks
|
|
||||||
then bookmarks
|
|
||||||
else
|
|
||||||
error
|
|
||||||
"Bad bookmark file (References non-existant pages or is malformed)"
|
|
||||||
else
|
|
||||||
bookmarks
|
|
||||||
with
|
|
||||||
e ->
|
|
||||||
error
|
|
||||||
(Printf.sprintf
|
|
||||||
"Bad bookmark file (syntax) at line %i (error was %s)"
|
|
||||||
!currline
|
|
||||||
(Printexc.to_string e))
|
|
||||||
|
|
||||||
|
|
||||||
let add_bookmarks ~json verify input pdf =
|
|
||||||
let parsed =
|
|
||||||
(if json then parse_bookmark_file_json else parse_bookmark_file) verify pdf input in
|
|
||||||
(*iter (fun b -> flprint (Pdfmarks.string_of_bookmark b); flprint "\n") parsed;*)
|
|
||||||
Pdfmarks.add_bookmarks parsed pdf
|
|
||||||
|
|
||||||
(* List bookmarks *)
|
|
||||||
let output_string_of_target pdf fastrefnums x =
|
|
||||||
match Pdfdest.pdfobject_of_destination x with
|
|
||||||
| Pdf.Array (_::more) ->
|
|
||||||
let a =
|
|
||||||
Pdf.Array (Pdf.Integer (Pdfpage.pagenumber_of_target ~fastrefnums pdf x)::more)
|
|
||||||
in
|
|
||||||
"\"" ^ Pdfwrite.string_of_pdf a ^ "\""
|
|
||||||
| x -> "\"" ^ Pdfwrite.string_of_pdf x ^ "\""
|
|
||||||
|
|
||||||
let json_of_target pdf fastrefnums x =
|
|
||||||
match Pdfdest.pdfobject_of_destination x with
|
|
||||||
| Pdf.Array (_::more) ->
|
|
||||||
let a =
|
|
||||||
Pdf.Array (Pdf.Integer (Pdfpage.pagenumber_of_target ~fastrefnums pdf x)::more)
|
|
||||||
in
|
|
||||||
Cpdfjson.json_of_object pdf (fun _ -> ()) false false a
|
|
||||||
| x -> Cpdfjson.json_of_object pdf (fun _ -> ()) false false x
|
|
||||||
|
|
||||||
let output_json_marks ch calculate_page_number pdf fastrefnums marks =
|
|
||||||
let module J = Cpdfyojson.Safe in
|
|
||||||
let json_of_mark m =
|
|
||||||
`Assoc
|
|
||||||
[("level", `Int m.Pdfmarks.level);
|
|
||||||
("text", `String (Pdftext.utf8_of_pdfdocstring m.Pdfmarks.text));
|
|
||||||
("page", `Int (calculate_page_number m));
|
|
||||||
("open", `Bool m.Pdfmarks.isopen);
|
|
||||||
("target", json_of_target pdf fastrefnums m.Pdfmarks.target)]
|
|
||||||
in
|
|
||||||
let json = `List (map json_of_mark marks) in
|
|
||||||
J.pretty_to_channel ch json
|
|
||||||
|
|
||||||
(* List the bookmarks in the given range to the given output *)
|
|
||||||
let list_bookmarks ~json encoding range pdf output =
|
|
||||||
let process_stripped escaped =
|
|
||||||
let b = Buffer.create 200 in
|
|
||||||
iter
|
|
||||||
(fun x ->
|
|
||||||
if x <= 127 then Buffer.add_char b (char_of_int x))
|
|
||||||
escaped;
|
|
||||||
Buffer.contents b
|
|
||||||
in
|
|
||||||
let process_string s =
|
|
||||||
let rec replace c x y = function
|
|
||||||
| [] -> []
|
|
||||||
| h::t when h = c -> x::y::replace c x y t
|
|
||||||
| h::t -> h::replace c x y t
|
|
||||||
in
|
|
||||||
(* Convert to UTF8, raw, or stripped, and escape backslashed and quotation marks *)
|
|
||||||
let codepoints = Pdftext.codepoints_of_pdfdocstring s in
|
|
||||||
let escaped =
|
|
||||||
let bs = int_of_char '\\'
|
|
||||||
and nl = int_of_char '\n'
|
|
||||||
and n = int_of_char 'n'
|
|
||||||
and q = int_of_char '\"' in
|
|
||||||
replace q bs q (replace nl bs n (replace bs bs bs codepoints))
|
|
||||||
in
|
|
||||||
match encoding with
|
|
||||||
| Cpdfmetadata.UTF8 -> Pdftext.utf8_of_codepoints escaped
|
|
||||||
| Cpdfmetadata.Stripped -> process_stripped escaped
|
|
||||||
| Cpdfmetadata.Raw -> s
|
|
||||||
in
|
|
||||||
let bookmarks = Pdfmarks.read_bookmarks pdf in
|
|
||||||
let refnums = Pdf.page_reference_numbers pdf in
|
|
||||||
let rangetable = hashset_of_list range in
|
|
||||||
let range_is_all = range = ilist 1 (Pdfpage.endpage pdf) in
|
|
||||||
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
|
|
||||||
(* Find the pagenumber of each bookmark target. If it is in the range,
|
|
||||||
* keep that bookmark. Also keep the bookmark if its target is the null
|
|
||||||
* destination. *)
|
|
||||||
let inrange =
|
|
||||||
keep
|
|
||||||
(function x ->
|
|
||||||
range_is_all ||
|
|
||||||
x.Pdfmarks.target = Pdfdest.NullDestination ||
|
|
||||||
(match x.Pdfmarks.target with Pdfdest.NamedDestinationElsewhere _ -> true | _ -> false) ||
|
|
||||||
Hashtbl.mem rangetable (Pdfpage.pagenumber_of_target ~fastrefnums pdf x.Pdfmarks.target)) bookmarks
|
|
||||||
in
|
|
||||||
let calculate_page_number mark =
|
|
||||||
(* Some buggy PDFs use integers for page numbers instead of page
|
|
||||||
* object references. Adobe Reader and Preview seem to support
|
|
||||||
* this, for presumably historical reasons. So if we see a
|
|
||||||
* OtherDocPageNumber (which is what Pdfdest parses these as,
|
|
||||||
* because that's what they are legitimately, we use this as the
|
|
||||||
* page number. It is zero based, though, and we are one-based, so
|
|
||||||
* we add one. Pdfpage.pagenumber_of_target has been modified to support this.*)
|
|
||||||
Pdfpage.pagenumber_of_target ~fastrefnums pdf mark.Pdfmarks.target
|
|
||||||
in
|
|
||||||
if json then
|
|
||||||
output_json_marks stdout calculate_page_number pdf fastrefnums inrange
|
|
||||||
else
|
|
||||||
iter
|
|
||||||
(function mark ->
|
|
||||||
output.Pdfio.output_string
|
|
||||||
(Printf.sprintf "%i \"%s\" %i%s %s\n"
|
|
||||||
mark.Pdfmarks.level
|
|
||||||
(process_string mark.Pdfmarks.text)
|
|
||||||
(calculate_page_number mark)
|
|
||||||
(if mark.Pdfmarks.isopen then " open" else "")
|
|
||||||
(output_string_of_target pdf fastrefnums mark.Pdfmarks.target)))
|
|
||||||
inrange
|
|
||||||
|
|
||||||
(* o is the stamp, u is the main pdf page *)
|
|
||||||
|
|
||||||
(* \section{Split at bookmarks} *)
|
|
||||||
|
|
||||||
let get_bookmark_name pdf marks splitlevel n _ =
|
|
||||||
let refnums = Pdf.page_reference_numbers pdf in
|
|
||||||
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
|
|
||||||
match keep (function m -> n = Pdfpage.pagenumber_of_target ~fastrefnums pdf m.Pdfmarks.target && m.Pdfmarks.level <= splitlevel) marks with
|
|
||||||
| {Pdfmarks.text = title}::_ -> Cpdfattach.remove_unsafe_characters Cpdfmetadata.UTF8 title
|
|
||||||
| _ -> ""
|
|
||||||
|
|
||||||
(* Find the stem of a filename *)
|
|
||||||
(*let stem s =
|
|
||||||
implode (rev (tail_no_fail (dropwhile (neq '.') (rev (explode (Filename.basename s))))))*)
|
|
||||||
|
|
||||||
(* Return list, in order, a *set* of page numbers of bookmarks at a given level *)
|
|
||||||
let bookmark_pages level pdf =
|
|
||||||
let refnums = Pdf.page_reference_numbers pdf in
|
|
||||||
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
|
|
||||||
setify_preserving_order
|
|
||||||
(option_map
|
|
||||||
(function l when l.Pdfmarks.level = level -> Some (Pdfpage.pagenumber_of_target ~fastrefnums pdf l.Pdfmarks.target) | _ -> None)
|
|
||||||
(Pdfmarks.read_bookmarks pdf))
|
|
||||||
|
|
||||||
(* Called from cpdflib.ml - different from above *)
|
|
||||||
let split_on_bookmarks pdf level =
|
|
||||||
let points = lose (eq 0) (map pred (bookmark_pages level pdf))
|
|
||||||
in let pdf_pages = Pdfpage.pages_of_pagetree pdf in
|
|
||||||
let ranges = splitat points (indx pdf_pages) in
|
|
||||||
map (fun rs -> Pdfpage.pdf_of_pages pdf rs) ranges
|
|
||||||
|
|
||||||
(* Output information for each page *)
|
|
||||||
let output_page_info pdf range =
|
|
||||||
let pages = Pdfpage.pages_of_pagetree pdf
|
|
||||||
and labels = Pdfpagelabels.read pdf in
|
|
||||||
let getbox page box =
|
|
||||||
if box = "/MediaBox" then
|
|
||||||
match page.Pdfpage.mediabox with
|
|
||||||
| Pdf.Array [a; b; c; d] ->
|
|
||||||
Printf.sprintf "%f %f %f %f"
|
|
||||||
(Pdf.getnum a) (Pdf.getnum b) (Pdf.getnum c) (Pdf.getnum d)
|
|
||||||
| _ -> ""
|
|
||||||
else
|
|
||||||
match Pdf.lookup_direct pdf box page.Pdfpage.rest with
|
|
||||||
| Some (Pdf.Array [a; b; c; d]) ->
|
|
||||||
Printf.sprintf "%f %f %f %f"
|
|
||||||
(Pdf.getnum a) (Pdf.getnum b) (Pdf.getnum c) (Pdf.getnum d)
|
|
||||||
| _ -> ""
|
|
||||||
and rotation page =
|
|
||||||
Pdfpage.int_of_rotation page.Pdfpage.rotate
|
|
||||||
in
|
|
||||||
iter
|
|
||||||
(fun pnum ->
|
|
||||||
let page = select pnum pages in
|
|
||||||
Printf.printf "Page %i:\n" pnum;
|
|
||||||
Printf.printf "Label: %s\n"
|
|
||||||
(try Pdfpagelabels.pagelabeltext_of_pagenumber pnum labels with Not_found -> "");
|
|
||||||
Printf.printf "MediaBox: %s\n" (getbox page "/MediaBox");
|
|
||||||
Printf.printf "CropBox: %s\n" (getbox page "/CropBox");
|
|
||||||
Printf.printf "BleedBox: %s\n" (getbox page "/BleedBox");
|
|
||||||
Printf.printf "TrimBox: %s\n" (getbox page "/TrimBox");
|
|
||||||
Printf.printf "ArtBox: %s\n" (getbox page "/ArtBox");
|
|
||||||
Printf.printf "Rotation: %i\n" (rotation page))
|
|
||||||
range
|
|
||||||
|
|
||||||
(* Does the page have a defined box e.g "/CropBox" *)
|
(* Does the page have a defined box e.g "/CropBox" *)
|
||||||
let hasbox pdf page boxname =
|
let hasbox pdf page boxname =
|
||||||
|
@ -450,47 +99,6 @@ let hasbox pdf page boxname =
|
||||||
| Some _ -> true
|
| Some _ -> true
|
||||||
| _ -> false
|
| _ -> false
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(* List fonts *)
|
|
||||||
let list_font pdf page (name, dict) =
|
|
||||||
let subtype =
|
|
||||||
match Pdf.lookup_direct pdf "/Subtype" dict with
|
|
||||||
| Some (Pdf.Name n) -> Pdfwrite.string_of_pdf (Pdf.Name n)
|
|
||||||
| _ -> ""
|
|
||||||
in let basefont =
|
|
||||||
match Pdf.lookup_direct pdf "/BaseFont" dict with
|
|
||||||
| Some (Pdf.Name n) -> Pdfwrite.string_of_pdf (Pdf.Name n)
|
|
||||||
| _ -> ""
|
|
||||||
in let encoding =
|
|
||||||
match Pdf.lookup_direct pdf "/Encoding" dict with
|
|
||||||
| Some (Pdf.Name n) -> Pdfwrite.string_of_pdf (Pdf.Name n)
|
|
||||||
| _ -> ""
|
|
||||||
in
|
|
||||||
(page, name, subtype, basefont, encoding)
|
|
||||||
|
|
||||||
let list_fonts pdf range =
|
|
||||||
let pages = Pdfpage.pages_of_pagetree pdf in
|
|
||||||
flatten
|
|
||||||
(map
|
|
||||||
(fun (num, page) ->
|
|
||||||
if mem num range then
|
|
||||||
begin match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with
|
|
||||||
| Some (Pdf.Dictionary fontdict) ->
|
|
||||||
map (list_font pdf num) fontdict
|
|
||||||
| _ -> []
|
|
||||||
end
|
|
||||||
else
|
|
||||||
[])
|
|
||||||
(combine (ilist 1 (length pages)) pages))
|
|
||||||
|
|
||||||
let string_of_font (p, n, s, b, e) =
|
|
||||||
Printf.sprintf "%i %s %s %s %s\n" p n s b e
|
|
||||||
|
|
||||||
let print_fonts pdf range =
|
|
||||||
flprint
|
|
||||||
(fold_left ( ^ ) "" (map string_of_font (list_fonts pdf range)))
|
|
||||||
|
|
||||||
(* \section{Superimpose text, page numbers etc.} *)
|
(* \section{Superimpose text, page numbers etc.} *)
|
||||||
|
|
||||||
(* Process UTF8 text to /WinAnsiEncoding string (for standard 14) or whatever
|
(* Process UTF8 text to /WinAnsiEncoding string (for standard 14) or whatever
|
||||||
|
@ -744,9 +352,7 @@ let extract_page_text only_fontsize pdf _ page =
|
||||||
(* For each page, extract all the ops with text in them, and concatenate it all together *)
|
(* For each page, extract all the ops with text in them, and concatenate it all together *)
|
||||||
let extract_text extract_text_font_size pdf range =
|
let extract_text extract_text_font_size pdf range =
|
||||||
fold_left (fun x y -> x ^ (if x <> "" && y <> "" then "\n" else "") ^ y) ""
|
fold_left (fun x y -> x ^ (if x <> "" && y <> "" then "\n" else "") ^ y) ""
|
||||||
(map_pages (extract_page_text extract_text_font_size pdf) pdf range)
|
(Cpdfpage.map_pages (extract_page_text extract_text_font_size pdf) pdf range)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
let addtext
|
let addtext
|
||||||
metrics lines linewidth outline fast colour fontname embed bates batespad fontsize font
|
metrics lines linewidth outline fast colour fontname embed bates batespad fontsize font
|
||||||
|
@ -883,9 +489,9 @@ let addtext
|
||||||
else Pdfpage.postpend_operators pdf ops ~fast:fast page
|
else Pdfpage.postpend_operators pdf ops ~fast:fast page
|
||||||
in
|
in
|
||||||
if metrics then
|
if metrics then
|
||||||
(ignore (iter_pages (fun a b -> ignore (addtext_page a b)) pdf pages); pdf)
|
(ignore (Cpdfpage.iter_pages (fun a b -> ignore (addtext_page a b)) pdf pages); pdf)
|
||||||
else
|
else
|
||||||
process_pages (ppstub addtext_page) pdf pages
|
Cpdfpage.process_pages (ppstub addtext_page) pdf pages
|
||||||
|
|
||||||
(* Prev is a list of lists of characters *)
|
(* Prev is a list of lists of characters *)
|
||||||
let split_at_newline t =
|
let split_at_newline t =
|
||||||
|
@ -1050,8 +656,7 @@ let removetext range pdf =
|
||||||
let ops = Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content in
|
let ops = Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content in
|
||||||
[Pdfops.stream_of_ops (remove_stamps [] ops)]}
|
[Pdfops.stream_of_ops (remove_stamps [] ops)]}
|
||||||
in
|
in
|
||||||
process_pages (ppstub removetext_page) pdf range
|
Cpdfpage.process_pages (ppstub removetext_page) pdf range
|
||||||
|
|
||||||
|
|
||||||
(* \section{Shift page data} *)
|
(* \section{Shift page data} *)
|
||||||
let make_mediabox (xmin, ymin, xmax, ymax) =
|
let make_mediabox (xmin, ymin, xmax, ymax) =
|
||||||
|
@ -1255,7 +860,7 @@ let shift_page ?(fast=false) dxdylist pdf pnum page =
|
||||||
(Pdfpage.prepend_operators pdf [transform_op] ~fast page, pnum, Pdftransform.mktranslate dx dy)
|
(Pdfpage.prepend_operators pdf [transform_op] ~fast page, pnum, Pdftransform.mktranslate dx dy)
|
||||||
|
|
||||||
let shift_pdf ?(fast=false) dxdylist pdf range =
|
let shift_pdf ?(fast=false) dxdylist pdf range =
|
||||||
process_pages (shift_page ~fast dxdylist pdf) pdf range
|
Cpdfpage.process_pages (shift_page ~fast dxdylist pdf) pdf range
|
||||||
|
|
||||||
(* Change a page's media box so its minimum x and y are 0, making other
|
(* Change a page's media box so its minimum x and y are 0, making other
|
||||||
operations simpler to think about. Any shift that is done is reflected in
|
operations simpler to think about. Any shift that is done is reflected in
|
||||||
|
@ -1288,14 +893,14 @@ let vflip_pdf ?(fast=false) pdf range =
|
||||||
Pdftransform.matrix_of_op
|
Pdftransform.matrix_of_op
|
||||||
(Pdftransform.Scale ((0., ((miny +. maxy) /. 2.)), 1., -.1.))
|
(Pdftransform.Scale ((0., ((miny +. maxy) /. 2.)), 1., -.1.))
|
||||||
in
|
in
|
||||||
process_pages (flip_page ~fast transform_op pdf) pdf range
|
Cpdfpage.process_pages (flip_page ~fast transform_op pdf) pdf range
|
||||||
|
|
||||||
let hflip_pdf ?(fast=false) pdf range =
|
let hflip_pdf ?(fast=false) pdf range =
|
||||||
let transform_op minx _ maxx _ =
|
let transform_op minx _ maxx _ =
|
||||||
Pdftransform.matrix_of_op
|
Pdftransform.matrix_of_op
|
||||||
(Pdftransform.Scale (((minx +. maxx) /. 2., 0.), -.1., 1.))
|
(Pdftransform.Scale (((minx +. maxx) /. 2., 0.), -.1., 1.))
|
||||||
in
|
in
|
||||||
process_pages (flip_page ~fast transform_op pdf) pdf range
|
Cpdfpage.process_pages (flip_page ~fast transform_op pdf) pdf range
|
||||||
|
|
||||||
let stamp_shift_of_position topline midline sw sh w h p =
|
let stamp_shift_of_position topline midline sw sh w h p =
|
||||||
let half x = x /. 2.
|
let half x = x /. 2.
|
||||||
|
@ -1625,7 +1230,7 @@ let set_mediabox xywhlist pdf range =
|
||||||
[Pdf.Real x; Pdf.Real y;
|
[Pdf.Real x; Pdf.Real y;
|
||||||
Pdf.Real (x +. w); Pdf.Real (y +. h)])}
|
Pdf.Real (x +. w); Pdf.Real (y +. h)])}
|
||||||
in
|
in
|
||||||
process_pages (ppstub crop_page) pdf range
|
Cpdfpage.process_pages (ppstub crop_page) pdf range
|
||||||
|
|
||||||
(* Just used by cpdflib for historical reasons *)
|
(* Just used by cpdflib for historical reasons *)
|
||||||
let setBox box minx maxx miny maxy pdf range =
|
let setBox box minx maxx miny maxy pdf range =
|
||||||
|
@ -1636,7 +1241,7 @@ let setBox box minx maxx miny maxy pdf range =
|
||||||
page.Pdfpage.rest box
|
page.Pdfpage.rest box
|
||||||
(Pdf.Array [Pdf.Real minx; Pdf.Real miny; Pdf.Real maxx; Pdf.Real maxy])}
|
(Pdf.Array [Pdf.Real minx; Pdf.Real miny; Pdf.Real maxx; Pdf.Real maxy])}
|
||||||
in
|
in
|
||||||
process_pages (ppstub set_box_page) pdf range
|
Cpdfpage.process_pages (ppstub set_box_page) pdf range
|
||||||
|
|
||||||
(* \section{Cropping} *)
|
(* \section{Cropping} *)
|
||||||
let crop_pdf ?(box="/CropBox") xywhlist pdf range =
|
let crop_pdf ?(box="/CropBox") xywhlist pdf range =
|
||||||
|
@ -1651,14 +1256,14 @@ let crop_pdf ?(box="/CropBox") xywhlist pdf range =
|
||||||
[Pdf.Real x; Pdf.Real y;
|
[Pdf.Real x; Pdf.Real y;
|
||||||
Pdf.Real (x +. w); Pdf.Real (y +. h)])))}
|
Pdf.Real (x +. w); Pdf.Real (y +. h)])))}
|
||||||
in
|
in
|
||||||
process_pages (ppstub crop_page) pdf range
|
Cpdfpage.process_pages (ppstub crop_page) pdf range
|
||||||
|
|
||||||
(* Clip a page to one of its boxes, or the media box if that box is not
|
(* Clip a page to one of its boxes, or the media box if that box is not
|
||||||
* present. This is a hard clip, done by using a clipping rectangle, so that
|
* present. This is a hard clip, done by using a clipping rectangle, so that
|
||||||
* the page may then be used as a stamp without extraneous material reapearing.
|
* the page may then be used as a stamp without extraneous material reapearing.
|
||||||
* *)
|
* *)
|
||||||
let hard_box pdf range boxname mediabox_if_missing fast =
|
let hard_box pdf range boxname mediabox_if_missing fast =
|
||||||
process_pages
|
Cpdfpage.process_pages
|
||||||
(ppstub (fun pagenum page ->
|
(ppstub (fun pagenum page ->
|
||||||
let minx, miny, maxx, maxy =
|
let minx, miny, maxx, maxy =
|
||||||
if boxname = "/MediaBox" then
|
if boxname = "/MediaBox" then
|
||||||
|
@ -1682,7 +1287,7 @@ let remove_cropping_pdf pdf range =
|
||||||
Pdfpage.rest =
|
Pdfpage.rest =
|
||||||
(Pdf.remove_dict_entry page.Pdfpage.rest "/CropBox")}
|
(Pdf.remove_dict_entry page.Pdfpage.rest "/CropBox")}
|
||||||
in
|
in
|
||||||
process_pages (ppstub remove_cropping_page) pdf range
|
Cpdfpage.process_pages (ppstub remove_cropping_page) pdf range
|
||||||
|
|
||||||
let remove_trim_pdf pdf range =
|
let remove_trim_pdf pdf range =
|
||||||
let remove_trim_page _ page =
|
let remove_trim_page _ page =
|
||||||
|
@ -1690,7 +1295,7 @@ let remove_trim_pdf pdf range =
|
||||||
Pdfpage.rest =
|
Pdfpage.rest =
|
||||||
(Pdf.remove_dict_entry page.Pdfpage.rest "/TrimBox")}
|
(Pdf.remove_dict_entry page.Pdfpage.rest "/TrimBox")}
|
||||||
in
|
in
|
||||||
process_pages (ppstub remove_trim_page) pdf range
|
Cpdfpage.process_pages (ppstub remove_trim_page) pdf range
|
||||||
|
|
||||||
let remove_art_pdf pdf range =
|
let remove_art_pdf pdf range =
|
||||||
let remove_art_page _ page =
|
let remove_art_page _ page =
|
||||||
|
@ -1698,7 +1303,7 @@ let remove_art_pdf pdf range =
|
||||||
Pdfpage.rest =
|
Pdfpage.rest =
|
||||||
(Pdf.remove_dict_entry page.Pdfpage.rest "/ArtBox")}
|
(Pdf.remove_dict_entry page.Pdfpage.rest "/ArtBox")}
|
||||||
in
|
in
|
||||||
process_pages (ppstub remove_art_page) pdf range
|
Cpdfpage.process_pages (ppstub remove_art_page) pdf range
|
||||||
|
|
||||||
let remove_bleed_pdf pdf range =
|
let remove_bleed_pdf pdf range =
|
||||||
let remove_bleed_page _ page =
|
let remove_bleed_page _ page =
|
||||||
|
@ -1706,7 +1311,7 @@ let remove_bleed_pdf pdf range =
|
||||||
Pdfpage.rest =
|
Pdfpage.rest =
|
||||||
(Pdf.remove_dict_entry page.Pdfpage.rest "/BleedBox")}
|
(Pdf.remove_dict_entry page.Pdfpage.rest "/BleedBox")}
|
||||||
in
|
in
|
||||||
process_pages (ppstub remove_bleed_page) pdf range
|
Cpdfpage.process_pages (ppstub remove_bleed_page) pdf range
|
||||||
|
|
||||||
(* \section{Rotating pages} *)
|
(* \section{Rotating pages} *)
|
||||||
let rotate_pdf r pdf range =
|
let rotate_pdf r pdf range =
|
||||||
|
@ -1714,14 +1319,14 @@ let rotate_pdf r pdf range =
|
||||||
{page with Pdfpage.rotate =
|
{page with Pdfpage.rotate =
|
||||||
Pdfpage.rotation_of_int r}
|
Pdfpage.rotation_of_int r}
|
||||||
in
|
in
|
||||||
process_pages (ppstub rotate_page) pdf range
|
Cpdfpage.process_pages (ppstub rotate_page) pdf range
|
||||||
|
|
||||||
let rotate_pdf_by r pdf range =
|
let rotate_pdf_by r pdf range =
|
||||||
let rotate_page_by _ page =
|
let rotate_page_by _ page =
|
||||||
{page with Pdfpage.rotate =
|
{page with Pdfpage.rotate =
|
||||||
Pdfpage.rotation_of_int ((Pdfpage.int_of_rotation page.Pdfpage.rotate + r) mod 360)}
|
Pdfpage.rotation_of_int ((Pdfpage.int_of_rotation page.Pdfpage.rotate + r) mod 360)}
|
||||||
in
|
in
|
||||||
process_pages (ppstub rotate_page_by) pdf range
|
Cpdfpage.process_pages (ppstub rotate_page_by) pdf range
|
||||||
|
|
||||||
let rotate_page_contents ~fast rotpoint r pdf pnum page =
|
let rotate_page_contents ~fast rotpoint r pdf pnum page =
|
||||||
let rotation_point =
|
let rotation_point =
|
||||||
|
@ -1744,7 +1349,7 @@ let rotate_page_contents ~fast rotpoint r pdf pnum page =
|
||||||
(Pdfpage.prepend_operators pdf [transform_op] ~fast page, pnum, tr)
|
(Pdfpage.prepend_operators pdf [transform_op] ~fast page, pnum, tr)
|
||||||
|
|
||||||
let rotate_contents ?(fast=false) r pdf range =
|
let rotate_contents ?(fast=false) r pdf range =
|
||||||
process_pages (rotate_page_contents ~fast None r pdf) pdf range
|
Cpdfpage.process_pages (rotate_page_contents ~fast None r pdf) pdf range
|
||||||
|
|
||||||
(* Return the pages from the pdf in the range, unordered. *)
|
(* Return the pages from the pdf in the range, unordered. *)
|
||||||
let select_pages range pdf =
|
let select_pages range pdf =
|
||||||
|
@ -1795,7 +1400,7 @@ let upright ?(fast=false) range pdf =
|
||||||
let page = transform_contents ~fast tr pdf page in
|
let page = transform_contents ~fast tr pdf page in
|
||||||
(rectify_boxes ~fast pdf {page with Pdfpage.rotate = Pdfpage.Rotate0}, pnum, tr)
|
(rectify_boxes ~fast pdf {page with Pdfpage.rotate = Pdfpage.Rotate0}, pnum, tr)
|
||||||
in
|
in
|
||||||
process_pages (upright_page pdf) pdf range
|
Cpdfpage.process_pages (upright_page pdf) pdf range
|
||||||
|
|
||||||
(* \section{Scale page data} *)
|
(* \section{Scale page data} *)
|
||||||
let scale_pdf ?(fast=false) sxsylist pdf range =
|
let scale_pdf ?(fast=false) sxsylist pdf range =
|
||||||
|
@ -1814,7 +1419,7 @@ let scale_pdf ?(fast=false) sxsylist pdf range =
|
||||||
transform_annotations pdf matrix page.Pdfpage.rest;
|
transform_annotations pdf matrix page.Pdfpage.rest;
|
||||||
(Pdfpage.prepend_operators pdf ~fast [transform_op] page, pnum, matrix)
|
(Pdfpage.prepend_operators pdf ~fast [transform_op] page, pnum, matrix)
|
||||||
in
|
in
|
||||||
process_pages scale_page pdf range
|
Cpdfpage.process_pages scale_page pdf range
|
||||||
|
|
||||||
(* Scale to fit page of size x * y *)
|
(* Scale to fit page of size x * y *)
|
||||||
let scale_to_fit_pdf ?(fast=false) position input_scale xylist op pdf range =
|
let scale_to_fit_pdf ?(fast=false) position input_scale xylist op pdf range =
|
||||||
|
@ -1855,7 +1460,7 @@ let scale_to_fit_pdf ?(fast=false) position input_scale xylist op pdf range =
|
||||||
(Pdfpage.prepend_operators pdf [Pdfops.Op_cm matrix] ~fast
|
(Pdfpage.prepend_operators pdf [Pdfops.Op_cm matrix] ~fast
|
||||||
(change_pattern_matrices_page pdf (Pdftransform.matrix_invert matrix) page), pnum, matrix)
|
(change_pattern_matrices_page pdf (Pdftransform.matrix_invert matrix) page), pnum, matrix)
|
||||||
in
|
in
|
||||||
process_pages scale_page_to_fit pdf range
|
Cpdfpage.process_pages scale_page_to_fit pdf range
|
||||||
|
|
||||||
(* Scale contents *)
|
(* Scale contents *)
|
||||||
let scale_page_contents ?(fast=false) scale position pdf pnum page =
|
let scale_page_contents ?(fast=false) scale position pdf pnum page =
|
||||||
|
@ -1891,177 +1496,8 @@ let scale_page_contents ?(fast=false) scale position pdf pnum page =
|
||||||
(Pdfpage.prepend_operators pdf [transform_op] ~fast page, pnum, transform)
|
(Pdfpage.prepend_operators pdf [transform_op] ~fast page, pnum, transform)
|
||||||
|
|
||||||
let scale_contents ?(fast=false) position scale pdf range =
|
let scale_contents ?(fast=false) position scale pdf range =
|
||||||
process_pages (scale_page_contents ~fast scale position pdf) pdf range
|
Cpdfpage.process_pages (scale_page_contents ~fast scale position pdf) pdf range
|
||||||
|
|
||||||
(* \section{List annotations} *)
|
|
||||||
let get_annotation_string encoding pdf annot =
|
|
||||||
match Pdf.lookup_direct pdf "/Contents" annot with
|
|
||||||
| Some (Pdf.String s) -> Cpdfmetadata.encode_output encoding s
|
|
||||||
| _ -> ""
|
|
||||||
|
|
||||||
let print_annotation encoding pdf num s =
|
|
||||||
let s = get_annotation_string encoding pdf s in
|
|
||||||
match s with
|
|
||||||
| "" -> ()
|
|
||||||
| s ->
|
|
||||||
flprint (Printf.sprintf "Page %d: " num);
|
|
||||||
flprint s;
|
|
||||||
flprint "\n"
|
|
||||||
|
|
||||||
let list_page_annotations encoding pdf num page =
|
|
||||||
match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with
|
|
||||||
| Some (Pdf.Array annots) ->
|
|
||||||
iter (print_annotation encoding pdf num) (map (Pdf.direct pdf) annots)
|
|
||||||
| _ -> ()
|
|
||||||
|
|
||||||
let annotations_json_page pdf page pagenum =
|
|
||||||
match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with
|
|
||||||
| Some (Pdf.Array annots) ->
|
|
||||||
map
|
|
||||||
(fun annot ->
|
|
||||||
`List [`Int pagenum; Cpdfjson.json_of_object pdf (fun _ -> ()) false false annot])
|
|
||||||
(map (Pdf.direct pdf) annots)
|
|
||||||
| _ -> []
|
|
||||||
|
|
||||||
let list_annotations_json pdf =
|
|
||||||
let module J = Cpdfyojson.Safe in
|
|
||||||
let pages = Pdfpage.pages_of_pagetree pdf in
|
|
||||||
let pagenums = indx pages in
|
|
||||||
let json = `List (flatten (map2 (annotations_json_page pdf) pages pagenums)) in
|
|
||||||
J.pretty_to_channel stdout json
|
|
||||||
|
|
||||||
let list_annotations ~json encoding pdf =
|
|
||||||
let range = Cpdfpagespec.parse_pagespec pdf "all" in
|
|
||||||
if json
|
|
||||||
then list_annotations_json pdf
|
|
||||||
else iter_pages (list_page_annotations encoding pdf) pdf range
|
|
||||||
|
|
||||||
let get_annotations encoding pdf =
|
|
||||||
let pages = Pdfpage.pages_of_pagetree pdf in
|
|
||||||
flatten
|
|
||||||
(map2
|
|
||||||
(fun page pagenumber ->
|
|
||||||
match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with
|
|
||||||
| Some (Pdf.Array annots) ->
|
|
||||||
let strings =
|
|
||||||
map (get_annotation_string encoding pdf) (map (Pdf.direct pdf) annots)
|
|
||||||
in
|
|
||||||
combine (many pagenumber (length strings)) strings
|
|
||||||
| _ -> [])
|
|
||||||
pages
|
|
||||||
(ilist 1 (length pages)))
|
|
||||||
|
|
||||||
(* Equalise the page lengths of two PDFs by chopping or extending the first one.
|
|
||||||
*)
|
|
||||||
let equalise_lengths a b =
|
|
||||||
let a' =
|
|
||||||
if Pdfpage.endpage a < Pdfpage.endpage b then
|
|
||||||
Pdfpage.change_pages false a
|
|
||||||
(Pdfpage.pages_of_pagetree a @
|
|
||||||
many (Pdfpage.blankpage Pdfpaper.a4) (Pdfpage.endpage b - Pdfpage.endpage a))
|
|
||||||
else if Pdfpage.endpage a > Pdfpage.endpage b then
|
|
||||||
Pdfpage.change_pages false a
|
|
||||||
(take (Pdfpage.pages_of_pagetree a) (Pdfpage.endpage b))
|
|
||||||
else a
|
|
||||||
in
|
|
||||||
a', b
|
|
||||||
|
|
||||||
(* Copy annotations *)
|
|
||||||
|
|
||||||
(* FIXME: Why does this chop the files to the same length? Should be able to
|
|
||||||
apply annotations from a longer file to a shorter? *)
|
|
||||||
|
|
||||||
(* Rewrite any annotation destinations to point to pages in the
|
|
||||||
destination file. This prevents pages being copied, and ensures the links are
|
|
||||||
correct Any Indirect link inside a /Dest is rewritten if in the table. If not
|
|
||||||
inside a /Dest, nothing is rewritten. *)
|
|
||||||
let rec renumber_in_dest table indest = function
|
|
||||||
Pdf.Indirect i ->
|
|
||||||
begin
|
|
||||||
try Pdf.Indirect (Hashtbl.find table i) with _ -> Pdf.Indirect i
|
|
||||||
end
|
|
||||||
| Pdf.Array a ->
|
|
||||||
Pdf.recurse_array (renumber_in_dest table indest) a
|
|
||||||
| Pdf.Dictionary d ->
|
|
||||||
Pdf.Dictionary
|
|
||||||
(map
|
|
||||||
(function
|
|
||||||
("/Dest", v) -> ("/Dest", renumber_in_dest table true v)
|
|
||||||
| (k, v) -> (k, renumber_in_dest table indest v))
|
|
||||||
d)
|
|
||||||
| x -> x
|
|
||||||
|
|
||||||
let renumber_in_object pdf objnum table =
|
|
||||||
Pdf.addobj_given_num
|
|
||||||
pdf (objnum, (renumber_in_dest table false (Pdf.lookup_obj pdf objnum)))
|
|
||||||
|
|
||||||
let copy_annotations_page topdf frompdf frompage topage =
|
|
||||||
match Pdf.lookup_direct frompdf "/Annots" frompage.Pdfpage.rest with
|
|
||||||
Some (Pdf.Array frompage_annots as annots) ->
|
|
||||||
let table =
|
|
||||||
hashtable_of_dictionary
|
|
||||||
(combine
|
|
||||||
(Pdf.page_reference_numbers frompdf)
|
|
||||||
(Pdf.page_reference_numbers topdf))
|
|
||||||
in
|
|
||||||
iter
|
|
||||||
(function
|
|
||||||
(* FIXME: We assume they are indirects. Must also do direct, though rare.*)
|
|
||||||
Pdf.Indirect x ->
|
|
||||||
(*Printf.printf "Copying annotation %s which is\n%s\n"
|
|
||||||
(Pdfwrite.string_of_pdf (Pdf.Indirect x))
|
|
||||||
(Pdfwrite.string_of_pdf (Pdf.direct frompdf (Pdf.Indirect
|
|
||||||
x)));*)
|
|
||||||
renumber_in_object frompdf x table
|
|
||||||
| _ -> ())
|
|
||||||
frompage_annots;
|
|
||||||
let objects_to_copy = Pdf.objects_referenced [] [] frompdf annots in
|
|
||||||
iter
|
|
||||||
(fun n ->
|
|
||||||
ignore (Pdf.addobj_given_num topdf (n, Pdf.lookup_obj frompdf n)))
|
|
||||||
objects_to_copy;
|
|
||||||
let topage_annots =
|
|
||||||
match Pdf.lookup_direct frompdf "/Annots" topage.Pdfpage.rest with
|
|
||||||
| Some (Pdf.Array annots) -> annots
|
|
||||||
| _ -> []
|
|
||||||
in
|
|
||||||
let merged_dict = Pdf.Array (frompage_annots @ topage_annots) in
|
|
||||||
let topage' =
|
|
||||||
{topage with Pdfpage.rest =
|
|
||||||
Pdf.add_dict_entry topage.Pdfpage.rest "/Annots" merged_dict}
|
|
||||||
in
|
|
||||||
topdf, topage'
|
|
||||||
| Some x -> topdf, topage
|
|
||||||
| None -> topdf, topage
|
|
||||||
|
|
||||||
let copy_annotations range frompdf topdf =
|
|
||||||
let frompdf, topdf = equalise_lengths frompdf topdf in
|
|
||||||
match Pdf.renumber_pdfs [frompdf; topdf] with
|
|
||||||
| [frompdf; topdf] ->
|
|
||||||
let frompdf_pages = Pdfpage.pages_of_pagetree frompdf in
|
|
||||||
let topdf_pages = Pdfpage.pages_of_pagetree topdf in
|
|
||||||
let pdf = ref topdf
|
|
||||||
and pages = ref []
|
|
||||||
and pnum = ref 1
|
|
||||||
and frompdf_pages = ref frompdf_pages
|
|
||||||
and topdf_pages = ref topdf_pages in
|
|
||||||
(* Go through, updating pdf and collecting new pages. *)
|
|
||||||
while not (isnull !frompdf_pages) do
|
|
||||||
let frompdf_page = hd !frompdf_pages
|
|
||||||
and topdf_page = hd !topdf_pages in
|
|
||||||
let pdf', page =
|
|
||||||
if mem !pnum range
|
|
||||||
then copy_annotations_page !pdf frompdf frompdf_page topdf_page
|
|
||||||
else !pdf, topdf_page
|
|
||||||
in
|
|
||||||
pdf := pdf';
|
|
||||||
pages =| page;
|
|
||||||
incr pnum;
|
|
||||||
frompdf_pages := tl !frompdf_pages;
|
|
||||||
topdf_pages := tl !topdf_pages
|
|
||||||
done;
|
|
||||||
Pdfpage.change_pages true !pdf (rev !pages)
|
|
||||||
| _ -> assert false
|
|
||||||
|
|
||||||
let addrectangle
|
let addrectangle
|
||||||
fast (w, h) colour outline linewidth opacity position relative_to_cropbox
|
fast (w, h) colour outline linewidth opacity position relative_to_cropbox
|
||||||
|
@ -2125,7 +1561,7 @@ let addrectangle
|
||||||
then Pdfpage.prepend_operators pdf ops ~fast:fast page
|
then Pdfpage.prepend_operators pdf ops ~fast:fast page
|
||||||
else Pdfpage.postpend_operators pdf ops ~fast:fast page
|
else Pdfpage.postpend_operators pdf ops ~fast:fast page
|
||||||
in
|
in
|
||||||
process_pages (ppstub addrectangle_page) pdf range
|
Cpdfpage.process_pages (ppstub addrectangle_page) pdf range
|
||||||
|
|
||||||
|
|
||||||
(* Imposition *)
|
(* Imposition *)
|
||||||
|
@ -2497,7 +1933,7 @@ let blacktext c range pdf =
|
||||||
process_xobjects pdf page (blacktext_ops c);
|
process_xobjects pdf page (blacktext_ops c);
|
||||||
{page with Pdfpage.content = content'}
|
{page with Pdfpage.content = content'}
|
||||||
in
|
in
|
||||||
process_pages (ppstub blacktext_page) pdf range
|
Cpdfpage.process_pages (ppstub blacktext_page) pdf range
|
||||||
|
|
||||||
(* \section{Blacken lines} *)
|
(* \section{Blacken lines} *)
|
||||||
let blacklines_ops c pdf resources content =
|
let blacklines_ops c pdf resources content =
|
||||||
|
@ -2523,7 +1959,7 @@ let blacklines c range pdf =
|
||||||
process_xobjects pdf page (blacklines_ops c);
|
process_xobjects pdf page (blacklines_ops c);
|
||||||
{page with Pdfpage.content = content'}
|
{page with Pdfpage.content = content'}
|
||||||
in
|
in
|
||||||
process_pages (ppstub blacklines_page) pdf range
|
Cpdfpage.process_pages (ppstub blacklines_page) pdf range
|
||||||
|
|
||||||
(* \section{Blacken Fills} *)
|
(* \section{Blacken Fills} *)
|
||||||
let blackfills_ops c pdf resources content =
|
let blackfills_ops c pdf resources content =
|
||||||
|
@ -2549,7 +1985,7 @@ let blackfills c range pdf =
|
||||||
process_xobjects pdf page (blackfills_ops c);
|
process_xobjects pdf page (blackfills_ops c);
|
||||||
{page with Pdfpage.content = content'}
|
{page with Pdfpage.content = content'}
|
||||||
in
|
in
|
||||||
process_pages (ppstub blackfills_page) pdf range
|
Cpdfpage.process_pages (ppstub blackfills_page) pdf range
|
||||||
|
|
||||||
(* \section{Set a minimum line width to avoid dropout} *)
|
(* \section{Set a minimum line width to avoid dropout} *)
|
||||||
let thinlines range width pdf =
|
let thinlines range width pdf =
|
||||||
|
@ -2624,20 +2060,8 @@ let thinlines range width pdf =
|
||||||
let content' = [Pdfops.stream_of_ops operators] in
|
let content' = [Pdfops.stream_of_ops operators] in
|
||||||
{page with Pdfpage.content = content'}
|
{page with Pdfpage.content = content'}
|
||||||
in
|
in
|
||||||
process_pages (ppstub thinpage) pdf range
|
Cpdfpage.process_pages (ppstub thinpage) pdf range
|
||||||
|
|
||||||
(* \section{Remove annotations} *)
|
|
||||||
let remove_annotations range pdf =
|
|
||||||
let remove_annotations_page pagenum page =
|
|
||||||
if mem pagenum range then
|
|
||||||
let rest' =
|
|
||||||
Pdf.remove_dict_entry page.Pdfpage.rest "/Annots"
|
|
||||||
in
|
|
||||||
{page with Pdfpage.rest = rest'}
|
|
||||||
else
|
|
||||||
page
|
|
||||||
in
|
|
||||||
process_pages (ppstub remove_annotations_page) pdf range
|
|
||||||
|
|
||||||
(* \section{Making draft documents} *)
|
(* \section{Making draft documents} *)
|
||||||
|
|
||||||
|
@ -2809,7 +2233,7 @@ let append_page_content_page fast s before pdf n page =
|
||||||
pdf ops ~fast page
|
pdf ops ~fast page
|
||||||
|
|
||||||
let append_page_content s before fast range pdf =
|
let append_page_content s before fast range pdf =
|
||||||
process_pages (ppstub (append_page_content_page fast s before pdf)) pdf range
|
Cpdfpage.process_pages (ppstub (append_page_content_page fast s before pdf)) pdf range
|
||||||
|
|
||||||
(* Add rectangles on top of pages to show Media, Crop, Art, Trim, Bleed boxes.
|
(* Add rectangles on top of pages to show Media, Crop, Art, Trim, Bleed boxes.
|
||||||
*
|
*
|
||||||
|
@ -2855,7 +2279,9 @@ let show_boxes_page fast pdf _ page =
|
||||||
Pdfpage.postpend_operators pdf ops ~fast page
|
Pdfpage.postpend_operators pdf ops ~fast page
|
||||||
|
|
||||||
let show_boxes ?(fast=false) pdf range =
|
let show_boxes ?(fast=false) pdf range =
|
||||||
process_pages (ppstub (show_boxes_page fast pdf)) pdf range
|
Cpdfpage.process_pages (ppstub (show_boxes_page fast pdf)) pdf range
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
let allowance = 9.
|
let allowance = 9.
|
||||||
|
|
||||||
|
@ -2887,7 +2313,7 @@ let trim_marks_page fast pdf n page =
|
||||||
page
|
page
|
||||||
|
|
||||||
let trim_marks ?(fast=false) pdf range =
|
let trim_marks ?(fast=false) pdf range =
|
||||||
process_pages (ppstub (trim_marks_page fast pdf)) pdf range
|
Cpdfpage.process_pages (ppstub (trim_marks_page fast pdf)) pdf range
|
||||||
|
|
||||||
let rec remove_all_text_ops pdf resources content =
|
let rec remove_all_text_ops pdf resources content =
|
||||||
let is_textop = function
|
let is_textop = function
|
||||||
|
@ -2989,7 +2415,7 @@ let remove_clipping pdf range =
|
||||||
process_xobjects pdf page remove_clipping_ops;
|
process_xobjects pdf page remove_clipping_ops;
|
||||||
{page with Pdfpage.content = content'}
|
{page with Pdfpage.content = content'}
|
||||||
in
|
in
|
||||||
process_pages (ppstub remove_clipping_page) pdf range
|
Cpdfpage.process_pages (ppstub remove_clipping_page) pdf range
|
||||||
|
|
||||||
(* Image resolution *)
|
(* Image resolution *)
|
||||||
type xobj =
|
type xobj =
|
||||||
|
@ -3079,7 +2505,7 @@ let rec image_resolution_page pdf page pagenum dpi (images : (int * string * xob
|
||||||
|
|
||||||
and image_resolution pdf range dpi =
|
and image_resolution pdf range dpi =
|
||||||
let images = ref [] in
|
let images = ref [] in
|
||||||
iter_pages
|
Cpdfpage.iter_pages
|
||||||
(fun pagenum page ->
|
(fun pagenum page ->
|
||||||
(* 1. Get all image names and their native resolutions from resources as string * int * int *)
|
(* 1. Get all image names and their native resolutions from resources as string * int * int *)
|
||||||
match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with
|
match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with
|
||||||
|
@ -3142,7 +2568,7 @@ let image_resolution pdf range dpi =
|
||||||
the contents of the mediabox will be used if the from fox is not available. If
|
the contents of the mediabox will be used if the from fox is not available. If
|
||||||
mediabox_is_missing is false, the page is unaltered. *)
|
mediabox_is_missing is false, the page is unaltered. *)
|
||||||
let copy_box f t mediabox_if_missing pdf range =
|
let copy_box f t mediabox_if_missing pdf range =
|
||||||
process_pages
|
Cpdfpage.process_pages
|
||||||
(ppstub (fun _ page ->
|
(ppstub (fun _ page ->
|
||||||
if f = "/MediaBox" then
|
if f = "/MediaBox" then
|
||||||
{page with Pdfpage.rest =
|
{page with Pdfpage.rest =
|
||||||
|
@ -3178,7 +2604,7 @@ let remove_unused_resources_page pdf n page =
|
||||||
{page with Pdfpage.resources = Pdf.add_dict_entry page.Pdfpage.resources "/XObject" xobjdict}
|
{page with Pdfpage.resources = Pdf.add_dict_entry page.Pdfpage.resources "/XObject" xobjdict}
|
||||||
|
|
||||||
let remove_unused_resources pdf =
|
let remove_unused_resources pdf =
|
||||||
process_pages (ppstub (remove_unused_resources_page pdf)) pdf (ilist 1 (Pdfpage.endpage pdf))
|
Cpdfpage.process_pages (ppstub (remove_unused_resources_page pdf)) pdf (ilist 1 (Pdfpage.endpage pdf))
|
||||||
|
|
||||||
(* Indent bookmarks in each file by one and add a title bookmark pointing to the first page. *)
|
(* Indent bookmarks in each file by one and add a title bookmark pointing to the first page. *)
|
||||||
let add_bookmark_title filename use_title pdf =
|
let add_bookmark_title filename use_title pdf =
|
||||||
|
@ -3413,5 +2839,3 @@ let extract_images path_to_p2p path_to_im encoding dedup dedup_per_page pdf rang
|
||||||
iter (extract_images_form_xobject path_to_p2p path_to_im encoding dedup dedup_per_page pdf serial stem pnum) forms)
|
iter (extract_images_form_xobject path_to_p2p path_to_im encoding dedup dedup_per_page pdf serial stem pnum) forms)
|
||||||
pages
|
pages
|
||||||
(indx pages)
|
(indx pages)
|
||||||
|
|
||||||
|
|
||||||
|
|
55
cpdf.mli
55
cpdf.mli
|
@ -8,34 +8,9 @@ type color =
|
||||||
|
|
||||||
(** {2 Working with pages} *)
|
(** {2 Working with pages} *)
|
||||||
|
|
||||||
(** Given a function from page number and page to page, a document, and a list
|
|
||||||
of page numbers to apply it to, apply the function to all those pages. *)
|
|
||||||
val process_pages : (int -> Pdfpage.t -> Pdfpage.t * int * Pdftransform.transform_matrix) ->
|
|
||||||
Pdf.t -> int list -> Pdf.t
|
|
||||||
|
|
||||||
(** Same as [process_pages], but iterate rather than map. *)
|
|
||||||
val iter_pages : (int -> Pdfpage.t -> unit) -> Pdf.t -> int list -> unit
|
|
||||||
|
|
||||||
(** Same as [process_pages] but return the list of outputs of the map function. *)
|
|
||||||
val map_pages : (int -> Pdfpage.t -> 'a) -> Pdf.t -> int list -> 'a list
|
|
||||||
|
|
||||||
val copy_cropbox_to_mediabox : Pdf.t -> int list -> Pdf.t
|
val copy_cropbox_to_mediabox : Pdf.t -> int list -> Pdf.t
|
||||||
|
|
||||||
(** {2 Bookmarks} *)
|
|
||||||
|
|
||||||
(** [parse_bookmark_file verify pdf input] parses the bookmark file in [input].
|
|
||||||
Details of the bookmark file format can be found in cpdfmanual.pdf *)
|
|
||||||
val parse_bookmark_file : bool -> Pdf.t -> Pdfio.input -> Pdfmarks.t list
|
|
||||||
|
|
||||||
(** [add_bookmarks verify input pdf] adds bookmarks from the bookmark file
|
|
||||||
give. If [verify] is given, bookmarks will be verified to ensure, for example,
|
|
||||||
that they are not out of the page range. *)
|
|
||||||
val add_bookmarks : json:bool -> bool -> Pdfio.input -> Pdf.t -> Pdf.t
|
|
||||||
|
|
||||||
(** [list_bookmarks encoding range pdf output] lists the bookmarks to the given
|
|
||||||
output in the format specified in cpdfmanual.pdf *)
|
|
||||||
val list_bookmarks : json:bool -> Cpdfmetadata.encoding -> int list -> Pdf.t -> Pdfio.output -> unit
|
|
||||||
|
|
||||||
(** {2 Stamping} *)
|
(** {2 Stamping} *)
|
||||||
|
|
||||||
(** [combine_pages fast under over scaletofit swap equalize] combines the page
|
(** [combine_pages fast under over scaletofit swap equalize] combines the page
|
||||||
|
@ -51,19 +26,6 @@ val combine_pages : bool -> Pdf.t -> Pdf.t -> bool -> bool -> bool -> Pdf.t
|
||||||
[combine_pages]. *)
|
[combine_pages]. *)
|
||||||
val stamp : bool -> Cpdfposition.position -> bool -> bool -> bool -> bool -> bool -> int list -> Pdf.t -> Pdf.t -> Pdf.t
|
val stamp : bool -> Cpdfposition.position -> bool -> bool -> bool -> bool -> bool -> int list -> Pdf.t -> Pdf.t -> Pdf.t
|
||||||
|
|
||||||
(** {2 Splitting PDFs} *)
|
|
||||||
|
|
||||||
(** Split a PDF on bookmarks of a given level or below. Level 0 is top level. *)
|
|
||||||
val split_on_bookmarks : Pdf.t -> int -> Pdf.t list
|
|
||||||
|
|
||||||
(** {2 Listing fonts} *)
|
|
||||||
|
|
||||||
(** Print font list to stdout *)
|
|
||||||
val print_fonts : Pdf.t -> int list -> unit
|
|
||||||
|
|
||||||
(** Return font list. Page number, name, subtype, basefont, encoding. *)
|
|
||||||
val list_fonts : Pdf.t -> int list -> (int * string * string * string * string) list
|
|
||||||
|
|
||||||
(** {2 Adding text} *)
|
(** {2 Adding text} *)
|
||||||
|
|
||||||
(** Justification of multiline text *)
|
(** Justification of multiline text *)
|
||||||
|
@ -128,9 +90,6 @@ val removetext : int list -> Pdf.t -> Pdf.t
|
||||||
|
|
||||||
(** {2 Page geometry} *)
|
(** {2 Page geometry} *)
|
||||||
|
|
||||||
(** Print page info (Mediabox etc) to standard output. *)
|
|
||||||
val output_page_info : Pdf.t -> int list -> unit
|
|
||||||
|
|
||||||
(** True if a given page in a PDF has a given box *)
|
(** True if a given page in a PDF has a given box *)
|
||||||
val hasbox : Pdf.t -> int -> string -> bool
|
val hasbox : Pdf.t -> int -> string -> bool
|
||||||
|
|
||||||
|
@ -195,20 +154,6 @@ val trim_marks : ?fast:bool -> Pdf.t -> int list -> Pdf.t
|
||||||
|
|
||||||
val show_boxes : ?fast:bool -> Pdf.t -> int list -> Pdf.t
|
val show_boxes : ?fast:bool -> Pdf.t -> int list -> Pdf.t
|
||||||
|
|
||||||
(** {2 Annotations} *)
|
|
||||||
|
|
||||||
(** List the annotations to standard output in a given encoding. See cpdfmanual.pdf for the format details. *)
|
|
||||||
val list_annotations : json:bool -> Cpdfmetadata.encoding -> Pdf.t -> unit
|
|
||||||
|
|
||||||
(** Return the annotations as a (pagenumber, content) list *)
|
|
||||||
val get_annotations : Cpdfmetadata.encoding -> Pdf.t -> (int * string) list
|
|
||||||
|
|
||||||
(** Copy the annotations on a given set of pages from a to b. b is returned. *)
|
|
||||||
val copy_annotations : int list -> Pdf.t -> Pdf.t -> Pdf.t
|
|
||||||
|
|
||||||
(** Remove the annotations on given pages. *)
|
|
||||||
val remove_annotations : int list -> Pdf.t -> Pdf.t
|
|
||||||
|
|
||||||
(** {2 Imposition} *)
|
(** {2 Imposition} *)
|
||||||
|
|
||||||
val impose : x:float -> y:float -> fit:bool -> columns:bool -> rtl:bool -> btt:bool -> center:bool -> margin:float -> spacing:float -> linewidth:float -> fast:bool -> Pdf.t -> Pdf.t
|
val impose : x:float -> y:float -> fit:bool -> columns:bool -> rtl:bool -> btt:bool -> center:bool -> margin:float -> spacing:float -> linewidth:float -> fast:bool -> Pdf.t -> Pdf.t
|
||||||
|
|
|
@ -0,0 +1,188 @@
|
||||||
|
open Pdfutil
|
||||||
|
|
||||||
|
(* For uses of process_pages which don't need to deal with matrices, this
|
||||||
|
function transforms into one which returns the identity matrix *)
|
||||||
|
let ppstub f n p = (f n p, n, Pdftransform.i_matrix)
|
||||||
|
|
||||||
|
(* \section{List annotations} *)
|
||||||
|
let get_annotation_string encoding pdf annot =
|
||||||
|
match Pdf.lookup_direct pdf "/Contents" annot with
|
||||||
|
| Some (Pdf.String s) -> Cpdfmetadata.encode_output encoding s
|
||||||
|
| _ -> ""
|
||||||
|
|
||||||
|
let print_annotation encoding pdf num s =
|
||||||
|
let s = get_annotation_string encoding pdf s in
|
||||||
|
match s with
|
||||||
|
| "" -> ()
|
||||||
|
| s ->
|
||||||
|
flprint (Printf.sprintf "Page %d: " num);
|
||||||
|
flprint s;
|
||||||
|
flprint "\n"
|
||||||
|
|
||||||
|
let list_page_annotations encoding pdf num page =
|
||||||
|
match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with
|
||||||
|
| Some (Pdf.Array annots) ->
|
||||||
|
iter (print_annotation encoding pdf num) (map (Pdf.direct pdf) annots)
|
||||||
|
| _ -> ()
|
||||||
|
|
||||||
|
let annotations_json_page pdf page pagenum =
|
||||||
|
match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with
|
||||||
|
| Some (Pdf.Array annots) ->
|
||||||
|
map
|
||||||
|
(fun annot ->
|
||||||
|
`List [`Int pagenum; Cpdfjson.json_of_object pdf (fun _ -> ()) false false annot])
|
||||||
|
(map (Pdf.direct pdf) annots)
|
||||||
|
| _ -> []
|
||||||
|
|
||||||
|
let list_annotations_json pdf =
|
||||||
|
let module J = Cpdfyojson.Safe in
|
||||||
|
let pages = Pdfpage.pages_of_pagetree pdf in
|
||||||
|
let pagenums = indx pages in
|
||||||
|
let json = `List (flatten (map2 (annotations_json_page pdf) pages pagenums)) in
|
||||||
|
J.pretty_to_channel stdout json
|
||||||
|
|
||||||
|
let list_annotations ~json encoding pdf =
|
||||||
|
let range = Cpdfpagespec.parse_pagespec pdf "all" in
|
||||||
|
if json
|
||||||
|
then list_annotations_json pdf
|
||||||
|
else Cpdfpage.iter_pages (list_page_annotations encoding pdf) pdf range
|
||||||
|
|
||||||
|
let get_annotations encoding pdf =
|
||||||
|
let pages = Pdfpage.pages_of_pagetree pdf in
|
||||||
|
flatten
|
||||||
|
(map2
|
||||||
|
(fun page pagenumber ->
|
||||||
|
match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with
|
||||||
|
| Some (Pdf.Array annots) ->
|
||||||
|
let strings =
|
||||||
|
map (get_annotation_string encoding pdf) (map (Pdf.direct pdf) annots)
|
||||||
|
in
|
||||||
|
combine (many pagenumber (length strings)) strings
|
||||||
|
| _ -> [])
|
||||||
|
pages
|
||||||
|
(ilist 1 (length pages)))
|
||||||
|
|
||||||
|
(* Equalise the page lengths of two PDFs by chopping or extending the first one.
|
||||||
|
*)
|
||||||
|
let equalise_lengths a b =
|
||||||
|
let a' =
|
||||||
|
if Pdfpage.endpage a < Pdfpage.endpage b then
|
||||||
|
Pdfpage.change_pages false a
|
||||||
|
(Pdfpage.pages_of_pagetree a @
|
||||||
|
many (Pdfpage.blankpage Pdfpaper.a4) (Pdfpage.endpage b - Pdfpage.endpage a))
|
||||||
|
else if Pdfpage.endpage a > Pdfpage.endpage b then
|
||||||
|
Pdfpage.change_pages false a
|
||||||
|
(take (Pdfpage.pages_of_pagetree a) (Pdfpage.endpage b))
|
||||||
|
else a
|
||||||
|
in
|
||||||
|
a', b
|
||||||
|
|
||||||
|
(* Copy annotations *)
|
||||||
|
|
||||||
|
(* FIXME: Why does this chop the files to the same length? Should be able to
|
||||||
|
apply annotations from a longer file to a shorter? *)
|
||||||
|
|
||||||
|
(* Rewrite any annotation destinations to point to pages in the
|
||||||
|
destination file. This prevents pages being copied, and ensures the links are
|
||||||
|
correct Any Indirect link inside a /Dest is rewritten if in the table. If not
|
||||||
|
inside a /Dest, nothing is rewritten. *)
|
||||||
|
let rec renumber_in_dest table indest = function
|
||||||
|
Pdf.Indirect i ->
|
||||||
|
begin
|
||||||
|
try Pdf.Indirect (Hashtbl.find table i) with _ -> Pdf.Indirect i
|
||||||
|
end
|
||||||
|
| Pdf.Array a ->
|
||||||
|
Pdf.recurse_array (renumber_in_dest table indest) a
|
||||||
|
| Pdf.Dictionary d ->
|
||||||
|
Pdf.Dictionary
|
||||||
|
(map
|
||||||
|
(function
|
||||||
|
("/Dest", v) -> ("/Dest", renumber_in_dest table true v)
|
||||||
|
| (k, v) -> (k, renumber_in_dest table indest v))
|
||||||
|
d)
|
||||||
|
| x -> x
|
||||||
|
|
||||||
|
let renumber_in_object pdf objnum table =
|
||||||
|
Pdf.addobj_given_num
|
||||||
|
pdf (objnum, (renumber_in_dest table false (Pdf.lookup_obj pdf objnum)))
|
||||||
|
|
||||||
|
let copy_annotations_page topdf frompdf frompage topage =
|
||||||
|
match Pdf.lookup_direct frompdf "/Annots" frompage.Pdfpage.rest with
|
||||||
|
Some (Pdf.Array frompage_annots as annots) ->
|
||||||
|
let table =
|
||||||
|
hashtable_of_dictionary
|
||||||
|
(combine
|
||||||
|
(Pdf.page_reference_numbers frompdf)
|
||||||
|
(Pdf.page_reference_numbers topdf))
|
||||||
|
in
|
||||||
|
iter
|
||||||
|
(function
|
||||||
|
(* FIXME: We assume they are indirects. Must also do direct, though rare.*)
|
||||||
|
Pdf.Indirect x ->
|
||||||
|
(*Printf.printf "Copying annotation %s which is\n%s\n"
|
||||||
|
(Pdfwrite.string_of_pdf (Pdf.Indirect x))
|
||||||
|
(Pdfwrite.string_of_pdf (Pdf.direct frompdf (Pdf.Indirect
|
||||||
|
x)));*)
|
||||||
|
renumber_in_object frompdf x table
|
||||||
|
| _ -> ())
|
||||||
|
frompage_annots;
|
||||||
|
let objects_to_copy = Pdf.objects_referenced [] [] frompdf annots in
|
||||||
|
iter
|
||||||
|
(fun n ->
|
||||||
|
ignore (Pdf.addobj_given_num topdf (n, Pdf.lookup_obj frompdf n)))
|
||||||
|
objects_to_copy;
|
||||||
|
let topage_annots =
|
||||||
|
match Pdf.lookup_direct frompdf "/Annots" topage.Pdfpage.rest with
|
||||||
|
| Some (Pdf.Array annots) -> annots
|
||||||
|
| _ -> []
|
||||||
|
in
|
||||||
|
let merged_dict = Pdf.Array (frompage_annots @ topage_annots) in
|
||||||
|
let topage' =
|
||||||
|
{topage with Pdfpage.rest =
|
||||||
|
Pdf.add_dict_entry topage.Pdfpage.rest "/Annots" merged_dict}
|
||||||
|
in
|
||||||
|
topdf, topage'
|
||||||
|
| Some x -> topdf, topage
|
||||||
|
| None -> topdf, topage
|
||||||
|
|
||||||
|
let copy_annotations range frompdf topdf =
|
||||||
|
let frompdf, topdf = equalise_lengths frompdf topdf in
|
||||||
|
match Pdf.renumber_pdfs [frompdf; topdf] with
|
||||||
|
| [frompdf; topdf] ->
|
||||||
|
let frompdf_pages = Pdfpage.pages_of_pagetree frompdf in
|
||||||
|
let topdf_pages = Pdfpage.pages_of_pagetree topdf in
|
||||||
|
let pdf = ref topdf
|
||||||
|
and pages = ref []
|
||||||
|
and pnum = ref 1
|
||||||
|
and frompdf_pages = ref frompdf_pages
|
||||||
|
and topdf_pages = ref topdf_pages in
|
||||||
|
(* Go through, updating pdf and collecting new pages. *)
|
||||||
|
while not (isnull !frompdf_pages) do
|
||||||
|
let frompdf_page = hd !frompdf_pages
|
||||||
|
and topdf_page = hd !topdf_pages in
|
||||||
|
let pdf', page =
|
||||||
|
if mem !pnum range
|
||||||
|
then copy_annotations_page !pdf frompdf frompdf_page topdf_page
|
||||||
|
else !pdf, topdf_page
|
||||||
|
in
|
||||||
|
pdf := pdf';
|
||||||
|
pages =| page;
|
||||||
|
incr pnum;
|
||||||
|
frompdf_pages := tl !frompdf_pages;
|
||||||
|
topdf_pages := tl !topdf_pages
|
||||||
|
done;
|
||||||
|
Pdfpage.change_pages true !pdf (rev !pages)
|
||||||
|
| _ -> assert false
|
||||||
|
|
||||||
|
(* \section{Remove annotations} *)
|
||||||
|
let remove_annotations range pdf =
|
||||||
|
let remove_annotations_page pagenum page =
|
||||||
|
if mem pagenum range then
|
||||||
|
let rest' =
|
||||||
|
Pdf.remove_dict_entry page.Pdfpage.rest "/Annots"
|
||||||
|
in
|
||||||
|
{page with Pdfpage.rest = rest'}
|
||||||
|
else
|
||||||
|
page
|
||||||
|
in
|
||||||
|
Cpdfpage.process_pages (ppstub remove_annotations_page) pdf range
|
|
@ -0,0 +1,13 @@
|
||||||
|
(** {2 Annotations} *)
|
||||||
|
|
||||||
|
(** List the annotations to standard output in a given encoding. See cpdfmanual.pdf for the format details. *)
|
||||||
|
val list_annotations : json:bool -> Cpdfmetadata.encoding -> Pdf.t -> unit
|
||||||
|
|
||||||
|
(** Return the annotations as a (pagenumber, content) list *)
|
||||||
|
val get_annotations : Cpdfmetadata.encoding -> Pdf.t -> (int * string) list
|
||||||
|
|
||||||
|
(** Copy the annotations on a given set of pages from a to b. b is returned. *)
|
||||||
|
val copy_annotations : int list -> Pdf.t -> Pdf.t -> Pdf.t
|
||||||
|
|
||||||
|
(** Remove the annotations on given pages. *)
|
||||||
|
val remove_annotations : int list -> Pdf.t -> Pdf.t
|
|
@ -0,0 +1,294 @@
|
||||||
|
open Pdfutil
|
||||||
|
open Cpdferror
|
||||||
|
|
||||||
|
(* \section{Add bookmarks} *)
|
||||||
|
let read_lines input =
|
||||||
|
let lines = ref [] in
|
||||||
|
try
|
||||||
|
while true do
|
||||||
|
let c = read_line input in
|
||||||
|
lines =| c
|
||||||
|
done; []
|
||||||
|
with
|
||||||
|
_ -> rev !lines
|
||||||
|
|
||||||
|
(* Verify a list of bookmarks. Positive jumps of > 1 not allowed, no numbers
|
||||||
|
smaller than 0. *)
|
||||||
|
let rec verify_bookmarks pdf lastlevel fastrefnums endpage = function
|
||||||
|
| [] -> true
|
||||||
|
| {Pdfmarks.level = level; Pdfmarks.target = target}::more ->
|
||||||
|
let page = Pdfpage.pagenumber_of_target pdf ~fastrefnums target in
|
||||||
|
level < lastlevel + 2 &&
|
||||||
|
level >= 0 &&
|
||||||
|
page <= endpage &&
|
||||||
|
page >= 0 &&
|
||||||
|
verify_bookmarks pdf level fastrefnums endpage more
|
||||||
|
|
||||||
|
let verify_bookmarks pdf lastlevel endpage marks =
|
||||||
|
let refnums = Pdf.page_reference_numbers pdf in
|
||||||
|
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
|
||||||
|
match marks with
|
||||||
|
| [] -> true
|
||||||
|
| m::more -> m.Pdfmarks.level = 0 && verify_bookmarks pdf lastlevel fastrefnums endpage more
|
||||||
|
|
||||||
|
(* Parse a line of the bookmarks file. *)
|
||||||
|
|
||||||
|
(* Un-escape things which are escaped. Quotes, newlines and backslashes *)
|
||||||
|
let rec fixup_characters prev = function
|
||||||
|
| [] -> rev prev
|
||||||
|
| '\\'::'\\'::t -> fixup_characters ('\\'::prev) t
|
||||||
|
| '\\'::'"'::t -> fixup_characters ('"'::prev) t
|
||||||
|
| '\\'::'n'::t -> fixup_characters ('\n'::prev) t
|
||||||
|
| h::t -> fixup_characters (h::prev) t
|
||||||
|
|
||||||
|
let debug_bookmark_string s =
|
||||||
|
Printf.printf "STR: %s\n" s
|
||||||
|
|
||||||
|
(* If optionaldest = [Pdfgenlex.LexString s], we parse the string, convert the
|
||||||
|
* integer to an indirect of the real page target, and then put it in. *)
|
||||||
|
let target_of_markfile_obj pdf i' pdfobj =
|
||||||
|
(*Printf.printf "Parsed %s\n" (Pdfwrite.string_of_pdf pdfobj);*)
|
||||||
|
match pdfobj with
|
||||||
|
Pdf.Array (Pdf.Integer x::more) ->
|
||||||
|
let pageobjnum = Pdfpage.page_object_number pdf i' in
|
||||||
|
begin match pageobjnum with
|
||||||
|
None ->
|
||||||
|
raise (Pdf.PDFError "bookmark_of_data: page obj num not found")
|
||||||
|
| Some p ->
|
||||||
|
Pdfdest.read_destination pdf (Pdf.Array (Pdf.Indirect p::more))
|
||||||
|
end
|
||||||
|
(* Need to deal with "null", "(string)", and "<<other thing like action" *)
|
||||||
|
| Pdf.Null -> Pdfdest.NullDestination
|
||||||
|
| Pdf.String s -> Pdfdest.read_destination pdf (Pdf.String s)
|
||||||
|
| x -> Pdfdest.Action x
|
||||||
|
|
||||||
|
let target_of_markfile_target pdf i' = function
|
||||||
|
| [Pdfgenlex.LexString s] ->
|
||||||
|
let pdfobj = Pdfread.parse_single_object s in
|
||||||
|
target_of_markfile_obj pdf i' pdfobj
|
||||||
|
| _ -> Pdfpage.target_of_pagenumber pdf i'
|
||||||
|
|
||||||
|
let bookmark_of_data pdf i s i' isopen optionaldest =
|
||||||
|
(*debug_bookmark_string s;
|
||||||
|
debug_bookmark_string (implode (fixup_characters [] (explode s)));
|
||||||
|
debug_bookmark_string (Pdftext.pdfdocstring_of_utf8 (implode (fixup_characters [] (explode s))));*)
|
||||||
|
{Pdfmarks.level = i;
|
||||||
|
Pdfmarks.text = Pdftext.pdfdocstring_of_utf8 (implode (fixup_characters [] (explode s)));
|
||||||
|
Pdfmarks.target = target_of_markfile_target pdf i' optionaldest;
|
||||||
|
Pdfmarks.isopen = isopen}
|
||||||
|
|
||||||
|
let target_of_json_target pdf pagenumber target =
|
||||||
|
target_of_markfile_obj pdf pagenumber (Cpdfjson.object_of_json target)
|
||||||
|
|
||||||
|
let mark_of_json pdf = function
|
||||||
|
| `Assoc [("level", `Int level);
|
||||||
|
("text", `String text);
|
||||||
|
("page", `Int pagenumber);
|
||||||
|
("open", `Bool openstatus);
|
||||||
|
("target", target)] ->
|
||||||
|
{Pdfmarks.level = level;
|
||||||
|
Pdfmarks.text = Pdftext.pdfdocstring_of_utf8 text;
|
||||||
|
Pdfmarks.target = target_of_json_target pdf pagenumber target;
|
||||||
|
Pdfmarks.isopen = openstatus}
|
||||||
|
| _ -> error "malformed mark in mark_of_json"
|
||||||
|
|
||||||
|
let marks_of_json pdf = function
|
||||||
|
| `List ms -> map (mark_of_json pdf) ms
|
||||||
|
| _ -> error "top level of JSON boomark file not a list"
|
||||||
|
|
||||||
|
let parse_bookmark_file_json verify pdf i =
|
||||||
|
let module J = Cpdfyojson.Safe in
|
||||||
|
try
|
||||||
|
let json =
|
||||||
|
match i.Pdfio.caml_channel with
|
||||||
|
| Some ch -> J.from_channel ch
|
||||||
|
| None ->
|
||||||
|
let content = Pdfio.string_of_bytes (Pdfio.bytes_of_input i 0 i.Pdfio.in_channel_length) in
|
||||||
|
J.from_string content
|
||||||
|
in
|
||||||
|
let marks = marks_of_json pdf json in
|
||||||
|
if verify then
|
||||||
|
if verify_bookmarks pdf 0 (Pdfpage.endpage pdf) marks then marks else
|
||||||
|
error "Bad bookmark file (References non-existant pages or is malformed)"
|
||||||
|
else
|
||||||
|
marks
|
||||||
|
with
|
||||||
|
e ->
|
||||||
|
error (Printf.sprintf "Malformed JSON bookmark file (%s)" (Printexc.to_string e))
|
||||||
|
|
||||||
|
let parse_bookmark_file verify pdf input =
|
||||||
|
let currline = ref 0 in
|
||||||
|
try
|
||||||
|
let lines = Pdfio.read_lines input in
|
||||||
|
let currline = ref 0 in
|
||||||
|
let bookmarks = ref [] in
|
||||||
|
iter
|
||||||
|
(function line ->
|
||||||
|
match
|
||||||
|
incr currline;
|
||||||
|
Pdfgenlex.lex_string line
|
||||||
|
with
|
||||||
|
| Pdfgenlex.LexInt i::Pdfgenlex.LexString s::Pdfgenlex.LexInt i'::Pdfgenlex.LexName "open"::optionaldest ->
|
||||||
|
bookmarks =| bookmark_of_data pdf i s i' true optionaldest
|
||||||
|
| Pdfgenlex.LexInt i::Pdfgenlex.LexString s::Pdfgenlex.LexInt i'::optionaldest ->
|
||||||
|
bookmarks =| bookmark_of_data pdf i s i' false optionaldest
|
||||||
|
| [] -> () (* ignore blank lines *)
|
||||||
|
| _ ->
|
||||||
|
error ("Bad bookmark file, line " ^ (string_of_int !currline)))
|
||||||
|
lines;
|
||||||
|
let bookmarks = rev !bookmarks in
|
||||||
|
if verify then
|
||||||
|
if verify_bookmarks pdf 0 (Pdfpage.endpage pdf) bookmarks
|
||||||
|
then bookmarks
|
||||||
|
else
|
||||||
|
error
|
||||||
|
"Bad bookmark file (References non-existant pages or is malformed)"
|
||||||
|
else
|
||||||
|
bookmarks
|
||||||
|
with
|
||||||
|
e ->
|
||||||
|
error
|
||||||
|
(Printf.sprintf
|
||||||
|
"Bad bookmark file (syntax) at line %i (error was %s)"
|
||||||
|
!currline
|
||||||
|
(Printexc.to_string e))
|
||||||
|
|
||||||
|
|
||||||
|
let add_bookmarks ~json verify input pdf =
|
||||||
|
let parsed =
|
||||||
|
(if json then parse_bookmark_file_json else parse_bookmark_file) verify pdf input in
|
||||||
|
(*iter (fun b -> flprint (Pdfmarks.string_of_bookmark b); flprint "\n") parsed;*)
|
||||||
|
Pdfmarks.add_bookmarks parsed pdf
|
||||||
|
|
||||||
|
(* List bookmarks *)
|
||||||
|
let output_string_of_target pdf fastrefnums x =
|
||||||
|
match Pdfdest.pdfobject_of_destination x with
|
||||||
|
| Pdf.Array (_::more) ->
|
||||||
|
let a =
|
||||||
|
Pdf.Array (Pdf.Integer (Pdfpage.pagenumber_of_target ~fastrefnums pdf x)::more)
|
||||||
|
in
|
||||||
|
"\"" ^ Pdfwrite.string_of_pdf a ^ "\""
|
||||||
|
| x -> "\"" ^ Pdfwrite.string_of_pdf x ^ "\""
|
||||||
|
|
||||||
|
let json_of_target pdf fastrefnums x =
|
||||||
|
match Pdfdest.pdfobject_of_destination x with
|
||||||
|
| Pdf.Array (_::more) ->
|
||||||
|
let a =
|
||||||
|
Pdf.Array (Pdf.Integer (Pdfpage.pagenumber_of_target ~fastrefnums pdf x)::more)
|
||||||
|
in
|
||||||
|
Cpdfjson.json_of_object pdf (fun _ -> ()) false false a
|
||||||
|
| x -> Cpdfjson.json_of_object pdf (fun _ -> ()) false false x
|
||||||
|
|
||||||
|
let output_json_marks ch calculate_page_number pdf fastrefnums marks =
|
||||||
|
let module J = Cpdfyojson.Safe in
|
||||||
|
let json_of_mark m =
|
||||||
|
`Assoc
|
||||||
|
[("level", `Int m.Pdfmarks.level);
|
||||||
|
("text", `String (Pdftext.utf8_of_pdfdocstring m.Pdfmarks.text));
|
||||||
|
("page", `Int (calculate_page_number m));
|
||||||
|
("open", `Bool m.Pdfmarks.isopen);
|
||||||
|
("target", json_of_target pdf fastrefnums m.Pdfmarks.target)]
|
||||||
|
in
|
||||||
|
let json = `List (map json_of_mark marks) in
|
||||||
|
J.pretty_to_channel ch json
|
||||||
|
|
||||||
|
(* List the bookmarks in the given range to the given output *)
|
||||||
|
let list_bookmarks ~json encoding range pdf output =
|
||||||
|
let process_stripped escaped =
|
||||||
|
let b = Buffer.create 200 in
|
||||||
|
iter
|
||||||
|
(fun x ->
|
||||||
|
if x <= 127 then Buffer.add_char b (char_of_int x))
|
||||||
|
escaped;
|
||||||
|
Buffer.contents b
|
||||||
|
in
|
||||||
|
let process_string s =
|
||||||
|
let rec replace c x y = function
|
||||||
|
| [] -> []
|
||||||
|
| h::t when h = c -> x::y::replace c x y t
|
||||||
|
| h::t -> h::replace c x y t
|
||||||
|
in
|
||||||
|
(* Convert to UTF8, raw, or stripped, and escape backslashed and quotation marks *)
|
||||||
|
let codepoints = Pdftext.codepoints_of_pdfdocstring s in
|
||||||
|
let escaped =
|
||||||
|
let bs = int_of_char '\\'
|
||||||
|
and nl = int_of_char '\n'
|
||||||
|
and n = int_of_char 'n'
|
||||||
|
and q = int_of_char '\"' in
|
||||||
|
replace q bs q (replace nl bs n (replace bs bs bs codepoints))
|
||||||
|
in
|
||||||
|
match encoding with
|
||||||
|
| Cpdfmetadata.UTF8 -> Pdftext.utf8_of_codepoints escaped
|
||||||
|
| Cpdfmetadata.Stripped -> process_stripped escaped
|
||||||
|
| Cpdfmetadata.Raw -> s
|
||||||
|
in
|
||||||
|
let bookmarks = Pdfmarks.read_bookmarks pdf in
|
||||||
|
let refnums = Pdf.page_reference_numbers pdf in
|
||||||
|
let rangetable = hashset_of_list range in
|
||||||
|
let range_is_all = range = ilist 1 (Pdfpage.endpage pdf) in
|
||||||
|
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
|
||||||
|
(* Find the pagenumber of each bookmark target. If it is in the range,
|
||||||
|
* keep that bookmark. Also keep the bookmark if its target is the null
|
||||||
|
* destination. *)
|
||||||
|
let inrange =
|
||||||
|
keep
|
||||||
|
(function x ->
|
||||||
|
range_is_all ||
|
||||||
|
x.Pdfmarks.target = Pdfdest.NullDestination ||
|
||||||
|
(match x.Pdfmarks.target with Pdfdest.NamedDestinationElsewhere _ -> true | _ -> false) ||
|
||||||
|
Hashtbl.mem rangetable (Pdfpage.pagenumber_of_target ~fastrefnums pdf x.Pdfmarks.target)) bookmarks
|
||||||
|
in
|
||||||
|
let calculate_page_number mark =
|
||||||
|
(* Some buggy PDFs use integers for page numbers instead of page
|
||||||
|
* object references. Adobe Reader and Preview seem to support
|
||||||
|
* this, for presumably historical reasons. So if we see a
|
||||||
|
* OtherDocPageNumber (which is what Pdfdest parses these as,
|
||||||
|
* because that's what they are legitimately, we use this as the
|
||||||
|
* page number. It is zero based, though, and we are one-based, so
|
||||||
|
* we add one. Pdfpage.pagenumber_of_target has been modified to support this.*)
|
||||||
|
Pdfpage.pagenumber_of_target ~fastrefnums pdf mark.Pdfmarks.target
|
||||||
|
in
|
||||||
|
if json then
|
||||||
|
output_json_marks stdout calculate_page_number pdf fastrefnums inrange
|
||||||
|
else
|
||||||
|
iter
|
||||||
|
(function mark ->
|
||||||
|
output.Pdfio.output_string
|
||||||
|
(Printf.sprintf "%i \"%s\" %i%s %s\n"
|
||||||
|
mark.Pdfmarks.level
|
||||||
|
(process_string mark.Pdfmarks.text)
|
||||||
|
(calculate_page_number mark)
|
||||||
|
(if mark.Pdfmarks.isopen then " open" else "")
|
||||||
|
(output_string_of_target pdf fastrefnums mark.Pdfmarks.target)))
|
||||||
|
inrange
|
||||||
|
|
||||||
|
(* o is the stamp, u is the main pdf page *)
|
||||||
|
|
||||||
|
(* \section{Split at bookmarks} *)
|
||||||
|
|
||||||
|
let get_bookmark_name pdf marks splitlevel n _ =
|
||||||
|
let refnums = Pdf.page_reference_numbers pdf in
|
||||||
|
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
|
||||||
|
match keep (function m -> n = Pdfpage.pagenumber_of_target ~fastrefnums pdf m.Pdfmarks.target && m.Pdfmarks.level <= splitlevel) marks with
|
||||||
|
| {Pdfmarks.text = title}::_ -> Cpdfattach.remove_unsafe_characters Cpdfmetadata.UTF8 title
|
||||||
|
| _ -> ""
|
||||||
|
|
||||||
|
(* Find the stem of a filename *)
|
||||||
|
(*let stem s =
|
||||||
|
implode (rev (tail_no_fail (dropwhile (neq '.') (rev (explode (Filename.basename s))))))*)
|
||||||
|
|
||||||
|
(* Return list, in order, a *set* of page numbers of bookmarks at a given level *)
|
||||||
|
let bookmark_pages level pdf =
|
||||||
|
let refnums = Pdf.page_reference_numbers pdf in
|
||||||
|
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
|
||||||
|
setify_preserving_order
|
||||||
|
(option_map
|
||||||
|
(function l when l.Pdfmarks.level = level -> Some (Pdfpage.pagenumber_of_target ~fastrefnums pdf l.Pdfmarks.target) | _ -> None)
|
||||||
|
(Pdfmarks.read_bookmarks pdf))
|
||||||
|
|
||||||
|
(* Called from cpdflib.ml - different from above *)
|
||||||
|
let split_on_bookmarks pdf level =
|
||||||
|
let points = lose (eq 0) (map pred (bookmark_pages level pdf))
|
||||||
|
in let pdf_pages = Pdfpage.pages_of_pagetree pdf in
|
||||||
|
let ranges = splitat points (indx pdf_pages) in
|
||||||
|
map (fun rs -> Pdfpage.pdf_of_pages pdf rs) ranges
|
|
@ -0,0 +1,14 @@
|
||||||
|
(** {2 Bookmarks} *)
|
||||||
|
|
||||||
|
(** [parse_bookmark_file verify pdf input] parses the bookmark file in [input].
|
||||||
|
Details of the bookmark file format can be found in cpdfmanual.pdf *)
|
||||||
|
val parse_bookmark_file : bool -> Pdf.t -> Pdfio.input -> Pdfmarks.t list
|
||||||
|
|
||||||
|
(** [add_bookmarks verify input pdf] adds bookmarks from the bookmark file
|
||||||
|
give. If [verify] is given, bookmarks will be verified to ensure, for example,
|
||||||
|
that they are not out of the page range. *)
|
||||||
|
val add_bookmarks : json:bool -> bool -> Pdfio.input -> Pdf.t -> Pdf.t
|
||||||
|
|
||||||
|
(** [list_bookmarks encoding range pdf output] lists the bookmarks to the given
|
||||||
|
output in the format specified in cpdfmanual.pdf *)
|
||||||
|
val list_bookmarks : json:bool -> Cpdfmetadata.encoding -> int list -> Pdf.t -> Pdfio.output -> unit
|
|
@ -3152,7 +3152,7 @@ let go () =
|
||||||
| (_, pagespec, _, _, _, _)::_, _ ->
|
| (_, pagespec, _, _, _, _)::_, _ ->
|
||||||
let pdf = get_single_pdf args.op true in
|
let pdf = get_single_pdf args.op true in
|
||||||
let range = parse_pagespec_allow_empty pdf pagespec in
|
let range = parse_pagespec_allow_empty pdf pagespec in
|
||||||
Cpdf.output_page_info pdf range
|
Cpdfpage.output_page_info pdf range
|
||||||
| _ -> error "list-bookmarks: bad command line"
|
| _ -> error "list-bookmarks: bad command line"
|
||||||
end
|
end
|
||||||
| Some Metadata ->
|
| Some Metadata ->
|
||||||
|
@ -3162,7 +3162,7 @@ let go () =
|
||||||
| (_, pagespec, _, _, _, _)::_, _ ->
|
| (_, pagespec, _, _, _, _)::_, _ ->
|
||||||
let pdf = get_single_pdf (Some Fonts) true in
|
let pdf = get_single_pdf (Some Fonts) true in
|
||||||
let range = parse_pagespec_allow_empty pdf pagespec in
|
let range = parse_pagespec_allow_empty pdf pagespec in
|
||||||
Cpdf.print_fonts pdf range
|
Cpdffont.print_fonts pdf range
|
||||||
| _ -> error "-list-fonts: bad command line"
|
| _ -> error "-list-fonts: bad command line"
|
||||||
end
|
end
|
||||||
| Some ListBookmarks ->
|
| Some ListBookmarks ->
|
||||||
|
@ -3170,7 +3170,7 @@ let go () =
|
||||||
| (_, pagespec, _, _, _, _)::_, _ ->
|
| (_, pagespec, _, _, _, _)::_, _ ->
|
||||||
let pdf = get_single_pdf args.op true in
|
let pdf = get_single_pdf args.op true in
|
||||||
let range = parse_pagespec_allow_empty pdf pagespec in
|
let range = parse_pagespec_allow_empty pdf pagespec in
|
||||||
Cpdf.list_bookmarks ~json:args.format_json args.encoding range pdf (Pdfio.output_of_channel stdout);
|
Cpdfbookmarks.list_bookmarks ~json:args.format_json args.encoding range pdf (Pdfio.output_of_channel stdout);
|
||||||
flush stdout
|
flush stdout
|
||||||
| _ -> error "list-bookmarks: bad command line"
|
| _ -> error "list-bookmarks: bad command line"
|
||||||
end
|
end
|
||||||
|
@ -3540,14 +3540,14 @@ let go () =
|
||||||
| Some RemoveAnnotations ->
|
| Some RemoveAnnotations ->
|
||||||
let pdf = get_single_pdf args.op false in
|
let pdf = get_single_pdf args.op false in
|
||||||
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
||||||
write_pdf false (Cpdf.remove_annotations range pdf)
|
write_pdf false (Cpdfannot.remove_annotations range pdf)
|
||||||
| Some (CopyAnnotations getfrom) ->
|
| Some (CopyAnnotations getfrom) ->
|
||||||
begin match args.inputs with
|
begin match args.inputs with
|
||||||
| [(k, _, u, o, _, _) as input] ->
|
| [(k, _, u, o, _, _) as input] ->
|
||||||
let input_pdf = get_pdf_from_input_kind input args.op k in
|
let input_pdf = get_pdf_from_input_kind input args.op k in
|
||||||
let range = parse_pagespec_allow_empty input_pdf (get_pagespec ()) in
|
let range = parse_pagespec_allow_empty input_pdf (get_pagespec ()) in
|
||||||
let pdf =
|
let pdf =
|
||||||
Cpdf.copy_annotations
|
Cpdfannot.copy_annotations
|
||||||
range
|
range
|
||||||
(pdfread_pdf_of_file (optstring u) (optstring o) getfrom)
|
(pdfread_pdf_of_file (optstring u) (optstring o) getfrom)
|
||||||
input_pdf
|
input_pdf
|
||||||
|
@ -3556,7 +3556,7 @@ let go () =
|
||||||
| _ -> error "copy-annotations: No input file specified"
|
| _ -> error "copy-annotations: No input file specified"
|
||||||
end
|
end
|
||||||
| Some ListAnnotations ->
|
| Some ListAnnotations ->
|
||||||
Cpdf.list_annotations ~json:args.format_json args.encoding (get_single_pdf args.op true)
|
Cpdfannot.list_annotations ~json:args.format_json args.encoding (get_single_pdf args.op true)
|
||||||
| Some Shift ->
|
| Some Shift ->
|
||||||
let pdf = get_single_pdf args.op false in
|
let pdf = get_single_pdf args.op false in
|
||||||
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
||||||
|
@ -3687,7 +3687,7 @@ let go () =
|
||||||
args.relative_to_cropbox args.underneath range pdf)
|
args.relative_to_cropbox args.underneath range pdf)
|
||||||
| Some (AddBookmarks file) ->
|
| Some (AddBookmarks file) ->
|
||||||
write_pdf false
|
write_pdf false
|
||||||
(Cpdf.add_bookmarks ~json:args.format_json true (Pdfio.input_of_channel (open_in_bin file))
|
(Cpdfbookmarks.add_bookmarks ~json:args.format_json true (Pdfio.input_of_channel (open_in_bin file))
|
||||||
(get_single_pdf args.op false))
|
(get_single_pdf args.op false))
|
||||||
| Some RemoveBookmarks ->
|
| Some RemoveBookmarks ->
|
||||||
write_pdf false (Pdfmarks.remove_bookmarks (get_single_pdf args.op false))
|
write_pdf false (Pdfmarks.remove_bookmarks (get_single_pdf args.op false))
|
||||||
|
|
42
cpdffont.ml
42
cpdffont.ml
|
@ -112,7 +112,7 @@ let missing_font pdf page (name, dict) =
|
||||||
Printf.printf "%i, %s, %s, %s, %s\n" page name subtype basefont encoding
|
Printf.printf "%i, %s, %s, %s, %s\n" page name subtype basefont encoding
|
||||||
|
|
||||||
let missing_fonts pdf range =
|
let missing_fonts pdf range =
|
||||||
Cpdf.iter_pages
|
Cpdfpage.iter_pages
|
||||||
(fun num page ->
|
(fun num page ->
|
||||||
match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with
|
match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with
|
||||||
| Some (Pdf.Dictionary fontdict) ->
|
| Some (Pdf.Dictionary fontdict) ->
|
||||||
|
@ -257,3 +257,43 @@ let remove_fontdescriptor pdf = function
|
||||||
let remove_fonts pdf =
|
let remove_fonts pdf =
|
||||||
Pdf.objiter (fun k v -> ignore (Pdf.addobj_given_num pdf (k, remove_fontdescriptor pdf v))) pdf;
|
Pdf.objiter (fun k v -> ignore (Pdf.addobj_given_num pdf (k, remove_fontdescriptor pdf v))) pdf;
|
||||||
pdf
|
pdf
|
||||||
|
|
||||||
|
(* List fonts *)
|
||||||
|
let list_font pdf page (name, dict) =
|
||||||
|
let subtype =
|
||||||
|
match Pdf.lookup_direct pdf "/Subtype" dict with
|
||||||
|
| Some (Pdf.Name n) -> Pdfwrite.string_of_pdf (Pdf.Name n)
|
||||||
|
| _ -> ""
|
||||||
|
in let basefont =
|
||||||
|
match Pdf.lookup_direct pdf "/BaseFont" dict with
|
||||||
|
| Some (Pdf.Name n) -> Pdfwrite.string_of_pdf (Pdf.Name n)
|
||||||
|
| _ -> ""
|
||||||
|
in let encoding =
|
||||||
|
match Pdf.lookup_direct pdf "/Encoding" dict with
|
||||||
|
| Some (Pdf.Name n) -> Pdfwrite.string_of_pdf (Pdf.Name n)
|
||||||
|
| _ -> ""
|
||||||
|
in
|
||||||
|
(page, name, subtype, basefont, encoding)
|
||||||
|
|
||||||
|
let list_fonts pdf range =
|
||||||
|
let pages = Pdfpage.pages_of_pagetree pdf in
|
||||||
|
flatten
|
||||||
|
(map
|
||||||
|
(fun (num, page) ->
|
||||||
|
if mem num range then
|
||||||
|
begin match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with
|
||||||
|
| Some (Pdf.Dictionary fontdict) ->
|
||||||
|
map (list_font pdf num) fontdict
|
||||||
|
| _ -> []
|
||||||
|
end
|
||||||
|
else
|
||||||
|
[])
|
||||||
|
(combine (ilist 1 (length pages)) pages))
|
||||||
|
|
||||||
|
let string_of_font (p, n, s, b, e) =
|
||||||
|
Printf.sprintf "%i %s %s %s %s\n" p n s b e
|
||||||
|
|
||||||
|
let print_fonts pdf range =
|
||||||
|
flprint
|
||||||
|
(fold_left ( ^ ) "" (map string_of_font (list_fonts pdf range)))
|
||||||
|
|
||||||
|
|
|
@ -9,3 +9,12 @@ val print_font_table : Pdf.t -> string -> int -> unit
|
||||||
val extract_fontfile : int -> string -> Pdf.t -> unit
|
val extract_fontfile : int -> string -> Pdf.t -> unit
|
||||||
|
|
||||||
val remove_fonts : Pdf.t -> Pdf.t
|
val remove_fonts : Pdf.t -> Pdf.t
|
||||||
|
|
||||||
|
(** {2 Listing fonts} *)
|
||||||
|
|
||||||
|
(** Print font list to stdout *)
|
||||||
|
val print_fonts : Pdf.t -> int list -> unit
|
||||||
|
|
||||||
|
(** Return font list. Page number, name, subtype, basefont, encoding. *)
|
||||||
|
val list_fonts : Pdf.t -> int list -> (int * string * string * string * string) list
|
||||||
|
|
||||||
|
|
|
@ -656,6 +656,5 @@ let set_open_action pdf fit pagenumber =
|
||||||
{pdf with Pdf.root = catalognum; Pdf.trailerdict = trailerdict'}
|
{pdf with Pdf.root = catalognum; Pdf.trailerdict = trailerdict'}
|
||||||
| None -> error "bad root"
|
| None -> error "bad root"
|
||||||
|
|
||||||
|
|
||||||
let set_version v pdf =
|
let set_version v pdf =
|
||||||
pdf.Pdf.minor <- v
|
pdf.Pdf.minor <- v
|
||||||
|
|
|
@ -0,0 +1,61 @@
|
||||||
|
open Pdfutil
|
||||||
|
|
||||||
|
(* Output information for each page *)
|
||||||
|
let output_page_info pdf range =
|
||||||
|
let pages = Pdfpage.pages_of_pagetree pdf
|
||||||
|
and labels = Pdfpagelabels.read pdf in
|
||||||
|
let getbox page box =
|
||||||
|
if box = "/MediaBox" then
|
||||||
|
match page.Pdfpage.mediabox with
|
||||||
|
| Pdf.Array [a; b; c; d] ->
|
||||||
|
Printf.sprintf "%f %f %f %f"
|
||||||
|
(Pdf.getnum a) (Pdf.getnum b) (Pdf.getnum c) (Pdf.getnum d)
|
||||||
|
| _ -> ""
|
||||||
|
else
|
||||||
|
match Pdf.lookup_direct pdf box page.Pdfpage.rest with
|
||||||
|
| Some (Pdf.Array [a; b; c; d]) ->
|
||||||
|
Printf.sprintf "%f %f %f %f"
|
||||||
|
(Pdf.getnum a) (Pdf.getnum b) (Pdf.getnum c) (Pdf.getnum d)
|
||||||
|
| _ -> ""
|
||||||
|
and rotation page =
|
||||||
|
Pdfpage.int_of_rotation page.Pdfpage.rotate
|
||||||
|
in
|
||||||
|
iter
|
||||||
|
(fun pnum ->
|
||||||
|
let page = select pnum pages in
|
||||||
|
Printf.printf "Page %i:\n" pnum;
|
||||||
|
Printf.printf "Label: %s\n"
|
||||||
|
(try Pdfpagelabels.pagelabeltext_of_pagenumber pnum labels with Not_found -> "");
|
||||||
|
Printf.printf "MediaBox: %s\n" (getbox page "/MediaBox");
|
||||||
|
Printf.printf "CropBox: %s\n" (getbox page "/CropBox");
|
||||||
|
Printf.printf "BleedBox: %s\n" (getbox page "/BleedBox");
|
||||||
|
Printf.printf "TrimBox: %s\n" (getbox page "/TrimBox");
|
||||||
|
Printf.printf "ArtBox: %s\n" (getbox page "/ArtBox");
|
||||||
|
Printf.printf "Rotation: %i\n" (rotation page))
|
||||||
|
range
|
||||||
|
|
||||||
|
let process_pages f pdf range =
|
||||||
|
let pages = Pdfpage.pages_of_pagetree pdf in
|
||||||
|
let pages', pagenumbers, matrices = (* new page objects, page number, matrix *)
|
||||||
|
split3
|
||||||
|
(map2
|
||||||
|
(fun n p -> if mem n range then f n p else (p, n, Pdftransform.i_matrix))
|
||||||
|
(ilist 1 (length pages))
|
||||||
|
pages)
|
||||||
|
in
|
||||||
|
Pdfpage.change_pages ~matrices:(combine pagenumbers matrices) true pdf pages'
|
||||||
|
|
||||||
|
let iter_pages f pdf range =
|
||||||
|
let pages = Pdfpage.pages_of_pagetree pdf in
|
||||||
|
iter2
|
||||||
|
(fun n p -> if mem n range then f n p)
|
||||||
|
(ilist 1 (length pages))
|
||||||
|
pages
|
||||||
|
|
||||||
|
let map_pages f pdf range =
|
||||||
|
let pages = Pdfpage.pages_of_pagetree pdf in
|
||||||
|
option_map2
|
||||||
|
(fun n p -> if mem n range then Some (f n p) else None)
|
||||||
|
(ilist 1 (length pages))
|
||||||
|
pages
|
||||||
|
|
|
@ -0,0 +1,14 @@
|
||||||
|
(** Print page info (Mediabox etc) to standard output. *)
|
||||||
|
val output_page_info : Pdf.t -> int list -> unit
|
||||||
|
|
||||||
|
(** Given a function from page number and page to page, a document, and a list
|
||||||
|
of page numbers to apply it to, apply the function to all those pages. *)
|
||||||
|
val process_pages : (int -> Pdfpage.t -> Pdfpage.t * int * Pdftransform.transform_matrix) ->
|
||||||
|
Pdf.t -> int list -> Pdf.t
|
||||||
|
|
||||||
|
(** Same as [process_pages], but iterate rather than map. *)
|
||||||
|
val iter_pages : (int -> Pdfpage.t -> unit) -> Pdf.t -> int list -> unit
|
||||||
|
|
||||||
|
(** Same as [process_pages] but return the list of outputs of the map function. *)
|
||||||
|
val map_pages : (int -> Pdfpage.t -> 'a) -> Pdf.t -> int list -> 'a list
|
||||||
|
|
Loading…
Reference in New Issue