477 lines
18 KiB
OCaml
477 lines
18 KiB
OCaml
(** Coherent PDF Tools Core Routines *)
|
|
open Pdfutil
|
|
|
|
(** {2 Types and Exceptions} *)
|
|
|
|
(** Possible output encodings for some function. [Raw] does no processing at
|
|
all - the PDF string is output as-is. [UTF8] converts loslessly to UTF8.
|
|
[Stripped] extracts the unicode codepoints and returns only those which
|
|
correspond to 7 bit ASCII. *)
|
|
type encoding = Raw | UTF8 | Stripped
|
|
|
|
exception SoftError of string
|
|
exception HardError of string
|
|
(** Two exceptions recommended for use with the library, though currently not
|
|
raised by any function in this module. Cpdfcommand uses them extensively. *)
|
|
|
|
(** Possible positions for adding text and other uses. See cpdfmanual.pdf *)
|
|
type position =
|
|
| PosCentre of float * float
|
|
| PosLeft of float * float
|
|
| PosRight of float * float
|
|
| Top of float
|
|
| TopLeft of float
|
|
| TopRight of float
|
|
| Left of float
|
|
| BottomLeft of float
|
|
| Bottom of float
|
|
| BottomRight of float
|
|
| Right of float
|
|
| Diagonal
|
|
| ReverseDiagonal
|
|
| Centre
|
|
|
|
(** {2 Debug} *)
|
|
|
|
(** Debug: Print out a PDF in readable form to the terminal *)
|
|
val print_pdf_objs : Pdf.t -> unit
|
|
|
|
(** {2 Working with pages} *)
|
|
|
|
(** Like [Pdfpage.endpage], but from an input and possible password - does the
|
|
minimal work to find the number of pages. *)
|
|
val endpage_io : ?revision:int -> Pdfio.input -> string option -> string option -> int
|
|
|
|
(** Given a function from page number and page to page, a document, and a list
|
|
of page numbers to apply it to, apply the function to all those pages. *)
|
|
val process_pages : (int -> Pdfpage.t -> Pdfpage.t * int * Pdftransform.transform_matrix) ->
|
|
Pdf.t -> int list -> Pdf.t
|
|
|
|
(** Same as [process_pages], but iterate rather than map. *)
|
|
val iter_pages : (int -> Pdfpage.t -> unit) -> Pdf.t -> int list -> unit
|
|
|
|
(** Same as [process_pages] but return the list of outputs of the map function. *)
|
|
val map_pages : (int -> Pdfpage.t -> 'a) -> Pdf.t -> int list -> 'a list
|
|
|
|
(** {2 Page specifications and ranges } *)
|
|
|
|
(** Here are the rules for building input ranges:
|
|
|
|
{ul
|
|
{- A comma (,) allows one to specify several ranges, e.g. 1-2,4-5.}
|
|
{- The word end represents the last page number. }
|
|
{- The words odd and even can be used in place of or at the end of a page range to restrict to just the odd or even pages. }
|
|
{- The word reverse is the same as end-1.}
|
|
{- The word all is the same as 1-end.}
|
|
{- A range must contain no spaces.}
|
|
{- A tilde (~) defines a page number counting from the end of the document rather than the beginning. Page ~1 is the last page, ~2 the penultimate page etc.}
|
|
}
|
|
*)
|
|
|
|
(** Parse a (valid) page specification to a page range *)
|
|
val parse_pagespec : Pdf.t -> string -> int list
|
|
|
|
(** Return a string for the given range. Knows how to identify all, odd, even,
|
|
x-y ranges etc. *)
|
|
val string_of_pagespec : Pdf.t -> int list -> string
|
|
|
|
(** Is a page specification, in theory, valid? This is the most we can find out
|
|
without supplying a PDF, and thus knowing how many pages there are in it. *)
|
|
val validate_pagespec : string -> bool
|
|
|
|
val parse_pagespec_without_pdf : string -> int list
|
|
|
|
(** Compresses all streams in the PDF document which are uncompressed, using
|
|
/FlateDecode, leaving out metadata. If the PDF is encrypted, does nothing. *)
|
|
val recompress_pdf : Pdf.t -> Pdf.t
|
|
|
|
(** Decompresses all streams in a PDF document, assuming it isn't encrypted. *)
|
|
val decompress_pdf : Pdf.t -> Pdf.t
|
|
|
|
(** {2 Metadata and settings} *)
|
|
|
|
(** [copy_id keepversion copyfrom copyto] copies the ID, if any, from
|
|
[copyfrom] to [copyto]. If [keepversion] is true, the PDF version of [copyto]
|
|
won't be affected. *)
|
|
val copy_id : bool -> Pdf.t -> Pdf.t -> Pdf.t
|
|
|
|
(** [set_pdf_info (key, value, version)] sets the entry [key] in the /Info directory, updating
|
|
the PDF minor version to [version].*)
|
|
val set_pdf_info : ?xmp_also:bool -> ?xmp_just_set:bool -> (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t
|
|
|
|
val get_xmp_info : Pdf.t -> string -> string
|
|
|
|
(** [set_pdf_info (key, value, version)] sets the entry [key] in the
|
|
/ViewerPreferences directory, updating the PDF minor version to [version].*)
|
|
val set_viewer_preference : (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t
|
|
|
|
(** Set the page layout to the given name (sans slash) e.g SinglePage *)
|
|
val set_page_layout : Pdf.t -> string -> Pdf.t
|
|
|
|
(** Set the page layout to the given name (sans slash) e.g SinglePage *)
|
|
val set_page_mode : Pdf.t -> string -> Pdf.t
|
|
|
|
(** Set the open action. If the boolean is true, /Fit will be used, otherwise /XYZ *)
|
|
val set_open_action : Pdf.t -> bool -> int -> Pdf.t
|
|
|
|
(** Set the PDF version number *)
|
|
val set_version : int -> Pdf.t -> unit
|
|
|
|
(** Given a PDF, returns a function which can lookup a given dictionary entry
|
|
from the /Info dictionary, returning it as a UTF8 string *)
|
|
val get_info_utf8 : Pdf.t -> string -> string
|
|
|
|
(** Output to standard output general information about a PDF. *)
|
|
val output_info : encoding -> Pdf.t -> unit
|
|
|
|
(** Output to standard output information from any XMP metadata stream in a PDF. *)
|
|
val output_xmp_info : encoding -> Pdf.t -> unit
|
|
|
|
(** {2 Presentations} *)
|
|
|
|
(** [presentation range t d horizontal inward direction effect_duration pdf]
|
|
adds a presentation on the pages in [range]. See cpdfmanual.pdf for details.
|
|
*)
|
|
val presentation : int list -> string option ->
|
|
float option -> bool -> bool -> int -> float -> Pdf.t -> Pdf.t
|
|
|
|
(** {2 File Attachments} *)
|
|
(** [attach_file keepversion topage pdf filename] attaches the file in [filename] to the pdf, optionally to a page (rather than document-level). If keepversion is true, the PDF version number won't be altered. *)
|
|
val attach_file : ?memory:Pdfio.bytes -> bool -> int option -> Pdf.t -> string -> Pdf.t
|
|
|
|
(** Remove attached files. *)
|
|
val remove_attached_files : Pdf.t -> Pdf.t
|
|
|
|
type attachment =
|
|
{name : string;
|
|
pagenumber : int;
|
|
data : unit -> Pdfio.bytes}
|
|
|
|
(** List attached files. Attachment name and page number. Page 0 is document level. *)
|
|
val list_attached_files : Pdf.t -> attachment list
|
|
|
|
(** {2 Bookmarks} *)
|
|
|
|
(** [parse_bookmark_file verify pdf input] parses the bookmark file in [input].
|
|
Details of the bookmark file format can be found in cpdfmanual.pdf *)
|
|
val parse_bookmark_file : bool -> Pdf.t -> Pdfio.input -> Pdfmarks.t list
|
|
|
|
(** [add_bookmarks verify input pdf] adds bookmarks from the bookmark file
|
|
give. If [verify] is given, bookmarks will be verified to ensure, for example,
|
|
that they are not out of the page range. *)
|
|
val add_bookmarks : bool -> Pdfio.input -> Pdf.t -> Pdf.t
|
|
|
|
(** [list_bookmarks encoding range pdf output] lists the bookmarks to the given
|
|
output in the format specified in cpdfmanual.pdf *)
|
|
val list_bookmarks : encoding -> int list -> Pdf.t -> Pdfio.output -> unit
|
|
|
|
(** {2 XML Metadata} *)
|
|
|
|
(** [set_metadata keepversion filename pdf] sets the XML metadata of a PDF to the contents of [filename]. If [keepversion] is true, the PDF version will not be altered. *)
|
|
val set_metadata : bool -> string -> Pdf.t -> Pdf.t
|
|
|
|
(** The same, but the content comes from [bytes]. *)
|
|
val set_metadata_from_bytes : bool -> Pdfio.bytes -> Pdf.t -> Pdf.t
|
|
|
|
(** Remove the metadata from a file *)
|
|
val remove_metadata : Pdf.t -> Pdf.t
|
|
|
|
(** Extract metadata to a [Pdfio.bytes] *)
|
|
val get_metadata : Pdf.t -> Pdfio.bytes option
|
|
|
|
(** Print metadate to stdout *)
|
|
val print_metadata : Pdf.t -> unit
|
|
|
|
(** Set the metadata date *)
|
|
val set_metadata_date : Pdf.t -> string -> Pdf.t
|
|
|
|
(** Create XMP metadata from scratch *)
|
|
val create_metadata : Pdf.t -> Pdf.t
|
|
|
|
(** {2 Stamping} *)
|
|
|
|
(** [combine_pages fast under over scaletofit swap equalize] combines the page
|
|
content of two PDFs, page-by-page. If [equalize] is true the output will have
|
|
the same number of pages as the shorter file. If [scaletofit] is true, the
|
|
[over] file will be scaled to fit. If [swap] is true, [over] and [under] are
|
|
swapped. If [fast] is true, the PDFs are assumed to be well-formed and no
|
|
fixes are done. *)
|
|
val combine_pages : bool -> Pdf.t -> Pdf.t -> bool -> bool -> bool -> Pdf.t
|
|
|
|
(** [stamp relative_to_cropbox position topline midline fast scale_to_fit isover range over pdf] stamps the first page of
|
|
[over] over each page of the PDF. The arguments have the same meaning as in
|
|
[combine_pages]. *)
|
|
val stamp : bool -> position -> bool -> bool -> bool -> bool -> bool -> int list -> Pdf.t -> Pdf.t -> Pdf.t
|
|
|
|
(** {2 Splitting PDFs} *)
|
|
|
|
(** Split a PDF on bookmarks of a given level or below. Level 0 is top level. *)
|
|
val split_on_bookmarks : Pdf.t -> int -> Pdf.t list
|
|
|
|
(** {2 Listing fonts} *)
|
|
|
|
(** Print font list to stdout *)
|
|
val print_fonts : Pdf.t -> unit
|
|
|
|
(** Return font list. Page number, name, subtype, basefont, encoding. *)
|
|
val list_fonts : Pdf.t -> (int * string * string * string * string) list
|
|
|
|
(** {2 Adding text} *)
|
|
|
|
(** Expand the string "now" to a PDF date string, ignoring any other string *)
|
|
val expand_date : string -> string
|
|
|
|
|
|
(** Produce a debug string of a [position] *)
|
|
val string_of_position : position -> string
|
|
|
|
(** Orientation of the string on the page *)
|
|
type orientation =
|
|
| Horizontal
|
|
| Vertical
|
|
| VerticalDown
|
|
|
|
(** Justification of multiline text *)
|
|
type justification =
|
|
| LeftJustify
|
|
| CentreJustify
|
|
| RightJustify
|
|
|
|
(** [calculate_position ignore_d w (xmin, ymin, xmax, ymax) orientation pos] calculates
|
|
the absolute position of text given its width, bounding box, orientation and
|
|
position. If [ignore_d] is true, the distance from the position (e.g 10 in
|
|
TopLeft 10) is ignored (considered zero). *)
|
|
val calculate_position :
|
|
bool ->
|
|
float ->
|
|
float * float * float * float ->
|
|
orientation -> position -> float * float * float
|
|
|
|
(** Call [add_texts metrics linewidth outline fast fontname font bates batespad colour
|
|
position linespacing fontsize underneath text pages orientation
|
|
relative_to_cropbox midline_adjust topline filename pdf]. For details see cpdfmanual.pdf *)
|
|
val addtexts :
|
|
bool -> (*metrics*)
|
|
float -> (*linewidth*)
|
|
bool -> (*outline*)
|
|
bool -> (*fast*)
|
|
string -> (*fontname*)
|
|
Pdftext.standard_font option -> (*font*)
|
|
bool -> (* embed font *)
|
|
int -> (* bates number *)
|
|
int option -> (* bates padding width *)
|
|
float * float * float -> (*colour*)
|
|
position -> (*position*)
|
|
float -> (*linespacing*)
|
|
float -> (*fontsize*)
|
|
bool -> (*underneath*)
|
|
string ->(*text*)
|
|
int list ->(*page range*)
|
|
orientation ->(*orientation*)
|
|
bool ->(*relative to cropbox?*)
|
|
float ->(*opacity*)
|
|
justification ->(*justification*)
|
|
bool ->(*midline adjust?*)
|
|
bool ->(*topline adjust?*)
|
|
string ->(*filename*)
|
|
float option -> (*extract_text_font_size*)
|
|
string -> (* shift *)
|
|
Pdf.t ->(*pdf*)
|
|
Pdf.t
|
|
|
|
val metrics_howmany : unit -> int
|
|
val metrics_text : int -> string
|
|
val metrics_x : int -> float
|
|
val metrics_y : int -> float
|
|
val metrics_rot : int -> float
|
|
val metrics_baseline_adjustment : unit -> float
|
|
(** These functions returns some details about the text if [addtexts] is called with [metrics] true. The integer arguments are 1 for the first one, 2 for the second etc. Call [metrics_howmany] first to find out how many. *)
|
|
|
|
(** Remove text from the given pages. *)
|
|
val removetext : int list -> Pdf.t -> Pdf.t
|
|
|
|
(** {2 Page geometry} *)
|
|
|
|
(** Print page info (Mediabox etc) to standard output. *)
|
|
val output_page_info : Pdf.t -> int list -> unit
|
|
|
|
(** True if a given page in a PDF has a given box *)
|
|
val hasbox : Pdf.t -> int -> string -> bool
|
|
|
|
(** [crop_pdf xywhlist pdf range] sets the cropbox on the given pages. *)
|
|
val crop_pdf : ?box:string -> (float * float * float * float) list -> Pdf.t -> int list -> Pdf.t
|
|
|
|
val hard_box : Pdf.t -> int list -> string -> bool -> bool -> Pdf.t
|
|
|
|
(** [set_mediabox xywhlist pdf range] sets the media box on the given pages. *)
|
|
val set_mediabox : (float * float * float * float) list -> Pdf.t -> int list -> Pdf.t
|
|
|
|
(** [setBox boxname x y w h pdf range] sets the given box on the given pages. *)
|
|
val setBox : string -> float -> float -> float -> float -> Pdf.t -> int list -> Pdf.t
|
|
|
|
(** Remove any cropping from the given pages. *)
|
|
val remove_cropping_pdf : Pdf.t -> int list -> Pdf.t
|
|
|
|
(** Remove any trim box from the given pages. *)
|
|
val remove_trim_pdf : Pdf.t -> int list -> Pdf.t
|
|
|
|
(** Remove any bleed box from the given pages. *)
|
|
val remove_bleed_pdf : Pdf.t -> int list -> Pdf.t
|
|
|
|
(** Remove any art box from the given pages. *)
|
|
val remove_art_pdf : Pdf.t -> int list -> Pdf.t
|
|
|
|
(** Change rotation to a given value 0, 90, 180, 270 on given pages. *)
|
|
val rotate_pdf : int -> Pdf.t -> int list -> Pdf.t
|
|
|
|
(** Rotate clockwise by 0, 90, 180, 270 on given pages. *)
|
|
val rotate_pdf_by : int -> Pdf.t -> int list -> Pdf.t
|
|
|
|
(** Rotate the contents by the given angle on the given pages. If [fast] is true, assume PDF is well-formed. *)
|
|
val rotate_contents : ?fast:bool -> float -> Pdf.t -> int list -> Pdf.t
|
|
|
|
(** Modify the rotation of the page and its contents to leave the rotation at 0 with the page effectively unaltered. *)
|
|
val upright : ?fast:bool -> int list -> Pdf.t -> Pdf.t
|
|
|
|
(** Flip the given pages vertically *)
|
|
val vflip_pdf : ?fast:bool -> Pdf.t -> int list -> Pdf.t
|
|
|
|
(** Flip the given pages horizontally *)
|
|
val hflip_pdf : ?fast:bool -> Pdf.t -> int list -> Pdf.t
|
|
|
|
(** Shift a PDF in x and y (in pts) in the given pages. List of (x, y) pairs is
|
|
for all pages in pdf. *)
|
|
val shift_pdf : ?fast:bool -> (float * float) list -> Pdf.t -> int list -> Pdf.t
|
|
|
|
(** Scale a PDF in sx, sy in the given pages. List of (sx, sy) pairs is
|
|
for all pages in pdf. *)
|
|
val scale_pdf : ?fast:bool -> (float * float) list -> Pdf.t -> int list -> Pdf.t
|
|
|
|
(** [scale_to_fit_pdf fast position input_scale x y op pdf range] scales a page to fit the
|
|
page size given by (x, y) and by the [input_scale] (e.g 1.0 = scale to fit, 0.9
|
|
= scale to fit leaving a border etc.). [op] is unused. *)
|
|
val scale_to_fit_pdf : ?fast:bool -> position -> float -> (float * float) list -> 'a -> Pdf.t -> int list -> Pdf.t
|
|
|
|
(** Scale the contents of a page by a given factor centred around a given point in a given range. *)
|
|
val scale_contents : ?fast:bool -> position -> float -> Pdf.t -> int list -> Pdf.t
|
|
|
|
val trim_marks : ?fast:bool -> Pdf.t -> int list -> Pdf.t
|
|
|
|
val show_boxes : ?fast:bool -> Pdf.t -> int list -> Pdf.t
|
|
|
|
(** {2 Padding} *)
|
|
|
|
(** Put blank pages before the given page numbers *)
|
|
val padbefore : ?padwith:Pdf.t -> int list -> Pdf.t -> Pdf.t
|
|
|
|
(** Put blank pages after the given page numbers *)
|
|
val padafter : ?padwith:Pdf.t -> int list -> Pdf.t -> Pdf.t
|
|
|
|
(** Pad to a multiple of n pages *)
|
|
val padmultiple : int -> Pdf.t -> Pdf.t
|
|
|
|
(** {2 Annotations} *)
|
|
|
|
(** List the annotations to standard output in a given encoding. See cpdfmanual.pdf for the format details. *)
|
|
val list_annotations : encoding -> Pdf.t -> unit
|
|
|
|
(** The same, but giving more information. Deprecated *)
|
|
val list_annotations_more : Pdf.t -> unit
|
|
|
|
(** Return the annotations as a (pagenumber, content) list *)
|
|
val get_annotations : encoding -> Pdf.t -> (int * string) list
|
|
|
|
(** Copy the annotations on a given set of pages from a to b. b is returned. *)
|
|
val copy_annotations : int list -> Pdf.t -> Pdf.t -> Pdf.t
|
|
|
|
(** Remove the annotations on given pages. *)
|
|
val remove_annotations : int list -> Pdf.t -> Pdf.t
|
|
|
|
(** {2 Imposition} *)
|
|
|
|
(** The twoup_stack operation puts two logical pages on each physical page,
|
|
rotating them 90 degrees to do so. The new mediabox is thus larger. Bool true
|
|
(fast) if assume well-formed ISO content streams. *)
|
|
val twoup_stack : bool -> Pdf.t -> Pdf.t
|
|
|
|
(** The twoup operation does the same, but scales the new sides down so that
|
|
the media box is unchanged. Bool true (fast) if assume well-formed ISO content streams. *)
|
|
val twoup : bool -> Pdf.t -> Pdf.t
|
|
|
|
(** {2 Making new documents} *)
|
|
|
|
(** Make a blank document given x and y page dimensions in points and a number of pages *)
|
|
val blank_document : float -> float -> int -> Pdf.t
|
|
|
|
(** The same, but give a Pdfpaper.t paper size. *)
|
|
val blank_document_paper : Pdfpaper.t -> int -> Pdf.t
|
|
|
|
(** {2 Page labels} *)
|
|
|
|
(** Add page labels. *)
|
|
val add_page_labels :
|
|
Pdf.t -> bool -> Pdfpagelabels.labelstyle -> string option -> int -> int list -> unit
|
|
|
|
(** {2 Miscellany} *)
|
|
|
|
(** Make all lines in the PDF at least a certain thickness. *)
|
|
val thinlines : int list -> float -> Pdf.t -> Pdf.t
|
|
|
|
(** Make all text on certain pages black. *)
|
|
val blacktext : float * float * float -> int list -> Pdf.t -> Pdf.t
|
|
|
|
(** Make all lines on certain pages black. *)
|
|
val blacklines : float * float * float -> int list -> Pdf.t -> Pdf.t
|
|
|
|
(** Make all fills on certain pages black. *)
|
|
val blackfills : float * float * float -> int list -> Pdf.t -> Pdf.t
|
|
|
|
(** Remove images from a PDF, optionally adding crossed boxes. *)
|
|
val draft : string option -> bool -> int list -> Pdf.t -> Pdf.t
|
|
|
|
(** Squeeze a PDF *)
|
|
val squeeze : ?logto:string -> ?pagedata:bool -> ?recompress:bool -> Pdf.t -> unit
|
|
|
|
val remove_all_text : int list -> Pdf.t -> Pdf.t
|
|
|
|
(**/**)
|
|
|
|
val process_xobjects : Pdf.t -> Pdfpage.t -> (Pdf.t -> Pdf.pdfobject -> Pdf.pdfobject list -> Pdf.pdfobject list) -> unit
|
|
|
|
(** Custom CSP1 *)
|
|
val custom_csp1 : Pdf.t -> Pdf.t
|
|
|
|
(** Custom CSP2 *)
|
|
val custom_csp2 : float -> Pdf.t -> Pdf.t
|
|
|
|
(** Nobble a page, given pdf, pagenumber and page *)
|
|
val nobble_page : Pdf.t -> 'a -> Pdfpage.t -> Pdfpage.t
|
|
|
|
val find_cpdflin : string option -> string
|
|
|
|
val call_cpdflin : string -> string -> string -> string -> int
|
|
|
|
val debug : bool ref
|
|
|
|
val extract_text : float option -> Pdf.t -> int list -> string
|
|
|
|
val append_page_content : string -> bool -> bool -> int list -> Pdf.t -> Pdf.t
|
|
|
|
val ocg_coalesce : Pdf.t -> unit
|
|
|
|
val ocg_get_list : Pdf.t -> string list
|
|
|
|
val ocg_list : Pdf.t -> unit
|
|
|
|
val ocg_rename : string -> string -> Pdf.t -> unit
|
|
|
|
val ocg_order_all : Pdf.t -> unit
|
|
|
|
val stamp_as_xobject : Pdf.t -> int list -> Pdf.t -> Pdf.t * string
|
|
|
|
val remove_dict_entry : Pdf.t -> string -> unit
|
|
|
|
val remove_clipping : Pdf.t -> int list -> Pdf.t
|
|
|
|
val image_resolution : Pdf.t -> int list -> float -> (int * string * int * int * float * float) list
|