beginning python splits

This commit is contained in:
John Whitington 2021-08-10 13:40:37 +01:00
parent 5f8a0766ad
commit af576c0208
5 changed files with 221 additions and 327 deletions

Binary file not shown.

View File

@ -1,6 +1,6 @@
\documentclass{book} \documentclass{book}
% Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf etc. % Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf etc.
\usepackage{comment}\excludecomment{cpdflib}\excludecomment{pycpdflib} \usepackage{comment}\excludecomment{cpdflib}\includecomment{pycpdflib}
\usepackage{palatino} \usepackage{palatino}
\usepackage{listings} \usepackage{listings}
\usepackage{microtype} \usepackage{microtype}

View File

@ -1,8 +1,16 @@
/* """Pycpdf: a python interface to cpdf.
* A C wrapper to cpdf PDF tools library. Free for non-commercial use. See
* LICENSE for details. To purchase a license, please visit
* http://www.coherentpdf.com/
*
* Text arguments and results are in UTF8.
*/
Before using the library, you must load the libpycpdf and libcpdf DLLs. This is
achieved with the pycpdf.loadDLL function, given the filename or full path of
the libpycpdf DLL. On Windows, you may have to call os.add_dll_directory
first. On MacOS, you may need to give the full path, and you may need to
install libcpdf.so in a standard location /usr/local/lib/, or use the
install_name_tool command to tell libpycpdf.so where to find libcpdf.so.
A 'range' is a list of integers specifying page numbers.
Text arguments and results are in UTF8.
Any function may raise the exception CPDFError, carrying a string describing
the error.
"""

View File

@ -1,36 +1,45 @@
/* CHAPTER 0. Preliminaries */ # CHAPTER 0. Preliminaries
/* The function cpdf_startup(argv) must be called before using the library. */ def loadDLL(f):
void cpdf_startup(char **); """Load the libpycpdf DLL from a given file, and set up pycpdflib."""
/* Return the version of the cpdflib library as a string */ class Pdf:
char *cpdf_version(); """The type of PDF documents."""
/* class CPDFError(Exception):
* Some operations have a fast mode. The default is 'slow' mode, which works """Any function may raise an exception CPDFError, carrying a string
* even on old-fashioned files. For more details, see section 1.13 of the describing what went wrong"""
* CPDF manual. These functions set the mode globally.
*/
void cpdf_setFast();
void cpdf_setSlow();
/* def lastError():
* Errors. cpdf_lastError and cpdf_lastErrorString hold information about the """Return the last error. Not usually used directly, since pycpdflib
* last error to have occurred. They should be consulted after each call. If functions raise exceptions."""
* cpdf_lastError is non-zero, there was an error, and cpdf_lastErrorString
* gives details. If cpdf_lastError is zero, there was no error on the most
* recent cpdf call.
*/
extern int cpdf_lastError;
extern char *cpdf_lastErrorString;
/* cpdf_clearError clears the current error state. */ def lastErrorString():
void cpdf_clearError(void); """Return the last error string. Not usually used directly, since pycpdflib
functions raise exceptions."""
/* def checkerror():
* cpdf_onExit is a debug function which prints some information about """Raise an exception if the last function call resulted in an error. Not
* resource usage. This can be used to detect if PDFs or ranges are being used directly, since pycpdflib functions will raise the exception
* deallocated properly. Contrary to its name, it may be run at any time. directly."""
*/
void cpdf_onExit(void);
def version():
"""Returns the version number of the pycpdflib library."""
def setFast():
""" Some operations have a fast mode. The default is 'slow' mode, which
works even on old-fashioned files. For more details, see section 1.13 of
the CPDF manual. These functions set the mode globally. """
def setSlow():
""" Some operations have a fast mode. The default is 'slow' mode, which
works even on old-fashioned files. For more details, see section 1.13 of
the CPDF manual. These functions set the mode globally. """
def clearError():
""" clearError clears the current error state. """
def onExit():
""" onExit is a debug function which prints some information about
resource usage. This can be used to detect if PDFs or ranges are being
deallocated properly."""

View File

@ -1,332 +1,209 @@
/* CHAPTER 1. Basics */ # CHAPTER 1. Basics
/* def fromFile(filename, userpw):
* cpdf_fromFile(filename, userpw) loads a PDF file from a given file. Supply """ fromFile(filename, userpw) loads a PDF file from a given file.
* a user password (possibly blank) in case the file is encrypted. It won't be Supply a user password (possibly blank) in case the file is encrypted. It
* decrypted, but sometimes the password is needed just to load the file. won't be decrypted, but sometimes the password is needed just to load the
*/ file."""
int cpdf_fromFile(const char[], const char[]);
/* def fromFileLazy(filename, userpw):
* cpdf_fromFileLazy(pdf, userpw) loads a PDF from a file, doing only minimal """ fromFileLazy(pdf, userpw) loads a PDF from a file, doing only
* parsing. The objects will be read and parsed when they are actually minimal parsing. The objects will be read and parsed when they are actually
* needed. Use this when the whole file won't be required. Also supply a user needed. Use this when the whole file won't be required. Also supply a user
* password (possibly blank) in case the file is encrypted. It won't be password (possibly blank) in case the file is encrypted. It won't be
* decrypted, but sometimes the password is needed just to load the file. decrypted, but sometimes the password is needed just to load the file."""
*/
int cpdf_fromFileLazy(const char[], const char[]);
/* def fromMemory(data, userpw):
* cpdf_fromMemory(data, length, userpw) loads a file from memory, given a """ fromMemory(data, length, userpw) loads a file from memory, given a
* pointer and a length, and the user password. pointer and a length, and the user password."""
*/
int cpdf_fromMemory(void *, int, const char[]);
/* def fromMemoryLazy(data, userpw):
* cpdf_fromMemory(data, length, userpw) loads a file from memory, given a """ fromMemoryLazy(data, length, userpw) loads a file from memory, given a
* pointer and a length, and the user password, but lazily like pointer and a length, and the user password, but lazily like
* cpdf_fromFileLazy. fromFileLazy."""
*/
int cpdf_fromMemoryLazy(void *, int, const char[]);
/* def blankDocument(w, h, pages):
* cpdf_blankDocument(width, height, num_pages) creates a blank document with """ blankDocument(width, height, num_pages) creates a blank document
* pages of the given width (in points), height (in points), and number of with pages of the given width (in points), height (in points), and number
* pages. of pages."""
*/
int cpdf_blankDocument(double, double, int);
/* Standard page sizes. */ """Paper sizes."""
enum cpdf_papersize { a0portrait
cpdf_a0portrait, /* A0 portrait */ a1portrait
cpdf_a1portrait, /* A1 portrait */ a2portrait
cpdf_a2portrait, /* A2 portrait */ a3portrait
cpdf_a3portrait, /* A3 portrait */ a4portrait
cpdf_a4portrait, /* A4 portrait */ a5portrait
cpdf_a5portrait, /* A5 portrait */ a0landscape
cpdf_a0landscape, /* A0 landscape */ a1landscape
cpdf_a1landscape, /* A1 landscape */ a2landscape
cpdf_a2landscape, /* A2 landscape */ a3landscape
cpdf_a3landscape, /* A3 landscape */ a4landscape
cpdf_a4landscape, /* A4 landscape */ a5landscape
cpdf_a5landscape, /* A5 landscape */ usletterportrait
cpdf_usletterportrait, /* US Letter portrait */ usletterlandscape
cpdf_usletterlandscape, /* US Letter landscape */ uslegalportrait
cpdf_uslegalportrait, /* US Legal portrait */ uslegallandscape
cpdf_uslegallandscape /* US Legal landscape */
};
/* def blankDocumentPaper(papersize, pages):
* cpdf_blankDocumentPaper(papersize, num_pages) makes a blank document given """blankDocumentPaper(papersize, num_pages) makes a blank document given
* a page size and number of pages. a page size and number of pages."""
*/
int cpdf_blankDocumentPaper(enum cpdf_papersize, int);
/* Remove a PDF from memory, given its number. */ def ptOfCm(i):
void cpdf_deletePdf(int); """Convert a figure in centimetres to points (72 points to 1 inch)"""
/* def ptOfMm(i):
* Calling cpdf_replacePdf(a, b) places PDF b under number a. Number b is no """Convert a figure in millimetres to points (72 points to 1 inch)"""
* longer available.
*/
void cpdf_replacePdf(int, int);
/* def ptOfIn(i):
* To enumerate the list of currently allocated PDFs, call """Convert a figure in inches to points (72 points to 1 inch)"""
* cpdf_startEnumeratePDFs which gives the number, n, of PDFs allocated, then
* cpdf_enumeratePDFsInfo and cpdf_enumeratePDFsKey with index numbers from
* 0...(n - 1). Call cpdf_endEnumeratePDFs to clean up.
*/
int cpdf_startEnumeratePDFs(void);
int cpdf_enumeratePDFsKey(int);
char *cpdf_enumeratePDFsInfo(int);
void cpdf_endEnumeratePDFs(void);
/* Convert a figure in centimetres to points (72 points to 1 inch) */ def cmOfPt(i):
double cpdf_ptOfCm(double); """Convert a figure in points to centimetres (72 points to 1 inch)"""
/* Convert a figure in millimetres to points (72 points to 1 inch) */ def mmOfPt(i):
double cpdf_ptOfMm(double); """Convert a figure in points to millimetres (72 points to 1 inch)"""
/* Convert a figure in inches to points (72 points to 1 inch) */ def inOfPt(i):
double cpdf_ptOfIn(double); """Convert a figure in points to inches (72 points to 1 inch)"""
/* Convert a figure in points to centimetres (72 points to 1 inch) */ def parsePagespec(pdf, pagespec):
double cpdf_cmOfPt(double); """parsePagespec(pdf, pagespec) parses a page specification with reference to
a given PDF (the PDF is supplied so that page ranges which reference pages
which do not exist are rejected)."""
/* Convert a figure in points to millimetres (72 points to 1 inch) */ def validatePagespec(pagespec):
double cpdf_mmOfPt(double); """validatePagespec(range) validates a page specification so far as is
possible in the absence of the actual document."""
/* Convert a figure in points to inches (72 points to 1 inch) */ def stringOfPagespec(pdf, r):
double cpdf_inOfPt(double); """stringOfPagespec(pdf, range) builds a page specification from a page
range. For example, the range containing 1,2,3,6,7,8 in a document of 8
pages might yield "1-3,6-end" """
/* def blankRange():
* A page range is a list of page numbers used to restrict operations to """blankRange() creates a range with no pages in."""
* certain pages. A page specification is a textual description of a page
* range, such as "1-12,18-end". Here is the syntax:
*
* o A range must contain no spaces.
*
* o A dash (-) defines ranges, e.g. 1-5 or 6-3.
*
* o A comma (,) allows one to specify several ranges, e.g. 1-2,4-5.
*
* o The word end represents the last page number.
*
* o The words odd and even can be used in place of or at the end of a page
* range to restrict to just the odd or even pages.
*
* o The words portrait and landscape can be used in place of or at the end of
* a page range to restrict to just those pages which are portrait or
* landscape. Note that the meaning of "portrait" and "landscape" does not
* take account of any viewing rotation in place (use cpdf_upright first, if
* required). A page with equal width and height is considered neither
* portrait nor landscape.
*
* o The word reverse is the same as end-1.
*
* o The word all is the same as 1-end.
*
* o A tilde (~) defines a page number counting from the end of the document
* rather than the beginning. Page ~1 is the last page, ~2 the penultimate
* page etc.
*/
/* def pageRange(f, t):
* cpdf_parsePagespec(pdf, range) parses a page specification with reference """ pageRange(from, to) build a range from one page to another inclusive.
* to a given PDF (the PDF is supplied so that page ranges which reference For example, pageRange(3,7) gives the range 3,4,5,6,7 """
* pages which do not exist are rejected).
*/
int cpdf_parsePagespec(int, const char[]);
/* def all(pdf):
* cpdf_validatePagespec(range) validates a page specification so far as is """all(pdf) is the range containing all the pages in a given document."""
* possible in the absence of the actual document. Result is true if valid.
*/
int cpdf_validatePagespec(const char[]);
/* def even(r):
* cpdf_stringOfPagespec(pdf, range) builds a page specification from a page """even(range) makes a range which contains just the even pages of another
* range. For example, the range containing 1,2,3,6,7,8 in a document of 8 range"""
* pages might yield "1-3,6-end"
*/
char *cpdf_stringOfPagespec(int, int);
/* cpdf_blankRange() creates a range with no pages in. */ def odd(r):
int cpdf_blankRange(void); """odd(range) makes a range which contains just the odd pages of another
range"""
/* cpdf_deleteRange(range) deletes a range. */ def rangeUnion(a, b):
void cpdf_deleteRange(int); """rangeUnion(a, b) makes the union of two ranges giving a range containing
the pages in range a and range b."""
/* def difference(a, b):
* cpdf_range(from, to) builds a range from one page to another inclusive. For """difference(a, b) makes the difference of two ranges, giving a range
* example, cpdf_range(3,7) gives the range 3,4,5,6,7 containing all the pages in a except for those which are also in b."""
*/
int cpdf_range(int, int);
/* cpdf_all(pdf) is the range containing all the pages in a given document. */ def removeDuplicates(r):
int cpdf_all(int); """removeDuplicates(range) deduplicates a range, making a new one."""
/* def rangeLength(r):
* cpdf_even(range) makes a range which contains just the even pages of """rangeLength gives the number of pages in a range."""
* another range.
*/
int cpdf_even(int);
/* def rangeGet(r, n):
* cpdf_odd(range) makes a range which contains just the odd pages of another """rangeGet(range, n) gets the page number at position n in a range, where
* range. n runs from 0 to rangeLength - 1."""
*/
int cpdf_odd(int);
/* def rangeAdd(r, p):
* cpdf_rangeUnion(a, b) makes the union of two ranges giving a range """rangeAdd(range, page) adds the page to a range, if it is not already
* containing the pages in range a and range b. there."""
*/
int cpdf_rangeUnion(int, int);
/* def isInRange(r, p):
* cpdf_difference(a, b) makes the difference of two ranges, giving a range """isInRange(range, page) returns True if the page is in the range, False
* containing all the pages in a except for those which are also in b. otherwise."""
*/
int cpdf_difference(int, int);
/* cpdf_removeDuplicates(range) deduplicates a range, making a new one. */ def pages(pdf):
int cpdf_removeDuplicates(int); """pages(pdf) returns the number of pages in a PDF."""
/* cpdf_rangeLength gives the number of pages in a range. */ def pagesFast(userpw, filename):
int cpdf_rangeLength(int); """pagesFast(password, filename) returns the number of pages in a given
PDF, with given user encryption password. It tries to do this as fast as
possible, without loading the whole file."""
/* def toFile(pdf, filename, linearize, make_id):
* cpdf_rangeGet(range, n) gets the page number at position n in a range, """toFile (pdf, filename, linearize, make_id) writes the file to a given
* where n runs from 0 to rangeLength - 1. filename. If linearize is True, it will be linearized. If make_id is True,
*/ it will be given a new ID."""
int cpdf_rangeGet(int, int);
/* def toFileExt(pdf, filename, linearize, make_id, preserve_objstm,
* cpdf_rangeAdd(range, page) adds the page to a range, if it is not already generate_objstm, compress_objstm):
* there. """toFileExt (pdf, filename, linearize, make_id, preserve_objstm,
*/ generate_objstm, compress_objstm) writes the file to a given filename. If
int cpdf_rangeAdd(int, int); make_id is True, it will be given a new ID. If preserve_objstm is True,
existing object streams will be preserved. If generate_objstm is True,
object streams will be generated even if not originally present. If
compress_objstm is True, object streams will be compressed (what we usually
want). WARNING: the pdf argument will be invalid after this call and should
not be used again."""
/* def toMemory(pdf, linearize, make_id):
* cpdf_isInRange(range, page) returns true if the page is in the range, """Given a buffer of the correct size, toMemory (pdf, linearize,
* false otherwise. make_id) writes it and returns the buffer as a byte array of type bytes."""
*/
int cpdf_isInRange(int, int);
/* cpdf_pages(pdf) returns the number of pages in a PDF. */ def isEncrypted(pdf):
int cpdf_pages(int); """isEncrypted(pdf) returns True if a document is encrypted, False
otherwise."""
/* """Permissions."""
* cpdf_pagesFast(password, filename) returns the number of pages in a given noEdit
* PDF, with given user encryption password. It tries to do this as fast as noPrint
* possible, without loading the whole file. noCopy
*/ noAnnot
int cpdf_pagesFast(const char[], const char[]); noForms
noExtract
noAssemble
noHqPrint
/* """Encryption Methods."""
* cpdf_toFile (pdf, filename, linearize, make_id) writes the file to a given pdf40bit
* filename. If linearize is true, it will be linearized if a linearizer is pdf128bit
* available. If make_id is true, it will be given a new ID. aes128bitfalse
*/ aes128bittrue
void cpdf_toFile(int, const char[], int, int); aes256bitfalse
aes256bittrue
aes256bitisofalse
aes256bitisotrue
/* def toFileEncrypted(pdf, method, permissions, ownerpw, userpw, linearize,
* cpdf_toFile (pdf, filename, linearize, make_id, preserve_objstm, makeid, filename):
* generate_objstm, compress_objstm) writes the file to a given filename. If """toFileEncrypted(pdf, encryption_method, permissions, permission_length,
* make_id is true, it will be given a new ID. If preserve_objstm is true, owner_password, user password, linearize, makeid, filename) writes a file
* existing object streams will be preserved. If generate_objstm is true, as encrypted."""
* object streams will be generated even if not originally present. If
* compress_objstm is true, object streams will be compressed (what we
* usually want). WARNING: the pdf argument will be invalid after this call,
* and should be discarded.
*/
void cpdf_toFileExt(int, const char[], int, int, int, int, int);
/* def toFileEncryptedExt(pdf, method, permissions, ownerpw, userpw, linearize,
* Given a buffer of the correct size, cpdf_toFileMemory (pdf, linearize, makeid, preserve_objstm, generate_objstm,
* make_id, &length) writes it and returns the buffer. The buffer length is compress_objstm, filename):
* filled in &length. """toFileEncryptedExt(pdf, encryption_method, permissions,
*/ permission_length, owner_password, user_password, linearize, makeid,
void *cpdf_toMemory(int, int, int, int *); preserve_objstm, generate_objstm, compress_objstm, filename) WARNING: the
pdf argument will be invalid after this call, and should be discarded."""
/* def decryptPdf(pdf, userpw):
* cpdf_isEncrypted(pdf) returns true if a document is encrypted, false """decryptPdf(pdf, userpw) attempts to decrypt a PDF using the given user
* otherwise. password."""
*/
int cpdf_isEncrypted(int);
/* def decryptPdfOwner(pdf, ownerpw):
* cpdf_decryptPdf(pdf, userpw) attempts to decrypt a PDF using the given """decryptPdfOwner(pdf, ownerpw) attempts to decrypt a PDF using the given
* user password. The error code is non-zero if the decryption fails. owner password."""
*/
void cpdf_decryptPdf(int, const char[]);
/* def hasPermission(pdf, perm):
* cpdf_decryptPdfOwner(pdf, ownerpw) attempts to decrypt a PDF using the """hasPermission(pdf, permission) returns True if the given permission
* given owner password. The error code is non-zero if the decryption fails. (restriction) is present."""
*/
void cpdf_decryptPdfOwner(int, const char[]);
/*
* File permissions. These are inverted, in the sense that the presence of
* one of them indicates a restriction.
*/
enum cpdf_permission {
cpdf_noEdit, /* Cannot edit the document */
cpdf_noPrint, /* Cannot print the document */
cpdf_noCopy, /* Cannot copy the document */
cpdf_noAnnot, /* Cannot annotate the document */
cpdf_noForms, /* Cannot edit forms in the document */
cpdf_noExtract, /* Cannot extract information */
cpdf_noAssemble, /* Cannot assemble into a bigger document */
cpdf_noHqPrint /* Cannot print high quality */
};
/*
* Encryption methods. Suffixes 'false' and 'true' indicates lack of or
* presence of encryption for XMP metadata streams.
*/
enum cpdf_encryptionMethod {
cpdf_pdf40bit, /* 40 bit RC4 encryption */
cpdf_pdf128bit, /* 128 bit RC4 encryption */
cpdf_aes128bitfalse, /* 128 bit AES encryption, do not encrypt
* metadata. */
cpdf_aes128bittrue, /* 128 bit AES encryption, encrypt metadata */
cpdf_aes256bitfalse, /* Deprecated. Do not use for new files */
cpdf_aes256bittrue, /* Deprecated. Do not use for new files */
cpdf_aes256bitisofalse, /* 256 bit AES encryption, do not encrypt
* metadata. */
cpdf_aes256bitisotrue /* 256 bit AES encryption, encrypt metadata */
};
/*
* cpdf_toFileEncrypted(pdf, encryption_method, permissions,
* permission_length, owner_password, user password, linearize, makeid,
* filename) writes a file as encrypted.
*/
void cpdf_toFileEncrypted(int, int, int *, int, const char[], const char[], int,
int, const char[]);
/*
* cpdf_toFileEncryptedExt(pdf, encryption_method, permissions,
* permission_length, owner_password, user_password, linearize, makeid,
* preserve_objstm, generate_objstm, compress_objstm, filename) WARNING: the
* pdf argument will be invalid after this call, and should be discarded.
*/
void cpdf_toFileEncryptedExt(int, int, int *, int, const char[], const char[],
int, int, int, int, int, const char[]);
/*
* cpdf_hasPermission(pdf, permission) returns true if the given permission
* (restriction) is present.
*/
int cpdf_hasPermission(int, enum cpdf_permission);
/*
* cpdf_encryptionKind(pdf) return the encryption method currently in use on
* a document.
*/
enum cpdf_encryptionMethod cpdf_encryptionKind(int);
def encryptionKind(pdf):
"""encryptionKind(pdf) returns the encryption method currently in use on
a document."""