more
This commit is contained in:
parent
c427f79d8b
commit
25eb56fbdf
|
@ -0,0 +1,15 @@
|
||||||
|
"""Pycpdflib: a python interface to cpdf.
|
||||||
|
|
||||||
|
Before using the library, you must load the libpycpdf and libcpdf DLLs. This is
|
||||||
|
achieved with the pycpdflib.loadDLL function, given the filename or full path
|
||||||
|
of the libpycpdf DLL. On Windows, you may have to call os.add_dll_directory
|
||||||
|
first. On MacOS, you may need to give the full path, and you may need to
|
||||||
|
install libcpdf.so in a standard location /usr/local/lib/, or use the
|
||||||
|
install_name_tool command to tell libpycpdf.so where to find libcpdf.so.
|
||||||
|
|
||||||
|
A 'range' is a list of integers specifying page numbers.
|
||||||
|
|
||||||
|
Text arguments and results are in UTF8.
|
||||||
|
|
||||||
|
Any function may raise the exception CPDFError, carrying a string describing
|
||||||
|
the error. """
|
|
@ -0,0 +1,129 @@
|
||||||
|
Loading the libpycpdf and libcpdf DLLs
|
||||||
|
--------------------------------------
|
||||||
|
|
||||||
|
Before using the library, you must load the ``libpycpdf`` and ``libcpdf`` DLLs.
|
||||||
|
This is achieved with the ``pycpdflib.loadDLL`` function, given the filename or
|
||||||
|
full path of the ``libpycpdf`` DLL.
|
||||||
|
|
||||||
|
On Windows, you may have to call ``os.add_dll_directory`` first. On MacOS, you
|
||||||
|
may need to give the full path, and you may need to install ``libcpdf.so`` in a
|
||||||
|
standard location ``/usr/local/lib/``, or use the ``install_name_tool`` command
|
||||||
|
to tell ``libpycpdf.so`` where to find ``libcpdf.so``.
|
||||||
|
|
||||||
|
Conventions
|
||||||
|
-----------
|
||||||
|
|
||||||
|
Any function may raise the exception ``CPDFError``, carrying a string describing
|
||||||
|
the error.
|
||||||
|
|
||||||
|
A 'range' is a list of integers specifying page numbers. Page numbers start at
|
||||||
|
1. Range arguments are called ``r``.
|
||||||
|
|
||||||
|
Text arguments and results are in UTF8.
|
||||||
|
|
||||||
|
Units are in PDF points (1/72 inch).
|
||||||
|
|
||||||
|
Angles are in degrees.
|
||||||
|
|
||||||
|
|
||||||
|
Built-in values
|
||||||
|
---------------
|
||||||
|
|
||||||
|
Paper sizes:
|
||||||
|
|
||||||
|
a0portrait a1portrait a2portrait a3portrait a4portrait a5portrait a0landscape
|
||||||
|
a1landscape a2landscape a3landscape a4landscape a5landscape usletterportrait
|
||||||
|
usletterlandscape uslegalportrait uslegallandscape
|
||||||
|
|
||||||
|
Permissions:
|
||||||
|
|
||||||
|
noEdit noPrint noCopy noAnnot noForms noExtract noAssemble noHqPrint
|
||||||
|
|
||||||
|
Encryption methods:
|
||||||
|
|
||||||
|
pdf40bit pdf128bit aes128bitfalse aes128bittrue aes256bitfalse aes256bittrue
|
||||||
|
aes256bitisofalse aes256bitisotrue
|
||||||
|
|
||||||
|
Positions:
|
||||||
|
|
||||||
|
Positions with two numbers in a tuple e.g (posLeft, 10.0, 20.0)
|
||||||
|
|
||||||
|
posCentre posLeft posRight
|
||||||
|
|
||||||
|
Positions with one number in a tuple e.g (top, 5.0)
|
||||||
|
|
||||||
|
top topLeft topRight left bottomLeft bottomRight right
|
||||||
|
|
||||||
|
Positions with no numbers e.g diagonal
|
||||||
|
|
||||||
|
diagonal reverseDiagonal
|
||||||
|
|
||||||
|
Fonts:
|
||||||
|
|
||||||
|
timesRoman timesBold timesItalic timesBoldItalic helvetica helveticaBold
|
||||||
|
helveticaOblique helveticaBoldOblique courier courierBold courierOblique
|
||||||
|
courierBoldOblique
|
||||||
|
|
||||||
|
Justification:
|
||||||
|
|
||||||
|
leftJustify centreJustify rightJustify
|
||||||
|
|
||||||
|
Page layouts:
|
||||||
|
|
||||||
|
singlePage oneColumn twoColumnLeft twoColumnRight twoPageLeft twoPageRight
|
||||||
|
|
||||||
|
Page modes:
|
||||||
|
|
||||||
|
useNone useOutlines useThumbs useOC useAttachments
|
||||||
|
|
||||||
|
Page label styles:
|
||||||
|
|
||||||
|
decimalArabic uppercaseRoman lowercaseRoman uppercaseLetters lowercaseLetters
|
||||||
|
|
||||||
|
|
||||||
|
# CHAPTER 0. Preliminaries
|
||||||
|
|
||||||
|
class Pdf:
|
||||||
|
"""The type of PDF documents."""
|
||||||
|
|
||||||
|
def loadDLL(f):
|
||||||
|
"""Load the libpycpdf DLL from a given file, and set up pycpdflib. Must be
|
||||||
|
called prior to using any other function in the library."""
|
||||||
|
|
||||||
|
class CPDFError(Exception):
|
||||||
|
"""Any function may raise an exception CPDFError, carrying a string
|
||||||
|
describing what went wrong."""
|
||||||
|
|
||||||
|
def lastError():
|
||||||
|
"""Return the last error. Not usually used directly, since pycpdflib
|
||||||
|
functions raise exceptions."""
|
||||||
|
|
||||||
|
def lastErrorString():
|
||||||
|
"""Return the last error string. Not usually used directly, since pycpdflib
|
||||||
|
functions raise exceptions."""
|
||||||
|
|
||||||
|
def checkerror():
|
||||||
|
"""Raise an exception if the last function call resulted in an error. Not
|
||||||
|
used directly, since pycpdflib functions will raise the exception
|
||||||
|
directly."""
|
||||||
|
|
||||||
|
def version():
|
||||||
|
"""Return the version number of the pycpdflib library."""
|
||||||
|
|
||||||
|
def setFast():
|
||||||
|
""" Set fast mode. Some operations have a fast mode. The default is 'slow'
|
||||||
|
mode, which works even on old-fashioned files. For more details, see
|
||||||
|
section 1.13 of the CPDF manual. This function sets the mode globally. """
|
||||||
|
|
||||||
|
def setSlow():
|
||||||
|
""" Set slow mode. Some operations have a fast mode. The default is 'slow'
|
||||||
|
mode, which works even on old-fashioned files. For more details, see
|
||||||
|
section 1.13 of the CPDF manual. This function sets the mode globally. """
|
||||||
|
|
||||||
|
def clearError():
|
||||||
|
""" Clear the current error state. """
|
||||||
|
|
||||||
|
def onExit():
|
||||||
|
""" A debug function which prints some information about
|
||||||
|
resource usage. This can be used to detect if PDFs or ranges are being
|
||||||
|
deallocated properly."""
|
|
@ -0,0 +1,174 @@
|
||||||
|
# CHAPTER 1. Basics
|
||||||
|
|
||||||
|
def fromFile(filename, userpw):
|
||||||
|
""" Load a PDF file from a given file.
|
||||||
|
Supply a user password (possibly blank) in case the file is encrypted. It
|
||||||
|
won't be decrypted, but sometimes the password is needed just to load the
|
||||||
|
file."""
|
||||||
|
|
||||||
|
def fromFileLazy(filename, userpw):
|
||||||
|
""" Loads a PDF from a file, doing only
|
||||||
|
minimal parsing. The objects will be read and parsed when they are actually
|
||||||
|
needed. Use this when the whole file won't be required. Also supply a user
|
||||||
|
password (possibly blank) in case the file is encrypted. It won't be
|
||||||
|
decrypted, but sometimes the password is needed just to load the file."""
|
||||||
|
|
||||||
|
def fromMemory(data, userpw):
|
||||||
|
""" Load a file from a byte array and the user password (blank if none)."""
|
||||||
|
|
||||||
|
def fromMemoryLazy(data, userpw):
|
||||||
|
""" Load a file from a byte array and the user password (blank if
|
||||||
|
none), but lazily like fromFileLazy."""
|
||||||
|
|
||||||
|
def blankDocument(w, h, pages):
|
||||||
|
""" Create a blank document
|
||||||
|
with pages of the given width (in points), height (in points), and number
|
||||||
|
of pages."""
|
||||||
|
|
||||||
|
def blankDocumentPaper(papersize, pages):
|
||||||
|
"""Create a blank document with pages of the given paper size, and number
|
||||||
|
of pages. """
|
||||||
|
|
||||||
|
def ptOfCm(i):
|
||||||
|
"""Convert a figure in centimetres to points (72 points to 1 inch)."""
|
||||||
|
|
||||||
|
def ptOfMm(i):
|
||||||
|
"""Convert a figure in millimetres to points (72 points to 1 inch)."""
|
||||||
|
|
||||||
|
def ptOfIn(i):
|
||||||
|
"""Convert a figure in inches to points (72 points to 1 inch)."""
|
||||||
|
|
||||||
|
def cmOfPt(i):
|
||||||
|
"""Convert a figure in points to centimetres (72 points to 1 inch)."""
|
||||||
|
|
||||||
|
def mmOfPt(i):
|
||||||
|
"""Convert a figure in points to millimetres (72 points to 1 inch)."""
|
||||||
|
|
||||||
|
def inOfPt(i):
|
||||||
|
"""Convert a figure in points to inches (72 points to 1 inch)."""
|
||||||
|
|
||||||
|
def parsePagespec(pdf, pagespec):
|
||||||
|
"""Parse a page specification such as "1-3,8-end" to a range with reference to
|
||||||
|
a given PDF (the PDF is supplied so that page ranges which reference pages
|
||||||
|
which do not exist are rejected)."""
|
||||||
|
|
||||||
|
def validatePagespec(pagespec):
|
||||||
|
"""Validate a page specification, returning True or False, so far as is
|
||||||
|
possible in the absence of the actual document."""
|
||||||
|
|
||||||
|
def stringOfPagespec(pdf, r):
|
||||||
|
"""Build a page specification from a page
|
||||||
|
range. For example, the range containing 1,2,3,6,7,8 in a document of 8
|
||||||
|
pages might yield "1-3,6-end" """
|
||||||
|
|
||||||
|
def blankRange():
|
||||||
|
"""Create a range with no pages in."""
|
||||||
|
|
||||||
|
def pageRange(f, t):
|
||||||
|
""" Build a range from one page to another inclusive.
|
||||||
|
For example, pageRange(3,7) gives the range 3,4,5,6,7. """
|
||||||
|
|
||||||
|
def all(pdf):
|
||||||
|
"""The range containing all the pages in a given document."""
|
||||||
|
|
||||||
|
def even(r):
|
||||||
|
"""A range which contains just the even pages of another
|
||||||
|
range."""
|
||||||
|
|
||||||
|
def odd(r):
|
||||||
|
"""A range which contains just the odd pages of another
|
||||||
|
range."""
|
||||||
|
|
||||||
|
def rangeUnion(a, b):
|
||||||
|
"""The union of two ranges giving a range containing
|
||||||
|
the pages in range a and range b."""
|
||||||
|
|
||||||
|
def difference(a, b):
|
||||||
|
"""The difference of two ranges, giving a range
|
||||||
|
containing all the pages in a except for those which are also in b."""
|
||||||
|
|
||||||
|
def removeDuplicates(r):
|
||||||
|
"""Deduplicates a range, returning a new one."""
|
||||||
|
|
||||||
|
def rangeLength(r):
|
||||||
|
"""The number of pages in a range."""
|
||||||
|
|
||||||
|
def rangeGet(r, n):
|
||||||
|
"""Get the page number at position n in a range, where n runs from 0 to rangeLength - 1."""
|
||||||
|
|
||||||
|
def rangeAdd(r, p):
|
||||||
|
"""Add the page to a range, if it is not already
|
||||||
|
there."""
|
||||||
|
|
||||||
|
def isInRange(r, p):
|
||||||
|
"""Returns True if the page p is in the range r, False otherwise."""
|
||||||
|
|
||||||
|
def pages(pdf):
|
||||||
|
"""Return the number of pages in a PDF."""
|
||||||
|
r = libc.pycpdf_pages(pdf.pdf)
|
||||||
|
checkerror()
|
||||||
|
return r
|
||||||
|
|
||||||
|
def pagesFast(userpw, filename):
|
||||||
|
"""Return the number of pages in a given
|
||||||
|
PDF, with given user password. It tries to do this as fast as
|
||||||
|
possible, without loading the whole file."""
|
||||||
|
|
||||||
|
def toFile(pdf, filename, linearize, make_id):
|
||||||
|
"""Write the file to a given filename. If linearize is True, it will be
|
||||||
|
linearized, if supported by libcpdf. If make_id is True, it will be given a
|
||||||
|
new ID."""
|
||||||
|
|
||||||
|
def toFileExt(pdf, filename, linearize, make_id, preserve_objstm,
|
||||||
|
generate_objstm, compress_objstm):
|
||||||
|
"""Write the file to a given filename. If linearize is True, it will be
|
||||||
|
linearized, if supported by libcpdf. If make_id is True, it will be given a
|
||||||
|
new ID. If preserve_objstm is True, existing object streams will be
|
||||||
|
preserved. If generate_objstm is True, object streams will be generated
|
||||||
|
even if not originally present. If compress_objstm is True, object streams
|
||||||
|
will be compressed (what we usually want). WARNING: the pdf argument will
|
||||||
|
be invalid after this call and should not be used again."""
|
||||||
|
|
||||||
|
|
||||||
|
def toMemory(pdf, linearize, make_id):
|
||||||
|
"""Write a file to memory, returning the buffer as a byte array of type
|
||||||
|
bytes."""
|
||||||
|
|
||||||
|
def isEncrypted(pdf):
|
||||||
|
"""Returns True if a document is encrypted, False otherwise."""
|
||||||
|
r = libc.pycpdf_isEncrypted(pdf.pdf)
|
||||||
|
checkerror()
|
||||||
|
return r
|
||||||
|
|
||||||
|
def toFileEncrypted(pdf, method, permissions, ownerpw, userpw, linearize,
|
||||||
|
makeid, filename):
|
||||||
|
"""Write the file to a given filename encrypted with the given encryption
|
||||||
|
method, permissions list, and owner and user passwords. If linearize is
|
||||||
|
True, it will be linearized, if supported by libcpdf. If makeid is True,
|
||||||
|
it will be given a new ID."""
|
||||||
|
|
||||||
|
def toFileEncryptedExt(pdf, method, permissions, ownerpw, userpw, linearize,
|
||||||
|
makeid, preserve_objstm, generate_objstm,
|
||||||
|
compress_objstm, filename):
|
||||||
|
"""Write the file to a given filename encrypted with the given encryption
|
||||||
|
method, permissions list, and owner and user passwords. If linearize is
|
||||||
|
True, it will be linearized, if supported by libcpdf. If makeid is True,
|
||||||
|
it will be given a new ID. If preserve_objstm is True, existing object
|
||||||
|
streams will be preserved. If generate_objstm is True, object streams will
|
||||||
|
be generated even if not originally present. If compress_objstm is True,
|
||||||
|
object streams will be compressed (what we usually want). WARNING: the pdf
|
||||||
|
argument will be invalid after this call and should not be used again."""
|
||||||
|
|
||||||
|
def decryptPdf(pdf, userpw):
|
||||||
|
"""Attempts to decrypt a PDF using the given user password. An exception is
|
||||||
|
raised in the event of a bad password."""
|
||||||
|
|
||||||
|
def decryptPdfOwner(pdf, ownerpw):
|
||||||
|
"""Attempts to decrypt a PDF using the given owner password. An exception
|
||||||
|
is raised in the event of a bad password."""
|
||||||
|
|
||||||
|
def hasPermission(pdf, perm):
|
||||||
|
"""Returns True if the given permission (restriction) is present."""
|
||||||
|
|
||||||
|
def encryptionKind(pdf):
|
||||||
|
"""Return the encryption method currently in use on a document."""
|
|
@ -0,0 +1,20 @@
|
||||||
|
# CHAPTER 2. Merging and Splitting
|
||||||
|
|
||||||
|
def mergeSimple(pdfs):
|
||||||
|
"""Given a list of PDFs, merges the documents into a new PDF, which is
|
||||||
|
returned."""
|
||||||
|
|
||||||
|
def merge(pdfs, retain_numbering, remove_duplicate_fonts):
|
||||||
|
"""Merges the list of PDFs. If retain_numbering is True page labels are not
|
||||||
|
rewritten. If remove_duplicate_fonts is True, duplicate fonts are merged.
|
||||||
|
This is useful when the source documents for merging originate from the
|
||||||
|
same source."""
|
||||||
|
|
||||||
|
def mergeSame(pdfs, retain_numbering, remove_duplicate_fonts, ranges):
|
||||||
|
"""The same as merge, except that it has an additional argument
|
||||||
|
- a list of page ranges. This is used to select the pages to pick from
|
||||||
|
each PDF. This avoids duplication of information when multiple discrete
|
||||||
|
parts of a single source PDF are included."""
|
||||||
|
|
||||||
|
def selectPages(pdf, r):
|
||||||
|
""" Returns a new document which contains just those pages in the page range."""
|
|
@ -0,0 +1,70 @@
|
||||||
|
# CHAPTER 3. Pages
|
||||||
|
|
||||||
|
def scalePages(pdf, r, sx, sy):
|
||||||
|
"""Scale the page dimensions and content of the given range of pages by
|
||||||
|
the given scale (sx, sy), about (0, 0). Other boxes (crop etc.) are altered
|
||||||
|
as appropriate."""
|
||||||
|
|
||||||
|
def scaleToFit(pdf, r, w, h, scale_to_fit_scale):
|
||||||
|
"""Scales the pages in the range to fit new page dimensions (w and h)
|
||||||
|
multiplied by scale_to_fit_scale (typically 1.0). Other boxes (crop etc.)
|
||||||
|
are altered as appropriate."""
|
||||||
|
|
||||||
|
def scaleToFitPaper(pdf, r, papersize, scale_to_fit_scale):
|
||||||
|
"""Scales the given pages to fit the given page size, possibly multiplied
|
||||||
|
by scale_to_fit_scale (typically 1.0)"""
|
||||||
|
|
||||||
|
def scaleContents(pdf, r, pos, scale):
|
||||||
|
"""Scales the contents of the pages in the range about the point given by
|
||||||
|
the position, by the scale given."""
|
||||||
|
|
||||||
|
def shiftContents(pdf, r, dx, dy):
|
||||||
|
"""Shift the content of the pages in the range by (dx, dy)."""
|
||||||
|
|
||||||
|
def rotate(pdf, r, rotation):
|
||||||
|
"""Change the viewing rotation of the pages in the range to an
|
||||||
|
absolute value. Appropriate rotations are 0, 90, 180, 270."""
|
||||||
|
|
||||||
|
def rotateBy(pdf, r, rotation):
|
||||||
|
"""Change the viewing rotation of the pages in the range by a
|
||||||
|
given number of degrees. Appropriate values are 90, 180, 270."""
|
||||||
|
|
||||||
|
def rotateContents(pdf, r, rotation):
|
||||||
|
"""Rotate the content about the centre
|
||||||
|
of the page by the given number of degrees, in a clockwise direction."""
|
||||||
|
|
||||||
|
def upright(pdf, r):
|
||||||
|
"""Change the viewing rotation of the pages in the range, counter-rotating
|
||||||
|
the dimensions and content such that there is no visual change."""
|
||||||
|
|
||||||
|
def hFlip(pdf, r):
|
||||||
|
"""Flip horizontally the pages in the range."""
|
||||||
|
|
||||||
|
def vFlip(pdf, r):
|
||||||
|
"""Flip vertically the pages in the range."""
|
||||||
|
|
||||||
|
def crop(pdf, r, x, y, w, h):
|
||||||
|
"""Crop a page to the box defined by (x, y, w, h), replacing any existing
|
||||||
|
crop box."""
|
||||||
|
|
||||||
|
def removeCrop(pdf, r):
|
||||||
|
"""Remove any crop box from pages in the range."""
|
||||||
|
|
||||||
|
def removeTrim(pdf, r):
|
||||||
|
"""Remove any trim box from pages in the range."""
|
||||||
|
|
||||||
|
def removeArt(pdf, r):
|
||||||
|
"""Remove any art box from pages in the range."""
|
||||||
|
|
||||||
|
def removeBleed(pdf, r):
|
||||||
|
"""Remove any bleed box from pages in the range."""
|
||||||
|
|
||||||
|
def trimMarks(pdf, r):
|
||||||
|
"""Add trim marks to the given pages, if the trimbox exists."""
|
||||||
|
|
||||||
|
def showBoxes(pdf, r):
|
||||||
|
"""Show the boxes on the given pages, for debug."""
|
||||||
|
|
||||||
|
def hardBox(pdf, r, boxname):
|
||||||
|
"""Make a given box a 'hard box' i.e. clip it explicitly. Boxname could be,
|
||||||
|
for example "/TrimBox"."""
|
|
@ -0,0 +1,3 @@
|
||||||
|
# CHAPTER 4. Encryption
|
||||||
|
|
||||||
|
# Encryption covered under Chapter 1 in pycpdflib
|
|
@ -0,0 +1,13 @@
|
||||||
|
# CHAPTER 5. Compression
|
||||||
|
|
||||||
|
def compress(pdf):
|
||||||
|
"""Compress any uncompressed streams in the given PDF using the Flate
|
||||||
|
algorithm."""
|
||||||
|
|
||||||
|
def decompress(pdf):
|
||||||
|
"""Decompress any streams in the given PDF, so long as the compression
|
||||||
|
method is supported."""
|
||||||
|
|
||||||
|
def squeezeInMemory(pdf):
|
||||||
|
"""squeezeInMemory(pdf) squeezes a pdf in memory. Squeezing is a lossless
|
||||||
|
compression method which works by rearrangement of a PDF's internal
|
|
@ -0,0 +1,9 @@
|
||||||
|
# CHAPTER 6. Bookmarks
|
||||||
|
|
||||||
|
def getBookmarks(pdf):
|
||||||
|
"""Get the bookmarks for a PDF as a list of tuples of the form:
|
||||||
|
(level : int, page : int, text : string, openstatus : bool)"""
|
||||||
|
|
||||||
|
def setBookmarks(pdf, marks):
|
||||||
|
"""Set the bookmarks for a PDF as a list of tuples of the form:
|
||||||
|
(level : int, page : int, text : string, openstatus : bool)"""
|
|
@ -0,0 +1,3 @@
|
||||||
|
# CHAPTER 7. Presentations
|
||||||
|
|
||||||
|
# Not included in the library version
|
|
@ -0,0 +1,110 @@
|
||||||
|
# CHAPTER 8. Logos, Watermarks and Stamps
|
||||||
|
|
||||||
|
def stampOn(pdf, pdf2, r):
|
||||||
|
"""Stamps pdf on top of all the pages in pdf2 which are in the range. The
|
||||||
|
stamp is placed with its origin at the origin of the target document."""
|
||||||
|
|
||||||
|
def stampUnder(pdf, pdf2, r):
|
||||||
|
"""Stamps pdf under all the pages in pdf2 which are in the range. The
|
||||||
|
stamp is placed with its origin at the origin of the target document."""
|
||||||
|
|
||||||
|
def stampExtended(pdf, pdf2, r, isover, scale_stamp_to_fit, pos,
|
||||||
|
relative_to_cropbox):
|
||||||
|
"""A stamping function with extra features:
|
||||||
|
|
||||||
|
- isover True, pdf goes over pdf2, isover False, pdf goes under pdf2
|
||||||
|
- scale_stamp_to_fit scales the stamp to fit the page
|
||||||
|
- pos gives the position to put the stamp
|
||||||
|
- relative_to_cropbox: if True, pos is relative to crop box not media box"""
|
||||||
|
|
||||||
|
def combinePages(pdf, pdf2):
|
||||||
|
"""Combines the PDFs page-by-page, putting each page of pdf2 over each page
|
||||||
|
of pdf."""
|
||||||
|
|
||||||
|
def addText(metrics, pdf, r, text, p, line_spacing, bates, font, size, red,
|
||||||
|
green, blue, underneath, relative_to_cropbox, outline, opacity,
|
||||||
|
justification, midline, topline, filename, line_width,
|
||||||
|
embed_fonts):
|
||||||
|
"""Adding text. Adds text to a PDF, if the characters exist in the font.
|
||||||
|
|
||||||
|
* metrics: If True, don't actually add text but collect metrics.
|
||||||
|
* pdf: Document
|
||||||
|
* r: Page Range
|
||||||
|
* text: The text to add
|
||||||
|
* p: Position to add text at
|
||||||
|
* line_spacing: Linespacing, 1.0 = normal
|
||||||
|
* bates: Starting Bates number
|
||||||
|
* font: Font
|
||||||
|
* size: Font size in points
|
||||||
|
* red: Red component of colour, 0.0 - 1.0
|
||||||
|
* green: Green component of colour, 0.0 - 1.0
|
||||||
|
* blue: Blue component of colour, 0.0 - 1.0
|
||||||
|
* underneath: If True, text is added underneath rather than on top
|
||||||
|
* relative_to_cropbox: If True, position is relative to crop box not
|
||||||
|
media box
|
||||||
|
* outline: If True, text is outline rather than filled
|
||||||
|
* opacity: Opacity, 1.0 = opaque, 0.0 = wholly transparent
|
||||||
|
* justification: Justification
|
||||||
|
* midline: If True, position is relative to midline of text, not
|
||||||
|
baseline
|
||||||
|
* topline: If True, position is relative to topline of text, not
|
||||||
|
baseline
|
||||||
|
* filename: filename that this document was read from (optional)
|
||||||
|
* line_width: line width
|
||||||
|
* embed_fonts: embed fonts
|
||||||
|
|
||||||
|
Special codes
|
||||||
|
|
||||||
|
* %Page Page number in arabic notation (1, 2, 3...)
|
||||||
|
* %roman Page number in lower-case roman notation (i, ii, iii...)
|
||||||
|
* %Roman Page number in upper-case roman notation (I, II, III...)
|
||||||
|
* %EndPage Last page of document in arabic notation
|
||||||
|
* %Label The page label of the page
|
||||||
|
* %EndLabel The page label of the last page
|
||||||
|
* %filename The full file name of the input document
|
||||||
|
* %a Abbreviated weekday name (Sun, Mon etc.)
|
||||||
|
* %A Full weekday name (Sunday, Monday etc.)
|
||||||
|
* %b Abbreviated month name (Jan, Feb etc.)
|
||||||
|
* %B Full month name (January, February etc.)
|
||||||
|
* %d Day of the month (01-31)
|
||||||
|
* %e Day of the month (1-31)
|
||||||
|
* %H Hour in 24-hour clock (00-23)
|
||||||
|
* %I Hour in 12-hour clock (01-12)
|
||||||
|
* %j Day of the year (001-366)
|
||||||
|
* %m Month of the year (01-12)
|
||||||
|
* %M Minute of the hour (00-59)
|
||||||
|
* %p "a.m" or "p.m"
|
||||||
|
* %S Second of the minute (00-61)
|
||||||
|
* %T Same as %H:%M:%S
|
||||||
|
* %u Weekday (1-7, 1 = Monday)
|
||||||
|
* %w Weekday (0-6, 0 = Sunday)
|
||||||
|
* %Y Year (0000-9999)
|
||||||
|
* %% The % character"""
|
||||||
|
|
||||||
|
def addTextSimple(pdf, r, text, p, font, size):
|
||||||
|
"""like addText, but with most parameters default
|
||||||
|
|
||||||
|
* pdf = the document
|
||||||
|
* r = the range
|
||||||
|
* text = the text
|
||||||
|
* p = the position
|
||||||
|
* font = the font
|
||||||
|
* size = the font size"""
|
||||||
|
|
||||||
|
def removeText(pdf, r):
|
||||||
|
"""Remove any text added by libcpdf from the given pages."""
|
||||||
|
r = range_of_list(r)
|
||||||
|
|
||||||
|
def textWidth(font, string):
|
||||||
|
"""Return the width of a given string in the given font in thousandths of a
|
||||||
|
point."""
|
||||||
|
|
||||||
|
def addContent(content, before, pdf, r):
|
||||||
|
"""Add page content before (if True) or after (if False) the existing
|
||||||
|
content to pages in the given range in the given PDF. Warning: this is a low
|
||||||
|
level function requiring understanding of the PDF format."""
|
||||||
|
|
||||||
|
def stampAsXObject(pdf, r, stamp_pdf):
|
||||||
|
"""Stamps stamp_pdf onto the pages in the given range in pdf as a shared
|
||||||
|
Form XObject. The name of the newly-created XObject is returned, for use
|
||||||
|
with addContent. """
|
|
@ -0,0 +1,26 @@
|
||||||
|
# CHAPTER 9. Multipage facilities
|
||||||
|
|
||||||
|
def twoUp(pdf):
|
||||||
|
"""Impose a document two up by retaining the existing page
|
||||||
|
size, scaling pages down."""
|
||||||
|
|
||||||
|
def twoUpStack(pdf):
|
||||||
|
"""Impose a document two up by doubling the page size,
|
||||||
|
to fit two pages on one."""
|
||||||
|
|
||||||
|
def padBefore(pdf, r):
|
||||||
|
"""Adds a blank page before each page in the given range."""
|
||||||
|
|
||||||
|
def padAfter(pdf, r):
|
||||||
|
"""Adds a blank page after each page in the given range."""
|
||||||
|
|
||||||
|
def padEvery(pdf, n):
|
||||||
|
"""Adds a blank page after every n pages."""
|
||||||
|
|
||||||
|
def padMultiple(pdf, n):
|
||||||
|
"""Adds pages at the end to pad the file to a multiple of n pages in
|
||||||
|
length."""
|
||||||
|
|
||||||
|
def padMultipleBefore(pdf, n):
|
||||||
|
"""Adds pages at the beginning to pad the file to a
|
||||||
|
multiple of n pages in length."""
|
|
@ -0,0 +1,3 @@
|
||||||
|
# CHAPTER 10. Annotations
|
||||||
|
|
||||||
|
# Not in the library version.
|
|
@ -0,0 +1,265 @@
|
||||||
|
# CHAPTER 11. Document Information and Metadata
|
||||||
|
|
||||||
|
def isLinearized(filename):
|
||||||
|
"""Finds out if a document is linearized as quickly
|
||||||
|
as possible without loading it."""
|
||||||
|
|
||||||
|
def getVersion(pdf):
|
||||||
|
"""Return the minor version number of a document."""
|
||||||
|
|
||||||
|
def getMajorVersion(pdf):
|
||||||
|
"""Return the major version number of a document."""
|
||||||
|
|
||||||
|
def getTitle(pdf):
|
||||||
|
"""Return the title of a document."""
|
||||||
|
|
||||||
|
def getAuthor(pdf):
|
||||||
|
"""Return the author of a document."""
|
||||||
|
|
||||||
|
def getSubject(pdf):
|
||||||
|
"""Return the subject of a document."""
|
||||||
|
|
||||||
|
def getKeywords(pdf):
|
||||||
|
"""Return the keywords of a document."""
|
||||||
|
|
||||||
|
def getCreator(pdf):
|
||||||
|
"""Return the creator of a document."""
|
||||||
|
|
||||||
|
def getProducer(pdf):
|
||||||
|
"""Return the producer of a document."""
|
||||||
|
|
||||||
|
def getCreationDate(pdf):
|
||||||
|
"""Return the creation date of a document."""
|
||||||
|
|
||||||
|
def getModificationDate(pdf):
|
||||||
|
"""Return the modification date of a document."""
|
||||||
|
|
||||||
|
def getTitleXMP(pdf):
|
||||||
|
"""Return the XMP title of a document."""
|
||||||
|
|
||||||
|
def getAuthorXMP(pdf):
|
||||||
|
"""Return the XMP author of a document."""
|
||||||
|
|
||||||
|
def getSubjectXMP(pdf):
|
||||||
|
"""Return the XMP subject of a document."""
|
||||||
|
|
||||||
|
def getKeywordsXMP(pdf):
|
||||||
|
"""Return the XMP keywords of a document."""
|
||||||
|
|
||||||
|
def getCreatorXMP(pdf):
|
||||||
|
"""Return the XMP creator of a document."""
|
||||||
|
|
||||||
|
def getProducerXMP(pdf):
|
||||||
|
"""Return the XMP producer of a document."""
|
||||||
|
|
||||||
|
def getCreationDateXMP(pdf):
|
||||||
|
"""Return the XMP creation date of a document."""
|
||||||
|
|
||||||
|
def getModificationDateXMP(pdf):
|
||||||
|
"""Return the XMP modification date of a document."""
|
||||||
|
|
||||||
|
def setTitle(pdf, s):
|
||||||
|
"""Set the title of a document."""
|
||||||
|
|
||||||
|
def setAuthor(pdf, s):
|
||||||
|
"""Set the author of a document."""
|
||||||
|
|
||||||
|
def setSubject(pdf, s):
|
||||||
|
"""Set the subject of a document."""
|
||||||
|
|
||||||
|
def setKeywords(pdf, s):
|
||||||
|
"""Set the keywords of a document."""
|
||||||
|
|
||||||
|
def setCreator(pdf, s):
|
||||||
|
"""Set the creator of a document."""
|
||||||
|
|
||||||
|
def setProducer(pdf, s):
|
||||||
|
"""Set the producer of a document."""
|
||||||
|
|
||||||
|
def setCreationDate(pdf, s):
|
||||||
|
"""Set the creation date of a document."""
|
||||||
|
|
||||||
|
def setModificationDate(pdf, s):
|
||||||
|
"""Set the modification date of a document."""
|
||||||
|
|
||||||
|
def setTitleXMP(pdf, s):
|
||||||
|
"""Set the XMP title of a document."""
|
||||||
|
|
||||||
|
def setAuthorXMP(pdf, s):
|
||||||
|
"""Set the XMP author of a document."""
|
||||||
|
|
||||||
|
def setSubjectXMP(pdf, s):
|
||||||
|
"""Set the XMP subject of a document."""
|
||||||
|
|
||||||
|
def setKeywordsXMP(pdf, s):
|
||||||
|
"""Set the XMP keywords of a document."""
|
||||||
|
|
||||||
|
def setCreatorXMP(pdf, s):
|
||||||
|
"""Set the XMP creator of a document."""
|
||||||
|
|
||||||
|
def setProducerXMP(pdf, s):
|
||||||
|
"""Set the XMP producer of a document."""
|
||||||
|
|
||||||
|
def setCreationDateXMP(pdf, s):
|
||||||
|
"""Set the XMP creation date of a document."""
|
||||||
|
|
||||||
|
def setModificationDateXMP(pdf, s):
|
||||||
|
"""Set the XMP modification date of a document."""
|
||||||
|
|
||||||
|
def getDateComponents(string):
|
||||||
|
"""Return the components (year, month, day, hour, minute, second,
|
||||||
|
hour_offset, minute_offset) from a PDF date string.
|
||||||
|
|
||||||
|
Month 1-12, day 1-31, hours (0-23), minutes (0-59), seconds
|
||||||
|
(0-59), hour_offset is the offset from UT in hours (-23 to 23);
|
||||||
|
minute_offset is the offset from UT in minutes (-59 to 59)."""
|
||||||
|
|
||||||
|
def dateStringOfComponents(cs):
|
||||||
|
"""Build a PDF date string from a (year, month, day, hour, minute, second,
|
||||||
|
hour_offset, minute_offset) tuple.
|
||||||
|
|
||||||
|
Dates: Month 1-12, day 1-31, hours (0-23), minutes (0-59), seconds
|
||||||
|
(0-59), hour_offset is the offset from UT in hours (-23 to 23);
|
||||||
|
minute_offset is the offset from UT in minutes (-59 to 59)."""
|
||||||
|
|
||||||
|
def getPageRotation(pdf, pagenumber):
|
||||||
|
"""Get the viewing rotation for a given page."""
|
||||||
|
|
||||||
|
def hasBox(pdf, pagenumber, boxname):
|
||||||
|
"""Returns True, if the page has the given box. E.g "/CropBox" """
|
||||||
|
|
||||||
|
def getMediaBox(pdf, pagenumber):
|
||||||
|
"""Get the media box for the given document and page number, as min x, max x,
|
||||||
|
min y, max y in points. Only succeeds if such a box exists, as checked by
|
||||||
|
hasBox."""
|
||||||
|
|
||||||
|
def getCropBox(pdf, pagenumber):
|
||||||
|
"""Get the crop box for the given document and page number, as min x, max x,
|
||||||
|
min y, max y in points. Only succeeds if such a box exists, as checked by
|
||||||
|
hasBox."""
|
||||||
|
|
||||||
|
def getTrimBox(pdf, pagenumber):
|
||||||
|
"""Get the trim box for the given document and page number, as min x, max x,
|
||||||
|
min y, max y in points. Only succeeds if such a box exists, as checked by
|
||||||
|
hasBox."""
|
||||||
|
|
||||||
|
def getArtBox(pdf, pagenumber):
|
||||||
|
"""Get the art box for the given document and page number, as min x, max x,
|
||||||
|
min y, max y in points. Only succeeds if such a box exists, as checked by
|
||||||
|
hasBox."""
|
||||||
|
|
||||||
|
def getBleedBox(pdf, pagenumber):
|
||||||
|
"""Get the bleed box for the given document and page number, as min x, max x,
|
||||||
|
min y, max y in points. Only succeeds if such a box exists, as checked by
|
||||||
|
hasBox."""
|
||||||
|
|
||||||
|
def setMediaBox(pdf, r, minx, maxx, miny, maxy):
|
||||||
|
"""Set the media box given the document, page range, min x, max x,
|
||||||
|
min y, max y in points."""
|
||||||
|
|
||||||
|
def setCropBox(pdf, r, minx, maxx, miny, maxy):
|
||||||
|
"""Set the crop box given the document, page range, min x, max x,
|
||||||
|
min y, max y in points."""
|
||||||
|
|
||||||
|
def setTrimBox(pdf, r, minx, maxx, miny, maxy):
|
||||||
|
"""Set the trim box given the document, page range, min x, max x,
|
||||||
|
min y, max y in points."""
|
||||||
|
|
||||||
|
def setArtBox(pdf, r, minx, maxx, miny, maxy):
|
||||||
|
"""Set the art box given the document, page range, min x, max x,
|
||||||
|
min y, max y in points."""
|
||||||
|
|
||||||
|
def setBleedBox(pdf, r, minx, maxx, miny, maxy):
|
||||||
|
"""Set the bleed box given the document, page range, min x, max x,
|
||||||
|
min y, max y in points."""
|
||||||
|
|
||||||
|
def markTrapped(pdf):
|
||||||
|
"""Mark a document as trapped."""
|
||||||
|
|
||||||
|
def markUntrapped(pdf):
|
||||||
|
"""Mark a document as untrapped."""
|
||||||
|
|
||||||
|
def markTrappedXMP(pdf):
|
||||||
|
"""Mark a document as trapped in XMP metadata."""
|
||||||
|
|
||||||
|
def markUntrappedXMP(pdf):
|
||||||
|
"""Mark a document as untrapped in XMP metadata."""
|
||||||
|
|
||||||
|
def setPageLayout(pdf, layout):
|
||||||
|
"""Set the page layout for a document."""
|
||||||
|
|
||||||
|
def setPageMode(pdf, mode):
|
||||||
|
"""Set the page mode for a document."""
|
||||||
|
|
||||||
|
def hideToolbar(pdf, flag):
|
||||||
|
"""Set the hide toolbar flag."""
|
||||||
|
|
||||||
|
def hideMenubar(pdf, flag):
|
||||||
|
"""Set the hide menu bar flag."""
|
||||||
|
|
||||||
|
def hideWindowUi(pdf, flag):
|
||||||
|
"""Set the hide window UI flag."""
|
||||||
|
|
||||||
|
def fitWindow(pdf, flag):
|
||||||
|
"""Set the fit window flag."""
|
||||||
|
|
||||||
|
def centerWindow(pdf, flag):
|
||||||
|
"""Set the center window flag."""
|
||||||
|
|
||||||
|
def displayDocTitle(pdf, flag):
|
||||||
|
"""Set the display document title flag."""
|
||||||
|
|
||||||
|
def openAtPage(pdf, fitflag, pagenumber):
|
||||||
|
"""Set the PDF to open, possibly with zoom-to-fit, at the given page
|
||||||
|
number. """
|
||||||
|
|
||||||
|
def setMetadataFromFile(pdf, filename):
|
||||||
|
"""Set the XMP metadata of a document, given a file name."""
|
||||||
|
|
||||||
|
def setMetadataFromByteArray(pdf, data):
|
||||||
|
"""Set the XMP metadata from an array of bytes."""
|
||||||
|
|
||||||
|
def getMetadata(pdf):
|
||||||
|
"""Return the XMP metadata as a byte array of type bytes"""
|
||||||
|
|
||||||
|
def removeMetadata(pdf):
|
||||||
|
"""Remove the XMP metadata from a document"""
|
||||||
|
|
||||||
|
def createMetadata(pdf):
|
||||||
|
"""Build fresh XMP metadata, as well as possible, from existing
|
||||||
|
metadata in the document."""
|
||||||
|
|
||||||
|
def setMetadataDate(pdf, date):
|
||||||
|
"""Set the metadata date for a PDF. The date is given in PDF date format --
|
||||||
|
cpdf will convert it to XMP format. The date 'now' means now."""
|
||||||
|
|
||||||
|
def getPageLabels(pdf):
|
||||||
|
"""Get page labels as a list of tuples (style, prefix, offset, startvalue)
|
||||||
|
|
||||||
|
For example, a document might have five pages of introduction with roman
|
||||||
|
numerals, followed by the rest of the pages in decimal arabic, numbered
|
||||||
|
from one. First label:
|
||||||
|
|
||||||
|
* labelstyle = LowercaseRoman
|
||||||
|
* labelprefix = ""
|
||||||
|
* startpage = 1
|
||||||
|
* startvalue = 1
|
||||||
|
|
||||||
|
Second label:
|
||||||
|
|
||||||
|
* labelstyle = DecimalArabic
|
||||||
|
* labelprefix = ""
|
||||||
|
* startpage = 6
|
||||||
|
* startvalue = 1 """
|
||||||
|
|
||||||
|
def addPageLabels(pdf, label, progress):
|
||||||
|
"""Add one group of page labels from a tuple (style, prefix, offset, range).
|
||||||
|
|
||||||
|
The prefix is prefix text for each label. The range is the page range the
|
||||||
|
labels apply to. Offset can be used to shift the numbering up or down."""
|
||||||
|
|
||||||
|
def removePageLabels(pdf):
|
||||||
|
"""Removes all page labels from the document."""
|
||||||
|
|
||||||
|
def getPageLabelStringForPage(pdf, pagenumber):
|
||||||
|
"""Calculate the full label string for a given page, and return it."""
|
|
@ -0,0 +1,23 @@
|
||||||
|
# CHAPTER 12. File Attachments
|
||||||
|
|
||||||
|
def attachFile(filename, pdf):
|
||||||
|
"""Attach a file to the pdf. It is attached at document level."""
|
||||||
|
|
||||||
|
def attachFileToPage(filename, pdf, pagenumber):
|
||||||
|
"""Attach a file, given its file name, pdf, and the page number to which
|
||||||
|
it should be attached."""
|
||||||
|
|
||||||
|
def attachFileFromMemory(data, filename, pdf):
|
||||||
|
"""Attach a file from a byte array. It is attached at document level."""
|
||||||
|
|
||||||
|
def attachFileToPageFromMemory(data, filename, pdf, pagenumber):
|
||||||
|
"""Attach a file to a given page from a byte array. It is attached at
|
||||||
|
page level."""
|
||||||
|
|
||||||
|
def removeAttachedFiles(pdf):
|
||||||
|
"""Remove all page- and document-level attachments from a document."""
|
||||||
|
|
||||||
|
def getAttachments(pdf):
|
||||||
|
"""List information about attachments. Returns a list of tuples
|
||||||
|
(name, page number, byte array of data). Page 0 = document-level
|
||||||
|
attachment."""
|
|
@ -0,0 +1,6 @@
|
||||||
|
# CHAPTER 13. Images
|
||||||
|
|
||||||
|
def getImageResolution(pdf, min_required_resolution):
|
||||||
|
"""Return a list of all uses of images in the PDF which do not meet the
|
||||||
|
minimum required resolution in dpi as tuples of:
|
||||||
|
(pagenumber, name, x pixels, y pixels, x resolution, y resolution)"""
|
|
@ -0,0 +1,12 @@
|
||||||
|
# CHAPTER 14. Fonts
|
||||||
|
|
||||||
|
def getFontInfo(pdf):
|
||||||
|
"""Get a list of (pagenumber, fontname, fonttype, fontencoding) tuples,
|
||||||
|
showing each font used on each page."""
|
||||||
|
|
||||||
|
def removeFonts(pdf):
|
||||||
|
"""Remove all font data from a file."""
|
||||||
|
|
||||||
|
def copyFont(pdf, pdf2, r, pagenumber, fontname):
|
||||||
|
"""Copy the given font from the given page in the pdf PDF to every page in
|
||||||
|
the pdf2 PDF. The new font is stored under its font name."""
|
|
@ -0,0 +1,6 @@
|
||||||
|
# CHAPTER 15. PDF and JSON
|
||||||
|
|
||||||
|
def outputJSON(filename, parse_content, no_stream_data, pdf):
|
||||||
|
"""Output a PDF in JSON format to the given filename. If parse_content is
|
||||||
|
True, page content is parsed. If no_stream_data is True, all stream data is
|
||||||
|
suppressed entirely."""
|
|
@ -0,0 +1,16 @@
|
||||||
|
# CHAPTER 16. Optional Content Groups
|
||||||
|
|
||||||
|
def getOCGList(pdf):
|
||||||
|
"""Return a list of Optional Content Groups in the given pdf as strings."""
|
||||||
|
|
||||||
|
def OCGRename(pdf, n_from, n_to):
|
||||||
|
"""Rename an optional content group."""
|
||||||
|
|
||||||
|
def OCGOrderAll(pdf):
|
||||||
|
"""Ensure that every optional content group appears in the OCG order list."""
|
||||||
|
|
||||||
|
def OCGCoalesce(pdf):
|
||||||
|
"""Coalesce optional content groups. For example, if we merge or stamp two
|
||||||
|
files both with an OCG called "Layer 1", we will have two different
|
||||||
|
optional content groups. This function will merge the two into a single
|
||||||
|
optional content group."""
|
|
@ -0,0 +1,41 @@
|
||||||
|
# CHAPTER 17. Miscellaneous
|
||||||
|
|
||||||
|
def draft(pdf, r, boxes):
|
||||||
|
"""Remove images on the given pages, replacing
|
||||||
|
them with crossed boxes if 'boxes' is True."""
|
||||||
|
|
||||||
|
def removeAllText(pdf, r):
|
||||||
|
"""Remove all text from the given pages in a document."""
|
||||||
|
|
||||||
|
def blackText(pdf, r):
|
||||||
|
"""Blacken all text on the given pages."""
|
||||||
|
|
||||||
|
def blackLines(pdf, r):
|
||||||
|
"""Blacken all lines on the given pages."""
|
||||||
|
|
||||||
|
def blackFills(pdf, r):
|
||||||
|
"""Blacken all fills on the given pages."""
|
||||||
|
|
||||||
|
def thinLines(pdf, r, linewidth):
|
||||||
|
"""Thicken every line less than
|
||||||
|
linewidth to linewidth. Thickness given in points."""
|
||||||
|
|
||||||
|
def copyId(pdf, pdf2):
|
||||||
|
"""Copy the /ID from pdf to pdf2."""
|
||||||
|
|
||||||
|
def removeId(pdf):
|
||||||
|
"""Remove a document's /ID"""
|
||||||
|
|
||||||
|
def setVersion(pdf, version):
|
||||||
|
"""Set the minor version number of a document."""
|
||||||
|
|
||||||
|
def setFullVersion(pdf, major, minor):
|
||||||
|
"""Set the major and minor version number of
|
||||||
|
a document."""
|
||||||
|
|
||||||
|
def removeDictEntry(pdf, key):
|
||||||
|
"""Remove any dictionary entry with the given
|
||||||
|
key anywhere in the document."""
|
||||||
|
|
||||||
|
def removeClipping(pdf, r):
|
||||||
|
"""Remove all clipping from pages in the given range"""
|
Loading…
Reference in New Issue