This commit is contained in:
John Whitington 2021-09-27 19:18:25 +01:00
parent c427f79d8b
commit 25eb56fbdf
19 changed files with 944 additions and 0 deletions

View File

@ -0,0 +1,15 @@
"""Pycpdflib: a python interface to cpdf.
Before using the library, you must load the libpycpdf and libcpdf DLLs. This is
achieved with the pycpdflib.loadDLL function, given the filename or full path
of the libpycpdf DLL. On Windows, you may have to call os.add_dll_directory
first. On MacOS, you may need to give the full path, and you may need to
install libcpdf.so in a standard location /usr/local/lib/, or use the
install_name_tool command to tell libpycpdf.so where to find libcpdf.so.
A 'range' is a list of integers specifying page numbers.
Text arguments and results are in UTF8.
Any function may raise the exception CPDFError, carrying a string describing
the error. """

View File

@ -0,0 +1,129 @@
Loading the libpycpdf and libcpdf DLLs
--------------------------------------
Before using the library, you must load the ``libpycpdf`` and ``libcpdf`` DLLs.
This is achieved with the ``pycpdflib.loadDLL`` function, given the filename or
full path of the ``libpycpdf`` DLL.
On Windows, you may have to call ``os.add_dll_directory`` first. On MacOS, you
may need to give the full path, and you may need to install ``libcpdf.so`` in a
standard location ``/usr/local/lib/``, or use the ``install_name_tool`` command
to tell ``libpycpdf.so`` where to find ``libcpdf.so``.
Conventions
-----------
Any function may raise the exception ``CPDFError``, carrying a string describing
the error.
A 'range' is a list of integers specifying page numbers. Page numbers start at
1. Range arguments are called ``r``.
Text arguments and results are in UTF8.
Units are in PDF points (1/72 inch).
Angles are in degrees.
Built-in values
---------------
Paper sizes:
a0portrait a1portrait a2portrait a3portrait a4portrait a5portrait a0landscape
a1landscape a2landscape a3landscape a4landscape a5landscape usletterportrait
usletterlandscape uslegalportrait uslegallandscape
Permissions:
noEdit noPrint noCopy noAnnot noForms noExtract noAssemble noHqPrint
Encryption methods:
pdf40bit pdf128bit aes128bitfalse aes128bittrue aes256bitfalse aes256bittrue
aes256bitisofalse aes256bitisotrue
Positions:
Positions with two numbers in a tuple e.g (posLeft, 10.0, 20.0)
posCentre posLeft posRight
Positions with one number in a tuple e.g (top, 5.0)
top topLeft topRight left bottomLeft bottomRight right
Positions with no numbers e.g diagonal
diagonal reverseDiagonal
Fonts:
timesRoman timesBold timesItalic timesBoldItalic helvetica helveticaBold
helveticaOblique helveticaBoldOblique courier courierBold courierOblique
courierBoldOblique
Justification:
leftJustify centreJustify rightJustify
Page layouts:
singlePage oneColumn twoColumnLeft twoColumnRight twoPageLeft twoPageRight
Page modes:
useNone useOutlines useThumbs useOC useAttachments
Page label styles:
decimalArabic uppercaseRoman lowercaseRoman uppercaseLetters lowercaseLetters
# CHAPTER 0. Preliminaries
class Pdf:
"""The type of PDF documents."""
def loadDLL(f):
"""Load the libpycpdf DLL from a given file, and set up pycpdflib. Must be
called prior to using any other function in the library."""
class CPDFError(Exception):
"""Any function may raise an exception CPDFError, carrying a string
describing what went wrong."""
def lastError():
"""Return the last error. Not usually used directly, since pycpdflib
functions raise exceptions."""
def lastErrorString():
"""Return the last error string. Not usually used directly, since pycpdflib
functions raise exceptions."""
def checkerror():
"""Raise an exception if the last function call resulted in an error. Not
used directly, since pycpdflib functions will raise the exception
directly."""
def version():
"""Return the version number of the pycpdflib library."""
def setFast():
""" Set fast mode. Some operations have a fast mode. The default is 'slow'
mode, which works even on old-fashioned files. For more details, see
section 1.13 of the CPDF manual. This function sets the mode globally. """
def setSlow():
""" Set slow mode. Some operations have a fast mode. The default is 'slow'
mode, which works even on old-fashioned files. For more details, see
section 1.13 of the CPDF manual. This function sets the mode globally. """
def clearError():
""" Clear the current error state. """
def onExit():
""" A debug function which prints some information about
resource usage. This can be used to detect if PDFs or ranges are being
deallocated properly."""

View File

@ -0,0 +1,174 @@
# CHAPTER 1. Basics
def fromFile(filename, userpw):
""" Load a PDF file from a given file.
Supply a user password (possibly blank) in case the file is encrypted. It
won't be decrypted, but sometimes the password is needed just to load the
file."""
def fromFileLazy(filename, userpw):
""" Loads a PDF from a file, doing only
minimal parsing. The objects will be read and parsed when they are actually
needed. Use this when the whole file won't be required. Also supply a user
password (possibly blank) in case the file is encrypted. It won't be
decrypted, but sometimes the password is needed just to load the file."""
def fromMemory(data, userpw):
""" Load a file from a byte array and the user password (blank if none)."""
def fromMemoryLazy(data, userpw):
""" Load a file from a byte array and the user password (blank if
none), but lazily like fromFileLazy."""
def blankDocument(w, h, pages):
""" Create a blank document
with pages of the given width (in points), height (in points), and number
of pages."""
def blankDocumentPaper(papersize, pages):
"""Create a blank document with pages of the given paper size, and number
of pages. """
def ptOfCm(i):
"""Convert a figure in centimetres to points (72 points to 1 inch)."""
def ptOfMm(i):
"""Convert a figure in millimetres to points (72 points to 1 inch)."""
def ptOfIn(i):
"""Convert a figure in inches to points (72 points to 1 inch)."""
def cmOfPt(i):
"""Convert a figure in points to centimetres (72 points to 1 inch)."""
def mmOfPt(i):
"""Convert a figure in points to millimetres (72 points to 1 inch)."""
def inOfPt(i):
"""Convert a figure in points to inches (72 points to 1 inch)."""
def parsePagespec(pdf, pagespec):
"""Parse a page specification such as "1-3,8-end" to a range with reference to
a given PDF (the PDF is supplied so that page ranges which reference pages
which do not exist are rejected)."""
def validatePagespec(pagespec):
"""Validate a page specification, returning True or False, so far as is
possible in the absence of the actual document."""
def stringOfPagespec(pdf, r):
"""Build a page specification from a page
range. For example, the range containing 1,2,3,6,7,8 in a document of 8
pages might yield "1-3,6-end" """
def blankRange():
"""Create a range with no pages in."""
def pageRange(f, t):
""" Build a range from one page to another inclusive.
For example, pageRange(3,7) gives the range 3,4,5,6,7. """
def all(pdf):
"""The range containing all the pages in a given document."""
def even(r):
"""A range which contains just the even pages of another
range."""
def odd(r):
"""A range which contains just the odd pages of another
range."""
def rangeUnion(a, b):
"""The union of two ranges giving a range containing
the pages in range a and range b."""
def difference(a, b):
"""The difference of two ranges, giving a range
containing all the pages in a except for those which are also in b."""
def removeDuplicates(r):
"""Deduplicates a range, returning a new one."""
def rangeLength(r):
"""The number of pages in a range."""
def rangeGet(r, n):
"""Get the page number at position n in a range, where
n runs from 0 to rangeLength - 1."""
def rangeAdd(r, p):
"""Add the page to a range, if it is not already
there."""
def isInRange(r, p):
"""Returns True if the page p is in the range r, False otherwise."""
def pages(pdf):
    """Return the page count of the given PDF document."""
    page_count = libc.pycpdf_pages(pdf.pdf)
    # Turn any error recorded by the library call into an exception.
    checkerror()
    return page_count
def pagesFast(userpw, filename):
"""Return the number of pages in a given
PDF, with given user password. It tries to do this as fast as
possible, without loading the whole file."""
def toFile(pdf, filename, linearize, make_id):
"""Write the file to a given filename. If linearize is True, it will be
linearized, if supported by libcpdf. If make_id is True, it will be given a
new ID."""
def toFileExt(pdf, filename, linearize, make_id, preserve_objstm,
generate_objstm, compress_objstm):
"""Write the file to a given filename. If linearize is True, it will be
linearized, if supported by libcpdf. If make_id is True, it will be given a
new ID. If preserve_objstm is True, existing object streams will be
preserved. If generate_objstm is True, object streams will be generated
even if not originally present. If compress_objstm is True, object streams
will be compressed (what we usually want). WARNING: the pdf argument will
be invalid after this call and should not be used again."""
def toMemory(pdf, linearize, make_id):
"""Write a file to memory, returning the buffer as a byte array of type
bytes."""
def isEncrypted(pdf):
    """Return True if a document is encrypted, False otherwise."""
    encrypted = libc.pycpdf_isEncrypted(pdf.pdf)
    # Turn any error recorded by the library call into an exception.
    checkerror()
    return encrypted
def toFileEncrypted(pdf, method, permissions, ownerpw, userpw, linearize,
makeid, filename):
"""Write the file to a given filename encrypted with the given encryption
method, permissions list, and owner and user passwords. If linearize is
True, it will be linearized, if supported by libcpdf. If makeid is True,
it will be given a new ID."""
def toFileEncryptedExt(pdf, method, permissions, ownerpw, userpw, linearize,
makeid, preserve_objstm, generate_objstm,
compress_objstm, filename):
"""Write the file to a given filename encrypted with the given encryption
method, permissions list, and owner and user passwords. If linearize is
True, it will be linearized, if supported by libcpdf. If makeid is True,
it will be given a new ID. If preserve_objstm is True, existing object
streams will be preserved. If generate_objstm is True, object streams will
be generated even if not originally present. If compress_objstm is True,
object streams will be compressed (what we usually want). WARNING: the pdf
argument will be invalid after this call and should not be used again."""
def decryptPdf(pdf, userpw):
"""Attempts to decrypt a PDF using the given user password. An exception is
raised in the event of a bad password."""
def decryptPdfOwner(pdf, ownerpw):
"""Attempts to decrypt a PDF using the given owner password. An exception
is raised in the event of a bad password."""
def hasPermission(pdf, perm):
"""Returns True if the given permission (restriction) is present."""
def encryptionKind(pdf):
"""Return the encryption method currently in use on a document."""

View File

@ -0,0 +1,20 @@
# CHAPTER 2. Merging and Splitting
def mergeSimple(pdfs):
"""Given a list of PDFs, merges the documents into a new PDF, which is
returned."""
def merge(pdfs, retain_numbering, remove_duplicate_fonts):
"""Merges the list of PDFs. If retain_numbering is True page labels are not
rewritten. If remove_duplicate_fonts is True, duplicate fonts are merged.
This is useful when the source documents for merging originate from the
same source."""
def mergeSame(pdfs, retain_numbering, remove_duplicate_fonts, ranges):
"""The same as merge, except that it has an additional argument
- a list of page ranges. This is used to select the pages to pick from
each PDF. This avoids duplication of information when multiple discrete
parts of a single source PDF are included."""
def selectPages(pdf, r):
""" Returns a new document with just those pages in the page range."""

View File

@ -0,0 +1,70 @@
# CHAPTER 3. Pages
def scalePages(pdf, r, sx, sy):
"""Scale the page dimensions and content of the given range of pages by
the given scale (sx, sy), about (0, 0). Other boxes (crop etc.) are altered
as appropriate."""
def scaleToFit(pdf, r, w, h, scale_to_fit_scale):
"""Scales the pages in the range to fit new page dimensions (w and h)
multiplied by scale_to_fit_scale (typically 1.0). Other boxes (crop etc.)
are altered as appropriate."""
def scaleToFitPaper(pdf, r, papersize, scale_to_fit_scale):
"""Scales the given pages to fit the given page size, possibly multiplied
by scale_to_fit_scale (typically 1.0)"""
def scaleContents(pdf, r, pos, scale):
"""Scales the contents of the pages in the range about the point given by
the position, by the scale given."""
def shiftContents(pdf, r, dx, dy):
"""Shift the content of the pages in the range by (dx, dy)."""
def rotate(pdf, r, rotation):
"""Change the viewing rotation of the pages in the range to an
absolute value. Appropriate rotations are 0, 90, 180, 270."""
def rotateBy(pdf, r, rotation):
"""Change the viewing rotation of the pages in the range by a
given number of degrees. Appropriate values are 90, 180, 270."""
def rotateContents(pdf, r, rotation):
"""Rotate the content about the centre
of the page by the given number of degrees, in a clockwise direction."""
def upright(pdf, r):
"""Change the viewing rotation of the pages in the range, counter-rotating
the dimensions and content such that there is no visual change."""
def hFlip(pdf, r):
"""Flip horizontally the pages in the range."""
def vFlip(pdf, r):
"""Flip vertically the pages in the range."""
def crop(pdf, r, x, y, w, h):
"""Crop a page to the box defined by (x, y, w, h), replacing any existing
crop box."""
def removeCrop(pdf, r):
"""Remove any crop box from pages in the range."""
def removeTrim(pdf, r):
"""Remove any trim box from pages in the range."""
def removeArt(pdf, r):
"""Remove any art box from pages in the range."""
def removeBleed(pdf, r):
"""Remove any bleed box from pages in the range."""
def trimMarks(pdf, r):
"""Add trim marks to the given pages, if the trimbox exists."""
def showBoxes(pdf, r):
"""Show the boxes on the given pages, for debug."""
def hardBox(pdf, r, boxname):
"""Make a given box a 'hard box' i.e clip it explicitly. Boxname could be,
for example "/TrimBox"."""

View File

@ -0,0 +1,3 @@
# CHAPTER 4. Encryption
# Encryption covered under Chapter 1 in pycpdflib

View File

@ -0,0 +1,13 @@
# CHAPTER 5. Compression
def compress(pdf):
"""Compress any uncompressed streams in the given PDF using the Flate
algorithm."""
def decompress(pdf):
"""Decompress any streams in the given PDF, so long as the compression
method is supported."""
def squeezeInMemory(pdf):
"""squeezeInMemory(pdf) squeezes a pdf in memory. Squeezing is a lossless
compression method which works by rearrangement of a PDF's internal
structure."""

View File

@ -0,0 +1,9 @@
# CHAPTER 6. Bookmarks
def getBookmarks(pdf):
"""Get the bookmarks for a PDF as a list of tuples of the form:
(level : int, page : int, text : string, openstatus : bool)"""
def setBookmarks(pdf, marks):
"""Set the bookmarks for a PDF as a list of tuples of the form:
(level : int, page : int, text : string, openstatus : bool)"""

View File

@ -0,0 +1,3 @@
# CHAPTER 7. Presentations
# Not included in the library version

View File

@ -0,0 +1,110 @@
# CHAPTER 8. Logos, Watermarks and Stamps
def stampOn(pdf, pdf2, r):
"""Stamps pdf on top of all the pages in pdf2 which are in the range. The
stamp is placed with its origin at the origin of the target document."""
def stampUnder(pdf, pdf2, r):
"""Stamps pdf under all the pages in pdf2 which are in the range. The
stamp is placed with its origin at the origin of the target document."""
def stampExtended(pdf, pdf2, r, isover, scale_stamp_to_fit, pos,
relative_to_cropbox):
"""A stamping function with extra features:
- isover True, pdf goes over pdf2, isover False, pdf goes under pdf2
- scale_stamp_to_fit scales the stamp to fit the page
- pos gives the position to put the stamp
- relative_to_cropbox: if True, pos is relative to crop box not media box"""
def combinePages(pdf, pdf2):
"""Combines the PDFs page-by-page, putting each page of pdf2 over each page
of pdf."""
def addText(metrics, pdf, r, text, p, line_spacing, bates, font, size, red,
green, blue, underneath, relative_to_cropbox, outline, opacity,
justification, midline, topline, filename, line_width,
embed_fonts):
"""Adding text. Adds text to a PDF, if the characters exist in the font.
* metrics: If True, don't actually add text but collect metrics.
* pdf: Document
* r: Page Range
* text: The text to add
* p: Position to add text at
* line_spacing: Linespacing, 1.0 = normal
* bates: Starting Bates number
* font: Font
* size: Font size in points
* red: Red component of colour, 0.0 - 1.0
* green: Green component of colour, 0.0 - 1.0
* blue: Blue component of colour, 0.0 - 1.0
* underneath: If True, text is added underneath rather than on top
* relative_to_cropbox: If True, position is relative to crop box not
media box
* outline: If True, text is outline rather than filled
* opacity: Opacity, 1.0 = opaque, 0.0 = wholly transparent
* justification: Justification
* midline: If True, position is relative to midline of text, not
baseline
* topline: If True, position is relative to topline of text, not
baseline
* filename: filename that this document was read from (optional)
* line_width: line width
* embed_fonts: embed fonts
Special codes
* %Page Page number in arabic notation (1, 2, 3...)
* %roman Page number in lower-case roman notation (i, ii, iii...)
* %Roman Page number in upper-case roman notation (I, II, III...)
* %EndPage Last page of document in arabic notation
* %Label The page label of the page
* %EndLabel The page label of the last page
* %filename The full file name of the input document
* %a Abbreviated weekday name (Sun, Mon etc.)
* %A Full weekday name (Sunday, Monday etc.)
* %b Abbreviated month name (Jan, Feb etc.)
* %B Full month name (January, February etc.)
* %d Day of the month (01-31)
* %e Day of the month (1-31)
* %H Hour in 24-hour clock (00-23)
* %I Hour in 12-hour clock (01-12)
* %j Day of the year (001-366)
* %m Month of the year (01-12)
* %M Minute of the hour (00-59)
* %p "a.m" or "p.m"
* %S Second of the minute (00-61)
* %T Same as %H:%M:%S
* %u Weekday (1-7, 1 = Monday)
* %w Weekday (0-6, 0 = Monday)
* %Y Year (0000-9999)
* %% The % character"""
def addTextSimple(pdf, r, text, p, font, size):
"""like addText, but with most parameters default
* pdf = the document
* r = the range
* text = the text
* p = the position
* font = the font
* size = the font size"""
def removeText(pdf, r):
"""Remove any text added by libcpdf from the given pages."""
r = range_of_list(r)
def textWidth(font, string):
"""Return the width of a given string in the given font in thousandths of a
point."""
def addContent(content, before, pdf, r):
"""Add page content before (if True) or after (if False) the existing
content to pages in the given range in the given PDF. Warning: this is a low
level function requiring understanding of the PDF format."""
def stampAsXObject(pdf, r, stamp_pdf):
"""Stamps stamp_pdf onto the pages in the given range in pdf as a shared
Form XObject. The name of the newly-created XObject is returned, for use
with addContent. """

View File

@ -0,0 +1,26 @@
# CHAPTER 9. Multipage facilities
def twoUp(pdf):
"""Impose a document two up by retaining the existing page
size, scaling pages down."""
def twoUpStack(pdf):
"""Impose a document two up by doubling the page size,
to fit two pages on one."""
def padBefore(pdf, r):
"""Adds a blank page before each page in the given range."""
def padAfter(pdf, r):
"""Adds a blank page after each page in the given range."""
def padEvery(pdf, n):
"""Adds a blank page after every n pages."""
def padMultiple(pdf, n):
"""Adds pages at the end to pad the file to a multiple of n pages in
length."""
def padMultipleBefore(pdf, n):
"""Adds pages at the beginning to pad the file to a
multiple of n pages in length."""

View File

@ -0,0 +1,3 @@
# CHAPTER 10. Annotations
# Not in the library version.

View File

@ -0,0 +1,265 @@
# CHAPTER 11. Document Information and Metadata
def isLinearized(filename):
"""Finds out if a document is linearized as quickly
as possible without loading it."""
def getVersion(pdf):
"""Return the minor version number of a document."""
def getMajorVersion(pdf):
"""Return the major version number of a document."""
def getTitle(pdf):
"""Return the title of a document."""
def getAuthor(pdf):
"""Return the author of a document."""
def getSubject(pdf):
"""Return the subject of a document."""
def getKeywords(pdf):
"""Return the keywords of a document."""
def getCreator(pdf):
"""Return the creator of a document."""
def getProducer(pdf):
"""Return the producer of a document."""
def getCreationDate(pdf):
"""Return the creation date of a document."""
def getModificationDate(pdf):
"""Return the modification date of a document."""
def getTitleXMP(pdf):
"""Return the XMP title of a document."""
def getAuthorXMP(pdf):
"""Return the XMP author of a document."""
def getSubjectXMP(pdf):
"""Return the XMP subject of a document."""
def getKeywordsXMP(pdf):
"""Return the XMP keywords of a document."""
def getCreatorXMP(pdf):
"""Return the XMP creator of a document."""
def getProducerXMP(pdf):
"""Return the XMP producer of a document."""
def getCreationDateXMP(pdf):
"""Return the XMP creation date of a document."""
def getModificationDateXMP(pdf):
"""Return the XMP modification date of a document."""
def setTitle(pdf, s):
"""Set the title of a document."""
def setAuthor(pdf, s):
"""Set the author of a document."""
def setSubject(pdf, s):
"""Set the subject of a document."""
def setKeywords(pdf, s):
"""Set the keywords of a document."""
def setCreator(pdf, s):
"""Set the creator of a document."""
def setProducer(pdf, s):
"""Set the producer of a document."""
def setCreationDate(pdf, s):
"""Set the creation date of a document."""
def setModificationDate(pdf, s):
"""Set the modification date of a document."""
def setTitleXMP(pdf, s):
"""Set the XMP title of a document."""
def setAuthorXMP(pdf, s):
"""Set the XMP author of a document."""
def setSubjectXMP(pdf, s):
"""Set the XMP subject of a document."""
def setKeywordsXMP(pdf, s):
"""Set the XMP keywords of a document."""
def setCreatorXMP(pdf, s):
"""Set the XMP creator of a document."""
def setProducerXMP(pdf, s):
"""Set the XMP producer of a document."""
def setCreationDateXMP(pdf, s):
"""Set the XMP creation date of a document."""
def setModificationDateXMP(pdf, s):
"""Set the XMP modification date of a document."""
def getDateComponents(string):
"""Return the components (year, month, day, hour, minute, second,
hour_offset, minute_offset) from a PDF date string.
Month 1-12, day 1-31, hours (0-23), minutes (0-59), seconds
(0-59), hour_offset is the offset from UT in hours (-23 to 23);
minute_offset is the offset from UT in minutes (-59 to 59)."""
def dateStringOfComponents(cs):
"""Build a PDF date string from a (year, month, day, hour, minute, second,
hour_offset, minute_offset) tuple.
Dates: Month 1-12, day 1-31, hours (0-23), minutes (0-59), seconds
(0-59), hour_offset is the offset from UT in hours (-23 to 23);
minute_offset is the offset from UT in minutes (-59 to 59)."""
def getPageRotation(pdf, pagenumber):
"""Get the viewing rotation for a given page."""
def hasBox(pdf, pagenumber, boxname):
"""Returns True, if the page has the given box. E.g "/CropBox" """
def getMediaBox(pdf, pagenumber):
"""Get the media box of a page, given the document and page number, as
min x, max x, min y, max y in points. Only succeeds if such a box exists, as
checked by hasBox."""
def getCropBox(pdf, pagenumber):
"""Get the crop box of a page, given the document and page number, as
min x, max x, min y, max y in points. Only succeeds if such a box exists, as
checked by hasBox."""
def getTrimBox(pdf, pagenumber):
"""Get the trim box of a page, given the document and page number, as
min x, max x, min y, max y in points. Only succeeds if such a box exists, as
checked by hasBox."""
def getArtBox(pdf, pagenumber):
"""Get the art box of a page, given the document and page number, as
min x, max x, min y, max y in points. Only succeeds if such a box exists, as
checked by hasBox."""
def getBleedBox(pdf, pagenumber):
"""Get the bleed box of a page, given the document and page number, as
min x, max x, min y, max y in points. Only succeeds if such a box exists, as
checked by hasBox."""
def setMediaBox(pdf, r, minx, maxx, miny, maxy):
"""Set the media box given the document, page range, min x, max x,
min y, max y in points."""
def setCropBox(pdf, r, minx, maxx, miny, maxy):
"""Set the crop box given the document, page range, min x, max x,
min y, max y in points."""
def setTrimBox(pdf, r, minx, maxx, miny, maxy):
"""Set the trim box given the document, page range, min x, max x,
min y, max y in points."""
def setArtBox(pdf, r, minx, maxx, miny, maxy):
"""Set the art box given the document, page range, min x, max x,
min y, max y in points."""
def setBleedBox(pdf, r, minx, maxx, miny, maxy):
"""Set the bleed box given the document, page range, min x, max x,
min y, max y in points."""
def markTrapped(pdf):
"""Mark a document as trapped."""
def markUntrapped(pdf):
"""Mark a document as untrapped."""
def markTrappedXMP(pdf):
"""Mark a document as trapped in XMP metadata."""
def markUntrappedXMP(pdf):
"""Mark a document as untrapped in XMP metadata."""
def setPageLayout(pdf, layout):
"""Set the page layout for a document."""
def setPageMode(pdf, mode):
"""Set the page mode for a document."""
def hideToolbar(pdf, flag):
"""Sets the hide toolbar flag."""
def hideMenubar(pdf, flag):
"""Set the hide menu bar flag."""
def hideWindowUi(pdf, flag):
"""Set the hide window UI flag."""
def fitWindow(pdf, flag):
"""Set the fit window flag."""
def centerWindow(pdf, flag):
"""Set the center window flag."""
def displayDocTitle(pdf, flag):
"""Set the display document title flag."""
def openAtPage(pdf, fitflag, pagenumber):
"""Set the PDF to open, possibly with zoom-to-fit, at the given page
number. """
def setMetadataFromFile(pdf, filename):
"""Set the XMP metadata of a document, given a file name."""
def setMetadataFromByteArray(pdf, data):
"""Set the XMP metadata from an array of bytes."""
def getMetadata(pdf):
"""Return the XMP metadata as a byte array of type bytes"""
def removeMetadata(pdf):
"""Remove the XMP metadata from a document"""
def createMetadata(pdf):
"""Builds fresh XMP metadata as well as possible from existing
metadata in the document."""
def setMetadataDate(pdf, date):
"""Set the metadata date for a PDF. The date is given in PDF date format --
cpdf will convert it to XMP format. The date 'now' means now."""
def getPageLabels(pdf):
"""Get page labels as a list of tuples (style, prefix, offset, startvalue)
For example, a document might have five pages of introduction with roman
numerals, followed by the rest of the pages in decimal arabic, numbered
from one. First label:
* labelstyle = LowercaseRoman
* labelprefix = ""
* startpage = 1
* startvalue = 1
Second label:
* labelstyle = DecimalArabic
* labelprefix = ""
* startpage = 6
* startvalue = 1 """
def addPageLabels(pdf, label, progress):
"""Add one group of page labels from a tuple (style, prefix, offset, range).
The prefix is prefix text for each label. The range is the page range the
labels apply to. Offset can be used to shift the numbering up or down."""
def removePageLabels(pdf):
"""Removes all page labels from the document."""
def getPageLabelStringForPage(pdf, pagenumber):
"""Calculate the full label string for a given page, and return it."""

View File

@ -0,0 +1,23 @@
# CHAPTER 12. File Attachments
def attachFile(filename, pdf):
"""Attach a file to the pdf. It is attached at document level."""
def attachFileToPage(filename, pdf, pagenumber):
"""Attach a file, given its file name, pdf, and the page number to which
it should be attached."""
def attachFileFromMemory(data, filename, pdf):
"""Attach a file from a byte array. It is attached at document level."""
def attachFileToPageFromMemory(data, filename, pdf, pagenumber):
"""Attach a file to a given page from a byte array. It is attached at
page level."""
def removeAttachedFiles(pdf):
"""Remove all page- and document-level attachments from a document."""
def getAttachments(pdf):
"""List information about attachments. Returns a list of tuples
(name, page number, byte array of data). Page 0 = document-level
attachment."""

View File

@ -0,0 +1,6 @@
# CHAPTER 13. Images
def getImageResolution(pdf, min_required_resolution):
"""Return a list of all uses of images in the PDF which do not meet the
minimum required resolution in dpi as tuples of:
(pagenumber, name, x pixels, y pixels, x resolution, y resolution)"""

View File

@ -0,0 +1,12 @@
# CHAPTER 14. Fonts
def getFontInfo(pdf):
"""Get a list of (pagenumber, fontname, fonttype, fontencoding) tuples,
showing each font used on each page."""
def removeFonts(pdf):
"""Remove all font data from a file."""
def copyFont(pdf, pdf2, r, pagenumber, fontname):
"""Copy the given font from the given page in the pdf PDF to every page in
the pdf2 PDF. The new font is stored under its font name."""

View File

@ -0,0 +1,6 @@
# CHAPTER 15. PDF and JSON
def outputJSON(filename, parse_content, no_stream_data, pdf):
"""Output a PDF in JSON format to the given filename. If parse_content is
True, page content is parsed. If no_stream_data is True, all stream data is
suppressed entirely."""

View File

@ -0,0 +1,16 @@
# CHAPTER 16. Optional Content Groups
def getOCGList(pdf):
"""Return a list of Optional Content Groups in the given pdf as strings."""
def OCGRename(pdf, n_from, n_to):
"""Rename an optional content group."""
def OCGOrderAll(pdf):
"""Ensure that every optional content group appears in the OCG order list."""
def OCGCoalesce(pdf):
"""Coalesce optional content groups. For example, if we merge or stamp two
files both with an OCG called "Layer 1", we will have two different
optional content groups. This function will merge the two into a single
optional content group."""

View File

@ -0,0 +1,41 @@
# CHAPTER 17. Miscellaneous
def draft(pdf, r, boxes):
"""Remove images on the given pages, replacing
them with crossed boxes if 'boxes' is True."""
def removeAllText(pdf, r):
"""Remove all text from the given pages in a document."""
def blackText(pdf, r):
"""Blacken all text on the given pages."""
def blackLines(pdf, r):
"""Blacken all lines on the given pages."""
def blackFills(pdf, r):
"""Blacken all fills on the given pages."""
def thinLines(pdf, r, linewidth):
"""Thicken every line less than
linewidth to linewidth. Thickness given in points."""
def copyId(pdf, pdf2):
"""Copy the /ID from one pdf to pdf2."""
def removeId(pdf):
"""Remove a document's /ID"""
def setVersion(pdf, version):
"""Set the minor version number of a document."""
def setFullVersion(pdf, major, minor):
"""Set the major and minor version number of
a document."""
def removeDictEntry(pdf, key):
"""Remove any dictionary entry with the given
key anywhere in the document."""
def removeClipping(pdf, r):
"""Remove all clipping from pages in the given range"""