2021-08-10 15:41:18 +02:00
|
|
|
# CHAPTER 11. Document Information and Metadata
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def isLinearized(filename):
|
|
|
|
"""isLinearized(filename) finds out if a document is linearized as quickly
|
|
|
|
as possible without loading it."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def getVersion(pdf):
|
|
|
|
"""vetVersion(pdf) returns the minor version number of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def getMajorVersion(pdf):
|
|
|
|
"""getMajorVersion(pdf) returns the minor version number of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def getTitle(pdf):
|
|
|
|
"""getTitle(pdf) returns the title of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def getAuthor(pdf):
|
|
|
|
"""getSubject(pdf) returns the subject of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def getSubject(pdf):
|
|
|
|
"""getSubject(pdf) returns the subject of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def getKeywords(pdf):
|
|
|
|
"""getKeywords(pdf) returns the keywords of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def getCreator(pdf):
|
|
|
|
"""getCreator(pdf) returns the creator of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def getProducer(pdf):
|
|
|
|
"""getProducer(pdf) returns the producer of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def getCreationDate(pdf):
|
|
|
|
"""getCreationDate(pdf) returns the creation date of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def getModificationDate(pdf):
|
|
|
|
"""getModificationDate(pdf) returns the modification date of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def getTitleXMP(pdf):
|
|
|
|
"""getTitleXMP(pdf) returns the XMP title of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def getAuthorXMP(pdf):
|
|
|
|
"""getAuthorXMP(pdf) returns the XMP author of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def getSubjectXMP(pdf):
|
|
|
|
"""getSubjectXMP(pdf) returns the XMP subject of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def getKeywordsXMP(pdf):
|
|
|
|
"""getKeywordsXMP(pdf) returns the XMP keywords of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def getCreatorXMP(pdf):
|
|
|
|
"""getCreatorXMP(pdf) returns the XMP creator of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def getProducerXMP(pdf):
|
|
|
|
"""getProducerXMP(pdf) returns the XMP producer of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def getCreationDateXMP(pdf):
|
|
|
|
"""getCreationDateXMP(pdf) returns the XMP creation date of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def getModificationDateXMP(pdf):
|
|
|
|
"""getModificationDateXMP(pdf) returns the XMP modification date of a
|
|
|
|
document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def setTitle(pdf, s):
|
|
|
|
"""setTitle(pdf) sets the title of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def setAuthor(pdf, s):
|
|
|
|
"""setAuthor(pdf) sets the author of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def setSubject(pdf, s):
|
|
|
|
"""setSubject(pdf) sets the subject of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def setKeywords(pdf, s):
|
|
|
|
"""setKeywords(pdf) sets the keywords of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def setCreator(pdf, s):
|
|
|
|
"""setCreator(pdf) sets the creator of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def setProducer(pdf, s):
|
|
|
|
"""setProducer(pdf) sets the producer of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def setCreationDate(pdf, s):
|
|
|
|
"""setCreationDate(pdf) sets the creation date of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def setModificationDate(pdf, s):
|
|
|
|
"""setModificationDate(pdf) sets the modifcation date of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def setTitleXMP(pdf, s):
|
|
|
|
"""setTitleXMP(pdf) set the XMP title of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def setAuthorXMP(pdf, s):
|
|
|
|
"""setAuthorXMP(pdf) set the XMP author of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def setSubjectXMP(pdf, s):
|
|
|
|
"""setSubjectXMP(pdf) set the XMP subject of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def setKeywordsXMP(pdf, s):
|
|
|
|
"""setKeywordsXMP(pdf) set the XMP keywords of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def setCreatorXMP(pdf, s):
|
|
|
|
"""setCreatorXMP(pdf) set the XMP creator of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def setProducerXMP(pdf, s):
|
|
|
|
"""setProducerXMP(pdf) set the XMP producer of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def setCreationDateXMP(pdf, s):
|
|
|
|
"""setCreationDateXMP(pdf) set the XMP creation date of a document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def setModificationDateXMP(pdf, s):
|
|
|
|
"""setModificationDateXMP(pdf) set the XMP modification date of a
|
|
|
|
document."""
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
def getDateComponents(string):
|
|
|
|
"""Dates: Month 1-31, day 1-31, hours (0-23), minutes (0-59), seconds
|
|
|
|
(0-59), hour_offset is the offset from UT in hours (-23 to 23);
|
|
|
|
minute_offset is the offset from UT in minutes (-59 to 59).
|
2021-07-26 20:50:33 +02:00
|
|
|
|
2021-08-10 15:41:18 +02:00
|
|
|
getDateComponents(datestring, year, month, day, hour, minute, second,
|
|
|
|
hour_offset, minute_offset) returns the components from a PDF date
|
|
|
|
string."""
|
|
|
|
|
|
|
|
def dateStringOfComponents(cs):
|
|
|
|
"""Dates: Month 1-31, day 1-31, hours (0-23), minutes (0-59), seconds
|
|
|
|
(0-59), hour_offset is the offset from UT in hours (-23 to 23);
|
|
|
|
minute_offset is the offset from UT in minutes (-59 to 59).
|
|
|
|
|
|
|
|
dateStringOfComponents(year, month, day, hour, minute, second,
|
|
|
|
hour_offset, minute_offset) builds a PDF date string from individual
|
|
|
|
components."""
|
|
|
|
|
|
|
|
def getPageRotation(pdf, pagenumber):
|
|
|
|
"""getPageRotation(pdf, pagenumber) gets the viewing rotation for a given
|
|
|
|
page."""
|
|
|
|
|
|
|
|
def hasBox(pdf, pagenumber, boxname):
|
|
|
|
"""hasBox(pdf, pagenumber, boxname) returns True, if that page has the
|
|
|
|
given box. E.g. "/CropBox" """
|
|
|
|
|
|
|
|
def getMediaBox(pdf, pagenumber):
|
|
|
|
"""These functions get a box given the document, page range, min x, max x,
|
|
|
|
min y, max y in points. Only succeeds if such a box exists, as checked by
|
|
|
|
hasBox"""
|
|
|
|
|
|
|
|
def getCropBox(pdf, pagenumber):
|
|
|
|
"""These functions get a box given the document, page range, min x, max x,
|
|
|
|
min y, max y in points. Only succeeds if such a box exists, as checked by
|
|
|
|
hasBox"""
|
|
|
|
|
|
|
|
def getTrimBox(pdf, pagenumber):
|
|
|
|
"""These functions get a box given the document, page range, min x, max x,
|
|
|
|
min y, max y in points. Only succeeds if such a box exists, as checked by
|
|
|
|
hasBox"""
|
|
|
|
|
|
|
|
def getArtBox(pdf, pagenumber):
|
|
|
|
"""These functions get a box given the document, page range, min x, max x,
|
|
|
|
min y, max y in points. Only succeeds if such a box exists, as checked by
|
|
|
|
hasBox"""
|
|
|
|
|
|
|
|
def getBleedBox(pdf, pagenumber):
|
|
|
|
"""These functions get a box given the document, page range, min x, max x,
|
|
|
|
min y, max y in points. Only succeeds if such a box exists, as checked by
|
|
|
|
hasBox"""
|
|
|
|
|
|
|
|
def setMediaBox(pdf, r, minx, maxx, miny, maxy):
|
|
|
|
"""These functions set a box given the document, page range, min x, max x,
|
|
|
|
min y, max y in points."""
|
|
|
|
|
|
|
|
def setCropBox(pdf, r, minx, maxx, miny, maxy):
|
|
|
|
"""These functions set a box given the document, page range, min x, max x,
|
|
|
|
min y, max y in points."""
|
|
|
|
|
|
|
|
def setTrimBox(pdf, r, minx, maxx, miny, maxy):
|
|
|
|
"""These functions set a box given the document, page range, min x, max x,
|
|
|
|
min y, max y in points."""
|
|
|
|
|
|
|
|
def setArtBox(pdf, r, minx, maxx, miny, maxy):
|
|
|
|
"""These functions set a box given the document, page range, min x, max x,
|
|
|
|
min y, max y in points."""
|
|
|
|
|
|
|
|
def setBleedBox(pdf, r, minx, maxx, miny, maxy):
|
|
|
|
"""These functions set a box given the document, page range, min x, max x,
|
|
|
|
min y, max y in points."""
|
|
|
|
|
|
|
|
def markTrapped(pdf):
|
|
|
|
"""markTrapped(pdf) marks a document as trapped."""
|
|
|
|
|
|
|
|
def markUntrapped(pdf):
|
|
|
|
"""markUntrapped(pdf) marks a document as untrapped."""
|
|
|
|
|
|
|
|
def markTrappedXMP(pdf):
|
|
|
|
"""markTrappedXMP(pdf) marks a document as trapped in XMP metadata."""
|
|
|
|
|
|
|
|
def markUntrappedXMP(pdf):
|
|
|
|
"""markUntrappedXMP(pdf) marks a document as untrapped in XMP metadata."""
|
|
|
|
|
|
|
|
"""Page layouts."""
|
|
|
|
singlePage
|
|
|
|
oneColumn
|
|
|
|
twoColumnLeft
|
|
|
|
twoColumnRight
|
|
|
|
twoPageLeft
|
|
|
|
twoPageRight
|
|
|
|
|
|
|
|
def setPageLayout(pdf, layout):
|
|
|
|
"""setPageLayout(pdf, layout) sets the page layout for a document."""
|
|
|
|
|
|
|
|
"""Page modes."""
|
|
|
|
useNone
|
|
|
|
useOutlines
|
|
|
|
useThumbs
|
|
|
|
useOC
|
|
|
|
useAttachments
|
|
|
|
|
|
|
|
|
|
|
|
def setPageMode(pdf, mode):
|
|
|
|
"""setPageMode(pdf, mode) sets the page mode for a document."""
|
|
|
|
|
|
|
|
def hideToolbar(pdf, flag):
|
|
|
|
"""hideToolbar(pdf, flag) sets the hide toolbar flag"""
|
|
|
|
|
|
|
|
def hideMenubar(pdf, flag):
|
|
|
|
"""hideMenubar(pdf, flag) sets the hide menu bar flag"""
|
|
|
|
|
|
|
|
def hideWindowUi(pdf, flag):
|
|
|
|
"""hideWindowUi(pdf, flag) sets the hide window UI flag"""
|
|
|
|
|
|
|
|
def fitWindow(pdf, flag):
|
|
|
|
"""fitWindow(pdf, flag) sets the fit window flag"""
|
|
|
|
|
|
|
|
def centerWindow(pdf, flag):
|
|
|
|
"""centerWindow(pdf, flag) sets the center window flag"""
|
|
|
|
|
|
|
|
def displayDocTitle(pdf, flag):
|
|
|
|
"""displayDocTitle(pdf, flag) sets the display doc title flag"""
|
|
|
|
|
|
|
|
def openAtPage(pdf, flag, pagenumber):
|
|
|
|
"""openAtPage(pdf, fit, pagenumber)"""
|
|
|
|
|
|
|
|
def setMetadataFromFile(pdf, filename):
|
|
|
|
"""setMetadataFromFile(pdf, filename) set the XMP metadata of a document,
|
|
|
|
given a file name."""
|
|
|
|
|
|
|
|
def setMetadataFromByteArray(pdf, data):
|
|
|
|
"""setMetadataFromByteArray(pdf, data, length) set the XMP metadata from
|
|
|
|
an array of bytes."""
|
|
|
|
|
|
|
|
def getMetadata(pdf):
|
|
|
|
"""getMetadata(pdf, &length) returns the XMP metadata as a byte array of
|
|
|
|
type bytes"""
|
|
|
|
|
|
|
|
def removeMetadata(pdf):
|
|
|
|
"""removeMetadata(pdf) removes the XMP metadata from a document"""
|
|
|
|
|
|
|
|
def createMetadata(pdf):
|
|
|
|
"""createMetadata(pdf) builds fresh metadata as best it can from existing
|
|
|
|
metadata in the document."""
|
|
|
|
|
|
|
|
def setMetadataDate(pdf, date):
|
|
|
|
"""setMetadataDate(pdf, date) sets the metadata date for a PDF. The date
|
|
|
|
is given in PDF date format -- cpdf will convert it to XMP format. The date
|
|
|
|
'now' means now."""
|
|
|
|
|
|
|
|
"""Label styles."""
|
|
|
|
decimalArabic
|
|
|
|
uppercaseRoman
|
|
|
|
lowercaseRoman
|
|
|
|
uppercaseLetters
|
|
|
|
lowercaseLetters
|
|
|
|
|
|
|
|
|
|
|
|
def getPageLabels(pdf):
|
|
|
|
"""Get page labels as a list of tuples (style, prefix, offset, startvalue)
|
|
|
|
|
|
|
|
For example, a document might have five pages of introduction with roman
|
|
|
|
numerals, followed by the rest of the pages in decimal arabic, numbered
|
|
|
|
from one:
|
|
|
|
|
|
|
|
labelstyle = LowercaseRoman
|
|
|
|
labelprefix = ""
|
|
|
|
startpage = 1
|
|
|
|
startvalue = 1
|
|
|
|
|
|
|
|
labelstyle = DecimalArabic
|
|
|
|
labelprefix = ""
|
|
|
|
startpage = 6
|
|
|
|
startvalue = 1 """
|
|
|
|
|
|
|
|
def addPageLabels(pdf, label, progress):
|
|
|
|
"""Add page labels.
|
|
|
|
|
|
|
|
addPageLabels(pdf, style, prefix, offset, range, progress)
|
|
|
|
|
|
|
|
The prefix is prefix text for each label. The range is the page range the
|
|
|
|
labels apply to. Offset can be used to shift the numbering up or down."""
|
|
|
|
|
|
|
|
def removePageLabels(pdf):
|
|
|
|
"""removePageLabels(pdf) removes the page labels from the document."""
|
|
|
|
|
|
|
|
def getPageLabelStringForPage(pdf, pagenumber):
|
|
|
|
"""getPageLabelStringForPage(pdf, page number) calculates the full label
|