More pysplits for v2.7

This commit is contained in:
John Whitington 2024-04-17 08:14:40 +08:00
parent 94258b5014
commit 02502881e8
10 changed files with 355 additions and 106 deletions

View File

@ -1,4 +1,11 @@
# CHAPTER 10. Annotations # CHAPTER 10. Annotations
def annotationsJSON(pdf): def annotationsJSON(pdf):
"""Get the annotations in JSON format.""" """Gets the annotations in JSON format."""
def removeAnnotations(pdf, r):
"""Removes all annotations from pages in the given range."""
def setAnnotationsJSON(pdf, data):
"""Adds the annotations given in JSON format to the PDF, on top of any
existing annotations."""

View File

@ -1,8 +1,25 @@
# CHAPTER 11. Document Information and Metadata # CHAPTER 11. Document Information and Metadata
def isLinearized(filename): def isLinearized(filename):
"""Finds out if a document is linearized as quickly """Finds out if a document is linearized as quickly as possible without
as possible without loading it.""" loading it."""
def hasAcroForm(pdf):
"""Returns True if the document has an AcroForm."""
def getSubformats(pdf):
"""Returns a list of the subformats of the PDF, if any."""
def hasObjectStreams(pdf):
"""Returns True if a document was written using object streams."""
def id1(pdf):
"""Return the first ID string of the PDF, if any, in hexadecimal string
format."""
def id2(pdf):
"""Return the second ID string of the PDF, if any, in hexadecimal string
format."""
def getVersion(pdf): def getVersion(pdf):
"""Return the minor version number of a document.""" """Return the minor version number of a document."""
@ -110,64 +127,66 @@ def getDateComponents(string):
"""Return the components (year, month, day, hour, minute, second, """Return the components (year, month, day, hour, minute, second,
hour_offset, minute_offset) from a PDF date string. hour_offset, minute_offset) from a PDF date string.
Month 1-12, day 1-31, hours (0-23), minutes (0-59), seconds Month 1-12, day 1-31, hours (0-23), minutes (0-59), seconds (0-59),
(0-59), hour_offset is the offset from UT in hours (-23 to 23); hour_offset is the offset from UT in hours (-23 to 23); minute_offset is
minute_offset is the offset from UT in minutes (-59 to 59).""" the offset from UT in minutes (-59 to 59)."""
def dateStringOfComponents(cs): def dateStringOfComponents(cs):
"""Build a PDF date string from a (year, month, day, hour, minute, second, """Build a PDF date string from a (year, month, day, hour, minute, second,
hour_offset, minute_offset) tuple. hour_offset, minute_offset) tuple.
Dates: Month 1-12, day 1-31, hours (0-23), minutes (0-59), seconds Dates: Month 1-12, day 1-31, hours (0-23), minutes (0-59), seconds (0-59),
(0-59), hour_offset is the offset from UT in hours (-23 to 23); hour_offset is the offset from UT in hours (-23 to 23); minute_offset is
minute_offset is the offset from UT in minutes (-59 to 59).""" the offset from UT in minutes (-59 to 59)."""
def getPageRotation(pdf, pagenumber): def getPageRotation(pdf, pagenumber):
"""Get the viewing rotation for a given page.""" """Get the viewing rotation for a given page."""
def hasBox(pdf, pagenumber, boxname): def hasBox(pdf, pagenumber, boxname):
"""Returns True, if the page has the given box. E.g "/CropBox" """ """Returns True, if the page has the given box. E.g "/CropBox"."""
def numAnnots(pdf, pagenumber):
"""Return the number of annotations on the given page in the given PDF."""
def getMediaBox(pdf, pagenumber): def getMediaBox(pdf, pagenumber):
"""Get a mediabox box given the document, page range, min x, max x, """Get a mediabox box given the document, page range, min x, max x, min y,
min y, max y in points. Only succeeds if such a box exists, as checked by max y in points. Only succeeds if such a box exists, as checked by
hasBox""" hasBox."""
def getCropBox(pdf, pagenumber): def getCropBox(pdf, pagenumber):
"""Get a crop box given the document, page range, min x, max x, """Get a crop box given the document, page range, min x, max x, min y, max
min y, max y in points. Only succeeds if such a box exists, as checked by y in points. Only succeeds if such a box exists, as checked by hasBox."""
hasBox"""
def getTrimBox(pdf, pagenumber): def getTrimBox(pdf, pagenumber):
"""Get a trim box given the document, page range, min x, max x, """Get a trim box given the document, page range, min x, max x, min y, max
min y, max y in points. Only succeeds if such a box exists, as checked by y in points. Only succeeds if such a box exists, as checked by hasBox."""
hasBox"""
def getArtBox(pdf, pagenumber): def getArtBox(pdf, pagenumber):
"""Get an art box given the document, page range, min x, max x, """Get an art box given the document, page range, min x, max x, min y, max
min y, max y in points. Only succeeds if such a box exists, as checked by y in points. Only succeeds if such a box exists, as checked by hasBox."""
hasBox"""
def getBleedBox(pdf, pagenumber): def getBleedBox(pdf, pagenumber):
"""Get a bleed box given the document, page range, min x, max x, """Get a bleed box given the document, page range, min x, max x, min y, max
min y, max y in points. Only succeeds if such a box exists, as checked by y in points. Only succeeds if such a box exists, as checked by hasBox."""
hasBox"""
def setMediaBox(pdf, r, minx, maxx, miny, maxy): def setMediaBox(pdf, r, minx, maxx, miny, maxy):
"""Set the media box given the document, page range, min x, max x, """Set the media box given the document, page range, min x, max x, min y,
min y, max y in points.""" max y in points."""
def setCropBox(pdf, r, minx, maxx, miny, maxy): def setCropBox(pdf, r, minx, maxx, miny, maxy):
"""Set the crop box given the document, page range, min x, max x, """Set the crop box given the document, page range, min x, max x, min y,
min y, max y in points.""" max y in points."""
def setTrimBox(pdf, r, minx, maxx, miny, maxy): def setTrimBox(pdf, r, minx, maxx, miny, maxy):
"""Set the trim box given the document, page range, min x, max x, """Set the trim box given the document, page range, min x, max x, min y,
min y, max y in points.""" max y in points."""
def setArtBox(pdf, r, minx, maxx, miny, maxy): def setArtBox(pdf, r, minx, maxx, miny, maxy):
"""Set the art box given the document, page range, min x, max x, """Set the art box given the document, page range, min x, max x, min y, max
min y, max y in points.""" y in points."""
def pageInfoJSON(pdf):
"""Returns JSON data for the page information."""
def setBleedBox(pdf, r, minx, maxx, miny, maxy): def setBleedBox(pdf, r, minx, maxx, miny, maxy):
"""Set the bleed box given the document, page range, min x, max x, """Set the bleed box given the document, page range, min x, max x,
@ -188,31 +207,64 @@ def markUntrappedXMP(pdf):
def setPageLayout(pdf, layout): def setPageLayout(pdf, layout):
"""Set the page layout for a document.""" """Set the page layout for a document."""
def getPageLayout(pdf):
"""Get the page layout for a document."""
def setPageMode(pdf, mode): def setPageMode(pdf, mode):
"""Set the page mode for a document.""" """Set the page mode for a document."""
def getPageMode(pdf):
"""Get the page mode for a document"""
def hideToolbar(pdf, flag): def hideToolbar(pdf, flag):
"""Sets the hide toolbar flag.""" """Sets the hide toolbar flag."""
def getHideToolbar(pdf):
"""Get the hide toolbar flag."""
def hideMenubar(pdf, flag): def hideMenubar(pdf, flag):
"""Set the hide menu bar flag.""" """Set the hide menu bar flag."""
def getHideMenubar(pdf):
"""Get the hide menubar flag."""
def hideWindowUi(pdf, flag): def hideWindowUi(pdf, flag):
"""Set the hide window UI flag.""" """Set the hide window UI flag."""
def getHideWindowUi(pdf):
"""Get the hide window UI flag."""
def fitWindow(pdf, flag): def fitWindow(pdf, flag):
"""Set the fit window flag.""" """Set the fit window flag."""
def getFitWindow(pdf):
"""Get the fit window flag."""
def centerWindow(pdf, flag): def centerWindow(pdf, flag):
"""Set the center window flag.""" """Set the center window flag."""
def getCenterWindow(pdf):
"""Get the center window flag."""
def displayDocTitle(pdf, flag): def displayDocTitle(pdf, flag):
"""Set the display document title flag.""" """Set the display document title flag."""
def getDisplayDocTitle(pdf):
"""Get the display document title flag."""
def nonFullScreenPageMode(pdf, flag):
"""Set the non full screen page mode flag."""
def getNonFullScreenPageMode(pdf):
"""Get the non full screen page mode flag."""
def openAtPage(pdf, fitflag, pagenumber): def openAtPage(pdf, fitflag, pagenumber):
"""Set the PDF to open, possibly with zoom-to-fit, at the given page number. """Set the PDF to open, possibly with zoom-to-fit, at the given page number.
""" """
def openAtPageCustom(pdf, custom):
"""Set a custom openAtPage description."""
def setMetadataFromFile(pdf, filename): def setMetadataFromFile(pdf, filename):
"""Set the XMP metadata of a document, given a file name.""" """Set the XMP metadata of a document, given a file name."""
@ -223,7 +275,7 @@ def getMetadata(pdf):
"""Return the XMP metadata as a byte array of type bytes""" """Return the XMP metadata as a byte array of type bytes"""
def removeMetadata(pdf): def removeMetadata(pdf):
"""Remove the XMP metadata from a document""" """Remove the XMP metadata from a document."""
def createMetadata(pdf): def createMetadata(pdf):
"""Builds fresh XMP metadata as good as possible from existing """Builds fresh XMP metadata as good as possible from existing
@ -263,3 +315,6 @@ def removePageLabels(pdf):
def getPageLabelStringForPage(pdf, pagenumber): def getPageLabelStringForPage(pdf, pagenumber):
"""Calculate the full label string for a given page, and return it.""" """Calculate the full label string for a given page, and return it."""
def compositionJSON(filesize, pdf):
"""Get the composition in JSON format."""

View File

@ -18,6 +18,5 @@ def removeAttachedFiles(pdf):
"""Remove all page- and document-level attachments from a document.""" """Remove all page- and document-level attachments from a document."""
def getAttachments(pdf): def getAttachments(pdf):
"""List information about attachments. Returns a list of tuples """List information about attachments. Returns a list of tuples (name,
(name, page number, byte array of data). Page 0 = document-level page number, byte array of data). Page 0 = document-level attachment."""
attachment."""

View File

@ -1,6 +1,19 @@
# CHAPTER 13. Images # CHAPTER 13. Images
def getImageResolution(pdf, min_required_resolution): def getImageResolution(pdf, resolution):
"""Return a list of all uses of images in the PDF which do not meet the """Return a list of all uses of images in the PDF which do not meet the
minimum required resolution in dpi as tuples of: minimum required resolution in dpi as tuples of:
(pagenumber, name, x pixels, y pixels, x resolution, y resolution)""" (pagenumber, name, x pixels, y pixels, x resolution, y resolution, objnum).
"""
def imageResolutionJSON(pdf, resolution):
"""Return the image resolution data in JSON format."""
def getImages(pdf):
"""Return a list of images in the PDF as tuples of:
(object number, pages occurring, image name, width, height, size,
bitspercomponent, color space, filter)
"""
def imagesJSON(pdf):
"""Return the list of images in the PDF in JSON format."""

View File

@ -4,6 +4,9 @@ def getFontInfo(pdf):
"""Get a list of (pagenumber, fontname, fonttype, fontencoding) tuples, """Get a list of (pagenumber, fontname, fonttype, fontencoding) tuples,
showing each font used on each page.""" showing each font used on each page."""
def fontsJSON(pdf):
"""Return font information in JSON format."""
def removeFonts(pdf): def removeFonts(pdf):
"""Remove all font data from a file.""" """Remove all font data from a file."""

View File

@ -1,5 +1,9 @@
# CHAPTER 15. PDF and JSON # CHAPTER 15. PDF and JSON
def JSONUTF8(utf8):
"""Set the JSON output format. If true, the newer UTF8 format is used.
Default: False."""
def outputJSON(filename, parse_content, no_stream_data, decompress_streams, pdf): def outputJSON(filename, parse_content, no_stream_data, decompress_streams, pdf):
"""Output a PDF in JSON format to the given filename. If parse_content is """Output a PDF in JSON format to the given filename. If parse_content is
True, page content is parsed. If decompress_streams is True, streams are True, page content is parsed. If decompress_streams is True, streams are

View File

@ -7,7 +7,8 @@ def OCGRename(pdf, n_from, n_to):
"""Rename an optional content group.""" """Rename an optional content group."""
def OCGOrderAll(pdf): def OCGOrderAll(pdf):
"""Ensure that every optional content group appears in the OCG order list.""" """Ensure that every optional content group appears in the OCG order list.
"""
def OCGCoalesce(pdf): def OCGCoalesce(pdf):
"""Coalesce optional content groups. For example, if we merge or stamp two """Coalesce optional content groups. For example, if we merge or stamp two

View File

@ -1,20 +1,37 @@
# CHAPTER 17. Creating New PDFs # CHAPTER 17. Making New PDFs
def blankDocument(w, h, pages): def blankDocument(w, h, pages):
""" Create a blank document """Create a blank document with pages of the given width (in points),
with pages of the given width (in points), height (in points), and number height (in points), and number of pages."""
of pages."""
def blankDocumentPaper(papersize, pages): def blankDocumentPaper(papersize, pages):
"""Create a blank document with pages of the given paper size, and number """Create a blank document with pages of the given paper size, and number
of pages.""" of pages."""
def textToPDF(w, h, font, fontsize, filename): def textToPDF(w, h, font, fontsize, filename):
"""textToPDF(w, h, font, fontsize, filename) typesets a UTF8 text file """Typesets a UTF8 text file ragged right on a page of size w * h in points
ragged right on a page of size w * h in points in the given font and font in the given font and font size."""
size."""
def textToPDFMemory(w, h, font, fontsize, data):
"""Typesets a UTF8 text file ragged right on a page of size w * h in points
in the given font and font size."""
def textToPDFPaper(papersize, font, fontsize, filename): def textToPDFPaper(papersize, font, fontsize, filename):
"""textToPDF(papersize font, fontsize, filename) typesets a UTF8 text file """Typesets a UTF8 text file ragged right on a page of the given size in
ragged right on a page of the given size in the given font and font the given font and font size."""
size."""
def textToPDFPaperMemory(papersize, font, fontsize, data):
"""Typesets a UTF8 text file ragged right on a page of the given size in
the given font and font size."""
def fromPNG(filename):
"""Builds a PDF from a non-interlaced non-transparent PNG file."""
def fromPNGMemory(data):
"""Builds a PDF from a non-interlaced non-transparent PNG file bytearray."""
def fromJPEG(filename):
"""Builds a PDF from a JPEG file."""
def fromJPEGMemory(data):
"""Builds a PDF from a JPEG file bytearray."""

View File

@ -1,57 +1,208 @@
# CHAPTER 18. Miscellaneous # CHAPTER 18. Drawing on PDFs
def draft(pdf, r, boxes): def drawBegin():
"""Remove images on the given pages, replacing """Sets up the drawing process. It must be called before any other draw*
them with crossed boxes if 'boxes' is True.""" function."""
def removeAllText(pdf, r): def drawEnd(pdf, r):
"""Remove all text from the given pages in a document.""" """Commits the drawing to the given PDF on pages in the given range."""
def blackText(pdf, r): def drawEndExtended(pdf, r, underneath, bates, filename):
"""Blacken all text on the given pages.""" """The same as drawEnd, but provides the special parameters which may be
required when using drawSText."""
def blackLines(pdf, r): def drawRect(x, y, w, h):
"""Blacken all lines on the given pages.""" """Add a rectangle to the current path."""
def blackFills(pdf, r): def drawTo(x, y):
"""Blacken all fills on the given pages.""" """Move the current point to (x, y)."""
def thinLines(pdf, r, linewidth): def drawLine(x, y):
"""Thicken every line less than """Adds a line from the current point to (x, y) to the current path."""
linewidth to linewidth. Thickness given in points."""
def copyId(pdf, pdf2): def drawBez(x1, y1, x2, y2, x3, y3):
"""Copy the /ID from one pdf to pdf2.""" """Adds a Bezier curve to the current path."""
def removeId(pdf): def drawBez23(x2, y2, x3, y3):
"""Remove a document's /ID""" """Adds a Bezier curve with (x1, y1) = current point."""
def setVersion(pdf, version): def drawBez13(x1, y1, x3, y3):
"""Set the minor version number of a document.""" """Adds a Bezier curve with (x3, y3) = new current point."""
def setFullVersion(pdf, major, minor): def drawCircle(x, y, r):
"""Set the major and minor version number of """Adds a circle to the current path."""
a document."""
def removeDictEntry(pdf, key): def drawStroke():
"""Remove any dictionary entry with the given """Stroke the current path and clear it."""
key anywhere in the document."""
def removeDictEntrySearch(pdf, key, searchterm): def drawFill():
"""Remove any dictionary entry with the given """Fills the current path with a non-zero winding rule, and clears it. """
key anywhere in the document, if its value matches the given search term."""
def replaceDictEntry(pdf, key, newvalue): def drawFillEo():
"""Replace any dictionary entry with the given """Fills the current path with an even-odd winding rule, and clears it. """
key anywhere in the document using the new value given."""
def replaceDictEntrySearch(pdf, key, newvalue, searchterm): def drawStrokeFill():
"""Replace any dictionary entry with the given key anywhere in the """Fills and then strokes the current path with a non-zero winding rule,
document, if its value matches the given search term, with the new value and clears it. """
given."""
def getDictEntries(pdf, key): def drawStrokeFillEo():
"""Return JSON of any dict entries with the given key.""" """Fills and then strokes the current path with an even-odd winding rule,
and clears it. """
def removeClipping(pdf, r): def drawClose():
"""Remove all clipping from pages in the given range""" """Closes the current path by appending a straight line segment from the
current point to the starting point of the subpath. """
def drawClip():
"""Uses the current path as a clipping region, using the non-zero winding
rule. """
def drawClipEo():
"""Uses the current path as a clipping region, using the even-odd winding
rule. """
def drawStrokeColGrey(g):
"""Changes to a greyscale stroke colourspace and sets the stroke colour.
"""
def drawStrokeColRGB(r, g, b):
"""Changes to an RGB stroke colourspace and sets the stroke colour. """
def drawStrokeColCYMK(c, y, m, k):
"""Changes to a CYMK stroke colourspace and sets the stroke colour. """
def drawFillColGrey(g):
"""Changes to a greyscale fill colourspace and sets the fill colour. """
def drawFillColRGB(r, g, b):
"""Changes to an RGB fill colourspace and sets the fill colour. """
def drawFillColCYMK(c, y, m, k):
"""Changes to a CYMK fill colourspace and sets the fill colour. """
def drawThick(thickness):
"""Sets the line thickness."""
def drawCap(captype):
"""Sets the line cap."""
def drawJoin(jointype):
"""Sets the line join type"""
def drawMiter(miter):
"""Sets the miter limit."""
def drawDash(description):
"""Sets the line dash style"""
def drawPush():
"""Saves the current graphics state on the stack. """
def drawPop():
"""Restores the graphics state from the stack. """
def drawMatrix(a, b, c, d, e, f):
"""Appends the given matrix to the Current Transformation Matrix. """
def drawMTrans(tx, ty):
"""Appends a translation by (tx, ty) to the Current Transformation Matrix.
"""
def drawMRot(x, y, a):
"""Appends a rotation by a around (x, y) to the Current Transformation
Matrix. """
def drawMScale(x, y, sx, sy):
"""Appends a scaling by (sx, sy) around (x, y) to the Current
Transformation Matrix. """
def drawMShearX(x, y, a):
"""Appends an X shearing of angle a around (x, y) to the Current
Transformation Matrix. """
def drawMShearY(x, y, a):
"""Appends a Y shearing of angle a around (x, y) to the Current
Transformation Matrix. """
def drawXObjBBox(x, y, w, h):
"""Sets the XObject bounding box. """
def drawXObj(name):
"""Begins the storing of an XObject. """
def drawEndXObj():
"""Ends the storing of an XObject."""
def drawUse(name):
"""Uses the named XObject. """
def drawJPEG(name, filename):
"""Loads a JPEG from the given file, storing it under the given name. """
def drawJPEGMemory(name, data):
"""Loads a JPEG from the given bytearray, storing it under the given name.
"""
def drawPNG(name, filename):
"""Loads a non-interlaced non-transparent PNG from the given file, storing
it under the given name. """
def drawPNGMemory(name, data):
"""Loads a non-interlaced non-transparent PNG from the given bytearray,
storing it under the given name. """
def drawImage(name):
"""Draws a stored image. To draw at the expected size, it is required to
scale the Current Transformation Matrix by the width and height of the
image. """
def drawFillOpacity(n):
"""Sets the fill opacity."""
def drawStrokeOpacity(n):
"""Sets the stroke opacity."""
def drawBT():
"""Begins a text section."""
def drawET():
"""Ends a text section."""
def drawFont(name):
"""Sets the font."""
def drawFontSize(n):
"""Sets the font size."""
def drawText(text):
"""Draws text."""
def drawSText(text):
"""Draws text with %Specials. You may need to use drawEndExtended instead of
drawEnd later, to provide the extra information required.
"""
def drawLeading(n):
"""Sets the leading."""
def drawCharSpace(n):
"""Sets the character spacing."""
def drawWordSpace(n):
"""Sets the word spacing."""
def drawTextScale(n):
"""Sets the text scaling."""
def drawRenderMode(n):
"""Sets the text rendering mode."""
def drawRise(n):
"""Sets the text rise."""
def drawNL():
"""Moves to the next line. """
def drawNewPage():
"""Moves to the next page, creating it if necessary, and setting the range
to just that new page. """

View File

@ -1,8 +1,8 @@
# CHAPTER 18. Miscellaneous # CHAPTER 19. Miscellaneous
def draft(pdf, r, boxes): def draft(pdf, r, boxes):
"""Remove images on the given pages, replacing """Remove images on the given pages, replacing them with crossed boxes if
them with crossed boxes if 'boxes' is True.""" 'boxes' is True."""
def removeAllText(pdf, r): def removeAllText(pdf, r):
"""Remove all text from the given pages in a document.""" """Remove all text from the given pages in a document."""
@ -17,8 +17,8 @@ def blackFills(pdf, r):
"""Blacken all fills on the given pages.""" """Blacken all fills on the given pages."""
def thinLines(pdf, r, linewidth): def thinLines(pdf, r, linewidth):
"""Thicken every line less than """Thicken every line less than linewidth to linewidth. Thickness given in
linewidth to linewidth. Thickness given in points.""" points."""
def copyId(pdf, pdf2): def copyId(pdf, pdf2):
"""Copy the /ID from one pdf to pdf2.""" """Copy the /ID from one pdf to pdf2."""
@ -30,20 +30,19 @@ def setVersion(pdf, version):
"""Set the minor version number of a document.""" """Set the minor version number of a document."""
def setFullVersion(pdf, major, minor): def setFullVersion(pdf, major, minor):
"""Set the major and minor version number of """Set the major and minor version number of a document."""
a document."""
def removeDictEntry(pdf, key): def removeDictEntry(pdf, key):
"""Remove any dictionary entry with the given """Remove any dictionary entry with the given key anywhere in the
key anywhere in the document.""" document."""
def removeDictEntrySearch(pdf, key, searchterm): def removeDictEntrySearch(pdf, key, searchterm):
"""Remove any dictionary entry with the given """Remove any dictionary entry with the given key anywhere in the document,
key anywhere in the document, if its value matches the given search term.""" if its value matches the given search term."""
def replaceDictEntry(pdf, key, newvalue): def replaceDictEntry(pdf, key, newvalue):
"""Replace any dictionary entry with the given """Replace any dictionary entry with the given key anywhere in the document
key anywhere in the document using the new value given.""" using the new value given."""
def replaceDictEntrySearch(pdf, key, newvalue, searchterm): def replaceDictEntrySearch(pdf, key, newvalue, searchterm):
"""Replace any dictionary entry with the given key anywhere in the """Replace any dictionary entry with the given key anywhere in the