cpdf-source/cpdfcommand.ml

4608 lines
167 KiB
OCaml
Raw Normal View History

2013-10-02 16:29:53 +02:00
(* cpdf command line tools *)
2021-10-15 18:36:11 +02:00
let demo = false
2020-01-30 11:42:24 +01:00
let noncomp = false
2014-11-17 19:48:17 +01:00
let major_version = 2
2023-11-15 18:26:43 +01:00
let minor_version = 7
2024-02-14 15:51:22 +01:00
let version_date = "(13th February 2024, RC1)"
2013-08-20 16:32:57 +02:00
open Pdfutil
open Pdfio
2023-11-14 18:47:44 +01:00
(* Join the strings in [strs] with a single space between consecutive
   elements, then strip leading and trailing whitespace from the result.
   Leading empty elements therefore leave no trace, while an empty element in
   the middle still yields two adjacent spaces. Returns "" for the empty
   list. *)
let combine_with_spaces strs =
  String.trim (String.concat " " strs)
2019-06-30 16:00:51 +02:00
(* Names of temporary files created during this run. They are removed by
   [exit] below. *)
let tempfiles = ref []

(* Replacement for the standard [exit]: try to remove every file listed in
   [tempfiles], then terminate the process with status [n].

   Removal is best-effort and per file: a file which cannot be removed is
   skipped, so that one failure does not prevent the remaining temporary
   files from being cleaned up. The final [exit] is the standard one, since
   this binding is not recursive. *)
let exit n =
  iter (fun file -> try Sys.remove file with Sys_error _ -> ()) !tempfiles;
  exit n
(* A no-op: takes unit and returns unit. *)
let null () = ()
2014-10-14 20:36:57 +02:00
(* Mutable integer cell, initially 0. NOTE(review): presumably holds the size
   in bytes of the input file as first read -- confirm against the code which
   sets it. *)
let initial_file_size = ref 0
2016-07-21 18:02:11 +02:00
(* An empty document, built once at start-up with Pdf.empty. *)
let empty = Pdf.empty ()
(* A blank page at the A4 paper size (Pdfpaper.a4). *)
let emptypage = Pdfpage.blankpage Pdfpaper.a4
2022-09-27 17:59:04 +02:00
(* Association list from each of the fourteen Pdftext standard fonts to a list
   of TrueType file names (one candidate per font here). NOTE(review): these
   look like the free replacement fonts used when embedding the standard 14
   fonts (see the embedstd14 setting) -- confirm against the code which reads
   this table. *)
let fontnames =
[(Pdftext.TimesRoman, ["NimbusRoman-Regular.ttf"]);
(Pdftext.TimesBold, ["NimbusRoman-Bold.ttf"]);
(Pdftext.TimesItalic, ["NimbusRoman-Italic.ttf"]);
(Pdftext.TimesBoldItalic, ["NimbusRoman-BoldItalic.ttf"]);
(Pdftext.Helvetica, ["NimbusSans-Regular.ttf"]);
(Pdftext.HelveticaBold, ["NimbusSans-Bold.ttf"]);
(Pdftext.HelveticaOblique, ["NimbusSans-Italic.ttf"]);
(Pdftext.HelveticaBoldOblique, ["NimbusSans-BoldItalic.ttf"]);
(Pdftext.Courier, ["NimbusMonoPS-Regular.ttf"]);
(Pdftext.CourierBold, ["NimbusMonoPS-Bold.ttf"]);
(Pdftext.CourierOblique, ["NimbusMonoPS-Italic.ttf"]);
(Pdftext.CourierBoldOblique, ["NimbusMonoPS-BoldItalic.ttf"]);
(Pdftext.Symbol, ["StandardSymbolsPS.ttf"]);
(Pdftext.ZapfDingbats, ["D050000L.ttf"])]
2013-08-20 16:32:57 +02:00
(* Wrap up the file reading functions to exit with code 1 when an encryption
problem occurs. This happens when object streams are in an encrypted document
and so it can't be read without the right password... The existing error
handling only dealt with the case where the document couldn't be decrypted once
it had been loaded. *)
(* Wrapper around Pdfread.pdf_of_input. Arguments are passed through
   unchanged. A Pdf.PDFError whose message begins with "Encryption" is turned
   into a Cpdferror.SoftError reporting a bad password; every other exception
   propagates unchanged. *)
let pdfread_pdf_of_input ?revision a b c =
  match Pdfread.pdf_of_input ?revision a b c with
  | pdf -> pdf
  | exception Pdf.PDFError s
    when String.length s >= 10 && String.sub s 0 10 = "Encryption" ->
      raise (Cpdferror.SoftError "Bad owner or user password when reading document")
2013-08-20 16:32:57 +02:00
(* Wrapper around Pdfread.pdf_of_channel_lazy. Arguments are passed through
   unchanged. A Pdf.PDFError whose message begins with "Encryption" is turned
   into a Cpdferror.SoftError reporting a bad password; every other exception
   propagates unchanged. *)
let pdfread_pdf_of_channel_lazy ?revision ?source b c d =
  match Pdfread.pdf_of_channel_lazy ?revision ?source b c d with
  | pdf -> pdf
  | exception Pdf.PDFError s
    when String.length s >= 10 && String.sub s 0 10 = "Encryption" ->
      raise (Cpdferror.SoftError "Bad owner or user password when reading document")
2013-08-20 16:32:57 +02:00
(* Wrapper around Pdfread.pdf_of_file. Arguments are passed through
   unchanged. A Pdf.PDFError whose message begins with "Encryption" is turned
   into a Cpdferror.SoftError reporting a bad password; every other exception
   propagates unchanged. *)
let pdfread_pdf_of_file ?revision a b c =
  match Pdfread.pdf_of_file ?revision a b c with
  | pdf -> pdf
  | exception Pdf.PDFError s
    when String.length s >= 10 && String.sub s 0 10 = "Encryption" ->
      raise (Cpdferror.SoftError "Bad owner or user password when reading document")
2013-08-20 16:32:57 +02:00
(* Convert a string to an option: the empty string becomes [None], anything
   else is wrapped in [Some]. *)
let optstring s =
  if s = "" then None else Some s
(* At start-up, switch standard input and standard output to binary mode. *)
let () =
  set_binary_mode_in stdin true;
  set_binary_mode_out stdout true
(* When set, [error] and [soft_error] raise StayOnError instead of exiting
   the process. *)
let stay_on_error = ref false
(* Raised by [error] and [soft_error] in place of exiting when stay_on_error
   is set. *)
exception StayOnError
2013-08-20 16:32:57 +02:00
(* Fatal error reporting. *)
(* Report a fatal error. Logs [s] followed by a hint to use -help, then
   exits with status 2. If [stay_on_error] is set, raises StayOnError instead
   of exiting. Never returns normally. *)
let error s =
  Pdfe.log (s ^ "\nUse -help for help.\n");
  if !stay_on_error then raise StayOnError else exit 2
2013-08-20 16:32:57 +02:00
(* Report a soft error. Logs [s] followed by a newline, then exits with
   status 1. If [stay_on_error] is set, raises StayOnError instead of
   exiting. Never returns normally. *)
let soft_error s =
  Pdfe.log (Printf.sprintf "%s\n" s);
  if !stay_on_error then raise StayOnError else exit 1
2013-08-20 16:32:57 +02:00
(* Parse the page specification [spec] against [pdf] using
   Cpdfpagespec.parse_pagespec. A [Failure] raised by the parser is reported
   through [error] with the failure message; other exceptions propagate. *)
let parse_pagespec pdf spec =
  try Cpdfpagespec.parse_pagespec pdf spec with
  | Failure msg -> error msg
2021-05-25 14:49:51 +02:00
(* We allow an operation such as ScaleToFit on a range such as 'portrait' to be silently null to allow, for example:
cpdf -scale-to-fit a4portrait in.pdf portrait AND -scale-to-fit a4landscape landscape -o out.pdf
*)
(* As Cpdfpagespec.parse_pagespec, but a Pdf.PDFError carrying exactly the
   message "Page range specifies no pages" yields the empty list instead of
   an error. All other exceptions propagate. *)
let parse_pagespec_allow_empty pdf spec =
  try Cpdfpagespec.parse_pagespec pdf spec with
  | Pdf.PDFError "Page range specifies no pages" -> []
2013-08-20 16:32:57 +02:00
(* Operations. *)
(* The operation selected for a run, together with any argument given directly
   with it. The current operation is stored in the [op] field of [args];
   [string_of_op] gives the constructor name as a string. *)
type op =
  | CopyFont of string
  | CountPages
  | Version
  | Encrypt
  | Decrypt
  | StampOn of string
  | StampUnder of string
  | CombinePages of string
  | TwoUp
  | TwoUpStack
  | Impose of bool
  | RemoveBookmarks
  | AddBookmarks of string
  | AddText of string
  | AddRectangle
  | RemoveText
  | Draft
  | PadBefore
  | PadAfter
  | PadEvery of int
  | PadMultiple of int
  | PadMultipleBefore of int
  | Shift
  | ShiftBoxes
  | Scale
  | ScaleToFit
  | ScaleContents of float
  | AttachFile of string list
  | RemoveAttachedFiles
  | ListAttachedFiles
  | DumpAttachedFiles
  | RemoveAnnotations
  | ListAnnotations
  | CopyAnnotations of string
  | SetAnnotations of string
  | Merge
  | Split
  | SplitOnBookmarks of int
  | SplitMax of int
  | Spray
  | Clean
  | Info
  | PageInfo
  | Metadata
  | SetMetadata of string
  | RemoveMetadata
  | Fonts
  | RemoveFonts
  | Compress
  | Decompress
  | Crop
  | Trim
  | Bleed
  | Art
  | RemoveCrop
  | RemoveArt
  | RemoveTrim
  | RemoveBleed
  | CopyBox
  | MediaBox
  | HardBox of string
  | Rotate of int
  | Rotateby of int
  | RotateContents of float
  | Upright
  | VFlip
  | HFlip
  | ThinLines of float
  | SetAuthor of string
  | SetTitle of string
  | SetSubject of string
  | SetKeywords of string
  | SetCreate of string
  | SetModify of string
  | SetCreator of string
  | SetProducer of string
  | SetTrapped
  | SetUntrapped
  | SetVersion of int
  | ListBookmarks
  | SetPageLayout of string
  | SetPageMode of string
  | SetNonFullScreenPageMode of string
  | HideToolbar of bool
  | HideMenubar of bool
  | HideWindowUI of bool
  | FitWindow of bool
  | CenterWindow of bool
  | DisplayDocTitle of bool
  | Presentation
  | ChangeId
  | RemoveId
  | CopyId of string
  | BlackText
  | BlackLines
  | BlackFills
  | ExtractImages
  | ListImages
  | ImageResolution of float
  | MissingFonts
  | ExtractFontFile of string
  | ExtractText
  | OpenAtPage of string
  | OpenAtPageFit of string
  | OpenAtPageCustom of string
  | AddPageLabels
  | RemovePageLabels
  | PrintPageLabels
  | RemoveDictEntry of string
  | ReplaceDictEntry of string
  | PrintDictEntry of string
  | ListSpotColours
  | RemoveClipping
  | SetMetadataDate of string
  | CreateMetadata
  | EmbedMissingFonts
  | BookmarksOpenToLevel of int
  | CreatePDF
  | RemoveAllText
  | ShowBoxes
  | TrimMarks
  | Prepend of string
  | Postpend of string
  | OutputJSON
  | OCGCoalesce
  | OCGList
  | OCGRename
  | OCGOrderAll
  | StampAsXObject of string
  | PrintFontEncoding of string
  | TableOfContents
  | Typeset of string
  | TextWidth of string
  | Draw
  | Composition of bool
  | Chop of int * int
  | ChopHV of bool * float
  | ProcessImages
  | ExtractStream of int
  | PrintObj of int
2013-08-20 16:32:57 +02:00
(* The name of an operation as a string: the constructor name, with any
   argument dropped. The cases are listed in the same order as the
   constructors of [op], and the match has no wildcard case. *)
let string_of_op = function
  | CopyFont _ -> "CopyFont"
  | CountPages -> "CountPages"
  | Version -> "Version"
  | Encrypt -> "Encrypt"
  | Decrypt -> "Decrypt"
  | StampOn _ -> "StampOn"
  | StampUnder _ -> "StampUnder"
  | CombinePages _ -> "CombinePages"
  | TwoUp -> "TwoUp"
  | TwoUpStack -> "TwoUpStack"
  | Impose _ -> "Impose"
  | RemoveBookmarks -> "RemoveBookmarks"
  | AddBookmarks _ -> "AddBookmarks"
  | AddText _ -> "AddText"
  | AddRectangle -> "AddRectangle"
  | RemoveText -> "RemoveText"
  | Draft -> "Draft"
  | PadBefore -> "PadBefore"
  | PadAfter -> "PadAfter"
  | PadEvery _ -> "PadEvery"
  | PadMultiple _ -> "PadMultiple"
  | PadMultipleBefore _ -> "PadMultipleBefore"
  | Shift -> "Shift"
  | ShiftBoxes -> "ShiftBoxes"
  | Scale -> "Scale"
  | ScaleToFit -> "ScaleToFit"
  | ScaleContents _ -> "ScaleContents"
  | AttachFile _ -> "AttachFile"
  | RemoveAttachedFiles -> "RemoveAttachedFiles"
  | ListAttachedFiles -> "ListAttachedFiles"
  | DumpAttachedFiles -> "DumpAttachedFiles"
  | RemoveAnnotations -> "RemoveAnnotations"
  | ListAnnotations -> "ListAnnotations"
  | CopyAnnotations _ -> "CopyAnnotations"
  | SetAnnotations _ -> "SetAnnotations"
  | Merge -> "Merge"
  | Split -> "Split"
  | SplitOnBookmarks _ -> "SplitOnBookmarks"
  | SplitMax _ -> "SplitMax"
  | Spray -> "Spray"
  | Clean -> "Clean"
  | Info -> "Info"
  | PageInfo -> "PageInfo"
  | Metadata -> "Metadata"
  | SetMetadata _ -> "SetMetadata"
  | RemoveMetadata -> "RemoveMetadata"
  | Fonts -> "Fonts"
  | RemoveFonts -> "RemoveFonts"
  | Compress -> "Compress"
  | Decompress -> "Decompress"
  | Crop -> "Crop"
  | Trim -> "Trim"
  | Bleed -> "Bleed"
  | Art -> "Art"
  | RemoveCrop -> "RemoveCrop"
  | RemoveArt -> "RemoveArt"
  | RemoveTrim -> "RemoveTrim"
  | RemoveBleed -> "RemoveBleed"
  | CopyBox -> "CopyBox"
  | MediaBox -> "MediaBox"
  | HardBox _ -> "HardBox"
  | Rotate _ -> "Rotate"
  | Rotateby _ -> "Rotateby"
  | RotateContents _ -> "RotateContents"
  | Upright -> "Upright"
  | VFlip -> "VFlip"
  | HFlip -> "HFlip"
  | ThinLines _ -> "ThinLines"
  | SetAuthor _ -> "SetAuthor"
  | SetTitle _ -> "SetTitle"
  | SetSubject _ -> "SetSubject"
  | SetKeywords _ -> "SetKeywords"
  | SetCreate _ -> "SetCreate"
  | SetModify _ -> "SetModify"
  | SetCreator _ -> "SetCreator"
  | SetProducer _ -> "SetProducer"
  | SetTrapped -> "SetTrapped"
  | SetUntrapped -> "SetUntrapped"
  | SetVersion _ -> "SetVersion"
  | ListBookmarks -> "ListBookmarks"
  | SetPageLayout _ -> "SetPageLayout"
  | SetPageMode _ -> "SetPageMode"
  | SetNonFullScreenPageMode _ -> "SetNonFullScreenPageMode"
  | HideToolbar _ -> "HideToolbar"
  | HideMenubar _ -> "HideMenubar"
  | HideWindowUI _ -> "HideWindowUI"
  | FitWindow _ -> "FitWindow"
  | CenterWindow _ -> "CenterWindow"
  | DisplayDocTitle _ -> "DisplayDocTitle"
  | Presentation -> "Presentation"
  | ChangeId -> "ChangeId"
  | RemoveId -> "RemoveId"
  | CopyId _ -> "CopyId"
  | BlackText -> "BlackText"
  | BlackLines -> "BlackLines"
  | BlackFills -> "BlackFills"
  | ExtractImages -> "ExtractImages"
  | ListImages -> "ListImages"
  | ImageResolution _ -> "ImageResolution"
  | MissingFonts -> "MissingFonts"
  | ExtractFontFile _ -> "ExtractFontFile"
  | ExtractText -> "ExtractText"
  | OpenAtPage _ -> "OpenAtPage"
  | OpenAtPageFit _ -> "OpenAtPageFit"
  | OpenAtPageCustom _ -> "OpenAtPageCustom"
  | AddPageLabels -> "AddPageLabels"
  | RemovePageLabels -> "RemovePageLabels"
  | PrintPageLabels -> "PrintPageLabels"
  | RemoveDictEntry _ -> "RemoveDictEntry"
  | ReplaceDictEntry _ -> "ReplaceDictEntry"
  | PrintDictEntry _ -> "PrintDictEntry"
  | ListSpotColours -> "ListSpotColours"
  | RemoveClipping -> "RemoveClipping"
  | SetMetadataDate _ -> "SetMetadataDate"
  | CreateMetadata -> "CreateMetadata"
  | EmbedMissingFonts -> "EmbedMissingFonts"
  | BookmarksOpenToLevel _ -> "BookmarksOpenToLevel"
  | CreatePDF -> "CreatePDF"
  | RemoveAllText -> "RemoveAllText"
  | ShowBoxes -> "ShowBoxes"
  | TrimMarks -> "TrimMarks"
  | Prepend _ -> "Prepend"
  | Postpend _ -> "Postpend"
  | OutputJSON -> "OutputJSON"
  | OCGCoalesce -> "OCGCoalesce"
  | OCGList -> "OCGList"
  | OCGRename -> "OCGRename"
  | OCGOrderAll -> "OCGOrderAll"
  | StampAsXObject _ -> "StampAsXObject"
  | PrintFontEncoding _ -> "PrintFontEncoding"
  | TableOfContents -> "TableOfContents"
  | Typeset _ -> "Typeset"
  | TextWidth _ -> "TextWidth"
  | Draw -> "Draw"
  | Composition _ -> "Composition"
  | Chop _ -> "Chop"
  | ChopHV _ -> "ChopHV"
  | ProcessImages -> "ProcessImages"
  | ExtractStream _ -> "ExtractStream"
  | PrintObj _ -> "PrintObj"
2013-08-20 16:32:57 +02:00
(* Inputs: filename, pagespec. *)
2023-12-05 13:20:03 +01:00
(* Where an input document comes from: a document already held in memory
   (paired with a string which [string_of_input_kind] returns as its name),
   a named file, or standard input. *)
type input_kind =
  | AlreadyInMemory of Pdf.t * string
  | InFile of string
  | StdIn
(* A printable name for an input: the string stored with an in-memory
   document, the file name for a file, or the literal "Stdin" for standard
   input. *)
let string_of_input_kind = function
  | AlreadyInMemory (_, name) -> name
  | InFile name -> name
  | StdIn -> "Stdin"
(* One input to an operation, as a six-field tuple. The meaning of each
   position is given in the comment following the definition. *)
type input =
input_kind * string * string * string * bool ref * int option
(* input kind, range, user_pw, owner_pw, was_decrypted_with_owner, revision *)
2013-08-20 16:32:57 +02:00
(* Where the result is to be written: nowhere specified yet, standard output,
   or a named file. NoOutputSpecified is the initial value of the [out] field
   of [args]. *)
type output_method =
| NoOutputSpecified
| Stdout
| File of string
2023-10-30 17:36:41 +01:00
(* Mutable list, initially empty. Outputs are also added here, in case -spray
   is in use. *)
let spray_outputs = ref []
2013-08-20 16:32:57 +02:00
(* A list of PDFs to be output, if no output method was specified. Initially
   empty. *)
let output_pdfs : Pdf.t list ref = ref []
(* A font selection: one of the Pdftext standard fonts, or a font identified
   by a string. NOTE(review): the exact meaning of the string in EmbeddedFont
   and OtherFont (file name, font name, ...) is not visible here -- confirm
   against the code which builds these values. *)
type font =
  | StandardFont of Pdftext.standard_font
  | EmbeddedFont of string
  | OtherFont of string
(* All the settings which can be established by command line arguments. Every
   field is mutable: a single global record, [args], is updated in place as
   arguments are processed, and [reset_arguments] restores most fields to
   their defaults. *)
type args =
  {mutable op : op option;
   mutable preserve_objstm : bool;
   mutable create_objstm : bool;
   mutable out : output_method;
   mutable inputs : input list;
   mutable chunksize : int;
   mutable linearize : bool;
   mutable keeplinearize : bool;
   mutable rectangle : string;
   mutable coord : string;
   mutable duration : float option;
   mutable transition : string option;
   mutable horizontal : bool;
   mutable inward : bool;
   mutable direction : int;
   mutable effect_duration : float;
   mutable font : font;
   mutable fontname : string;
   mutable fontencoding : Pdftext.encoding;
   mutable fontsize : float;
   mutable embedstd14 : string option;
   mutable color : Cpdfaddtext.color;
   mutable opacity : float;
   mutable position : Cpdfposition.position;
   mutable underneath : bool;
   mutable linespacing : float;
   mutable midline : bool;
   mutable topline : bool;
   mutable justification : Cpdfaddtext.justification;
   mutable bates : int;
   mutable batespad : int option;
   mutable prerotate : bool;
   mutable relative_to_cropbox : bool;
   mutable keepversion : bool;
   mutable bycolumns : bool;
   mutable pagerotation : int;
   mutable crypt_method : string;
   mutable owner : string;
   mutable user : string;
   mutable no_edit : bool;
   mutable no_print : bool;
   mutable no_copy : bool;
   mutable no_annot : bool;
   mutable no_forms : bool;
   mutable no_extract : bool;
   mutable no_assemble : bool;
   mutable no_hq_print : bool;
   mutable debug : bool;
   mutable debugcrypt : bool;
   mutable debugforce : bool;
   mutable boxes : bool;
   mutable encrypt_metadata : bool;
   mutable retain_numbering : bool;
   mutable process_struct_trees : bool;
   mutable remove_duplicate_fonts : bool;
   mutable remove_duplicate_streams : bool;
   mutable encoding : Cpdfmetadata.encoding;
   mutable scale : float;
   mutable copyfontpage : int;
   mutable copyfontname : string option;
   mutable fast : bool;
   mutable dashrange : string;
   mutable outline : bool;
   mutable linewidth : float;
   mutable path_to_ghostscript : string;
   mutable path_to_im : string;
   mutable path_to_p2p : string;
   mutable path_to_convert : string;
   mutable path_to_jbig2enc : string;
   mutable frombox : string option;
   mutable tobox : string option;
   mutable mediabox_if_missing : bool;
   mutable topage : string option;
   mutable scale_stamp_to_fit : bool;
   mutable labelstyle : Pdfpagelabels.labelstyle;
   mutable labelprefix : string option;
   mutable labelstartval : int;
   mutable labelsprogress : bool;
   mutable squeeze : bool;
   mutable squeeze_recompress : bool;
   mutable squeeze_pagedata : bool;
   mutable original_filename : string;
   mutable was_encrypted : bool;
   mutable cpdflin : string option;
   mutable recrypt : bool;
   mutable was_decrypted_with_owner : bool;
   mutable creator : string option;
   mutable producer : string option;
   mutable extract_text_font_size : float option;
   mutable padwith : string option;
   mutable alsosetxml : bool;
   mutable justsetxml : bool;
   mutable gs_malformed : bool;
   mutable gs_quiet : bool;
   mutable merge_add_bookmarks : bool;
   mutable merge_add_bookmarks_use_titles : bool;
   mutable createpdf_pages : int;
   mutable createpdf_pagesize : Pdfpaper.t;
   mutable removeonly : string option;
   mutable jsonparsecontentstreams : bool;
   mutable jsonnostreamdata : bool;
   mutable jsondecompressstreams : bool;
   mutable jsoncleanstrings : bool;
   mutable ocgrenamefrom : string;
   mutable ocgrenameto : string;
   mutable dedup : bool;
   mutable dedup_per_page : bool;
   mutable collate : bool;
   mutable impose_columns : bool;
   mutable impose_rtl : bool;
   mutable impose_btt : bool;
   mutable impose_center : bool;
   mutable impose_margin : float;
   mutable impose_spacing : float;
   mutable impose_linewidth : float;
   mutable format_json : bool;
   mutable replace_dict_entry_value : Pdf.pdfobject;
   mutable dict_entry_search : Pdf.pdfobject option;
   mutable toc_title : string;
   mutable toc_bookmark : bool;
   mutable idir_only_pdfs : bool;
   mutable no_warn_rotate : bool;
   mutable jpegquality : float;
   mutable jpegqualitylossless : float;
   mutable onebppmethod : string;
   mutable pixel_threshold : int;
   mutable length_threshold : int;
   mutable percentage_threshold : float;
   mutable dpi_threshold : float;
   mutable resample_factor : float;
   mutable resample_interpolate : bool;
   mutable jbig2_lossy_threshold : float;
   mutable extract_stream_decompress : bool}
2013-08-20 16:32:57 +02:00
(* The single global settings record, holding the default value of every
   setting. Fields are listed in the same order as in the declaration of the
   record type, so the two can be compared line by line. [reset_arguments]
   must agree with these defaults for every field it resets. *)
let args =
  {op = None;
   preserve_objstm = true;
   create_objstm = false;
   out = NoOutputSpecified;
   inputs = [];
   chunksize = 1;
   linearize = false;
   keeplinearize = false;
   rectangle = "0 0 0 0";
   coord = "0 0";
   duration = None;
   transition = None;
   horizontal = true;
   inward = true;
   direction = 0;
   effect_duration = 1.;
   font = StandardFont Pdftext.TimesRoman;
   fontname = "Times-Roman";
   fontencoding = Pdftext.WinAnsiEncoding;
   fontsize = 12.;
   embedstd14 = None;
   color = Cpdfaddtext.RGB (0., 0., 0.);
   opacity = 1.;
   position = Cpdfposition.TopLeft (100., 100.);
   underneath = false;
   linespacing = 1.;
   midline = false;
   topline = false;
   justification = Cpdfaddtext.LeftJustify;
   bates = 0;
   batespad = None;
   prerotate = false;
   relative_to_cropbox = false;
   keepversion = false;
   bycolumns = false;
   pagerotation = 0;
   crypt_method = "";
   owner = "";
   user = "";
   no_edit = false;
   no_print = false;
   no_copy = false;
   no_annot = false;
   no_forms = false;
   no_extract = false;
   no_assemble = false;
   no_hq_print = false;
   debug = false;
   debugcrypt = false;
   debugforce = false;
   boxes = false;
   encrypt_metadata = true;
   retain_numbering = false;
   process_struct_trees = true;
   remove_duplicate_fonts = false;
   remove_duplicate_streams = false;
   encoding = Cpdfmetadata.Stripped;
   scale = 1.;
   copyfontpage = 1;
   copyfontname = None;
   fast = false;
   dashrange = "all";
   outline = false;
   linewidth = 1.0;
   path_to_ghostscript = "";
   path_to_im = "";
   path_to_p2p = "";
   path_to_convert = "";
   path_to_jbig2enc = "";
   frombox = None;
   tobox = None;
   mediabox_if_missing = false;
   topage = None;
   scale_stamp_to_fit = false;
   labelstyle = Pdfpagelabels.DecimalArabic;
   labelprefix = None;
   labelstartval = 1;
   labelsprogress = false;
   squeeze = false;
   squeeze_recompress = true;
   squeeze_pagedata = true;
   original_filename = "";
   was_encrypted = false;
   cpdflin = None;
   recrypt = false;
   was_decrypted_with_owner = false;
   creator = None;
   producer = None;
   extract_text_font_size = None;
   padwith = None;
   alsosetxml = false;
   justsetxml = false;
   gs_malformed = false;
   gs_quiet = false;
   merge_add_bookmarks = false;
   merge_add_bookmarks_use_titles = false;
   createpdf_pages = 1;
   createpdf_pagesize = Pdfpaper.a4;
   removeonly = None;
   jsonparsecontentstreams = false;
   jsonnostreamdata = false;
   jsondecompressstreams = false;
   jsoncleanstrings = false;
   ocgrenamefrom = "";
   ocgrenameto = "";
   dedup = false;
   dedup_per_page = false;
   collate = false;
   impose_columns = false;
   impose_rtl = false;
   impose_btt = false;
   impose_center = false;
   impose_margin = 0.;
   impose_spacing = 0.;
   impose_linewidth = 0.;
   format_json = false;
   replace_dict_entry_value = Pdf.Null;
   dict_entry_search = None;
   toc_title = "Table of Contents";
   toc_bookmark = true;
   idir_only_pdfs = false;
   no_warn_rotate = false;
   jpegquality = 100.;
   jpegqualitylossless = 101.;
   onebppmethod = "";
   pixel_threshold = 25;
   length_threshold = 100;
   percentage_threshold = 99.;
   dpi_threshold = 0.;
   resample_factor = 101.;
   resample_interpolate = false;
   jbig2_lossy_threshold = 0.85;
   extract_stream_decompress = false}
2023-12-31 12:59:48 +01:00
(* Do not reset original_filename or cpdflin or was_encrypted or
was_decrypted_with_owner or recrypt or producer or creator or path_to_* or
gs_malformed or gs_quiet or no-warn-rotate, since we want these to work
across ANDs. Or squeeze options: a little odd, but we want it to happen on
eventual output. Or -debug-force (from v2.6). *)
2013-08-20 16:32:57 +02:00
(* Restore the global [args] record to its defaults, except for the fields
   which are deliberately carried over: the squeeze options,
   original_filename, was_encrypted, cpdflin, recrypt,
   was_decrypted_with_owner, creator, producer, the path_to_* fields,
   gs_malformed, gs_quiet, debugforce and no_warn_rotate. Finally clears
   Cpdfdrawcontrol.fontpack_initialised. Each value assigned here matches the
   initial value given in the definition of [args]. *)
let reset_arguments () =
  args.op <- None;
  args.preserve_objstm <- true;
  args.create_objstm <- false;
  args.out <- NoOutputSpecified;
  args.inputs <- [];
  args.chunksize <- 1;
  args.linearize <- false;
  args.keeplinearize <- false;
  args.rectangle <- "0 0 0 0";
  args.coord <- "0 0";
  args.duration <- None;
  args.transition <- None;
  args.horizontal <- true;
  args.inward <- true;
  args.direction <- 0;
  args.effect_duration <- 1.;
  args.font <- StandardFont Pdftext.TimesRoman;
  args.fontname <- "Times-Roman";
  args.fontsize <- 12.;
  args.fontencoding <- Pdftext.WinAnsiEncoding;
  args.color <- Cpdfaddtext.RGB (0., 0., 0.);
  args.opacity <- 1.;
  args.position <- Cpdfposition.TopLeft (100., 100.);
  args.underneath <- false;
  args.linespacing <- 1.;
  args.midline <- false;
  args.topline <- false;
  args.justification <- Cpdfaddtext.LeftJustify;
  args.bates <- 0;
  args.batespad <- None;
  args.prerotate <- false;
  args.relative_to_cropbox <- false;
  args.keepversion <- false;
  args.bycolumns <- false;
  args.pagerotation <- 0;
  args.crypt_method <- "";
  args.owner <- "";
  args.user <- "";
  args.no_edit <- false;
  args.no_print <- false;
  args.no_copy <- false;
  args.no_annot <- false;
  args.no_forms <- false;
  args.no_extract <- false;
  args.no_assemble <- false;
  args.no_hq_print <- false;
  args.debug <- false;
  args.debugcrypt <- false;
  args.boxes <- false;
  args.encrypt_metadata <- true;
  args.retain_numbering <- false;
  args.process_struct_trees <- true;
  args.remove_duplicate_fonts <- false;
  args.remove_duplicate_streams <- false;
  args.encoding <- Cpdfmetadata.Stripped;
  args.scale <- 1.;
  args.copyfontpage <- 1;
  args.copyfontname <- None;
  args.fast <- false;
  args.dashrange <- "all";
  args.outline <- false;
  args.linewidth <- 1.0;
  args.frombox <- None;
  args.tobox <- None;
  args.mediabox_if_missing <- false;
  args.topage <- None;
  args.scale_stamp_to_fit <- false;
  args.labelstyle <- Pdfpagelabels.DecimalArabic;
  args.labelprefix <- None;
  args.labelstartval <- 1;
  args.labelsprogress <- false;
  args.embedstd14 <- None;
  args.extract_text_font_size <- None;
  args.padwith <- None;
  args.alsosetxml <- false;
  args.justsetxml <- false;
  args.merge_add_bookmarks <- false;
  args.merge_add_bookmarks_use_titles <- false;
  args.createpdf_pages <- 1;
  args.createpdf_pagesize <- Pdfpaper.a4;
  args.removeonly <- None;
  args.jsonparsecontentstreams <- false;
  args.jsonnostreamdata <- false;
  args.jsondecompressstreams <- false;
  args.jsoncleanstrings <- false;
  args.ocgrenamefrom <- "";
  args.ocgrenameto <- "";
  args.dedup <- false;
  args.dedup_per_page <- false;
  args.collate <- false;
  args.impose_columns <- false;
  args.impose_rtl <- false;
  args.impose_btt <- false;
  args.impose_center <- false;
  args.impose_margin <- 0.;
  args.impose_spacing <- 0.;
  args.impose_linewidth <- 0.;
  args.format_json <- false;
  args.replace_dict_entry_value <- Pdf.Null;
  args.dict_entry_search <- None;
  args.toc_title <- "Table of Contents";
  args.toc_bookmark <- true;
  args.idir_only_pdfs <- false;
  args.jpegquality <- 100.;
  args.jpegqualitylossless <- 101.;
  args.onebppmethod <- "";
  args.pixel_threshold <- 25;
  args.length_threshold <- 100;
  args.percentage_threshold <- 99.;
  args.dpi_threshold <- 0.;
  args.resample_factor <- 101.;
  args.resample_interpolate <- false;
  args.jbig2_lossy_threshold <- 0.85;
  args.extract_stream_decompress <- false;
  clear Cpdfdrawcontrol.fontpack_initialised
2021-12-18 17:14:31 +01:00
(* Prefer a) the one given with -cpdflin b) a local cpdflin, c) otherwise assume
installed at a system place *)
(* Decide which cpdflin executable to run. An explicitly provided path is
   returned as it is. Otherwise a "cpdflin" or "cpdflin.exe" present in the
   current directory is used (prefixed with "./" except on Win32), and failing
   that the bare program name for the platform is returned. *)
let find_cpdflin provided =
  let on_windows = Sys.os_type = "Win32" in
  match provided with
  | Some path -> path
  | None ->
      let prefix = if on_windows then "" else "./" in
      if Sys.file_exists "cpdflin" then prefix ^ "cpdflin"
      else if Sys.file_exists "cpdflin.exe" then prefix ^ "cpdflin.exe"
      else if on_windows then "cpdflin.exe"
      else "cpdflin"
(* Call cpdflin, given the (temp) input name, the output name, and the location
of the cpdflin binary. Returns the exit code. *)
(* Run cpdflin to linearize [temp] into [output], passing [best_password] as
   the password. [cpdflin] is the program to run. Returns the exit code given
   by Sys.command. When args.debug is set, the command line is logged before
   it is run. *)
let call_cpdflin cpdflin temp output best_password =
  let command =
    Filename.quote_command cpdflin
      ["--linearize"; ("--password=" ^ best_password); temp; output]
  in
  (* Log the command line if debugging, then execute it. *)
  let run command_line =
    if args.debug then Pdfe.log (command_line ^ "\n");
    Sys.command command_line
  in
  match Sys.os_type, cpdflin with
  | "Win32", _ ->
      (* On windows, don't use LD_LIBRARY_PATH - it will happen automatically *)
      run command
  | _, "cpdflin" ->
      (* The bare program name: no library path is set up. *)
      run command
  | _, _ ->
      (* On other platforms, if -cpdflin was provided, or cpdflin was in the
         current folder, set up the library search paths to point at the
         directory containing cpdflin. *)
      let libdir = Filename.quote (Filename.dirname cpdflin) in
      run
        ("DYLD_FALLBACK_LIBRARY_PATH=" ^ libdir ^ " " ^
         "LD_LIBRARY_PATH=" ^ libdir ^ " " ^
         command)
2015-07-26 13:31:43 +02:00
(* The page range of the first entry in args.inputs. Reports a fatal error
   through [error] if args.inputs is empty. *)
let get_pagespec () =
  match args.inputs with
  | [] -> error "No range specified for input, or specified too late."
  | (_, range, _, _, _, _) :: _ -> range
(* A short human-readable description of a single Pdfcrypt permission. *)
let string_of_permission permission =
  match permission with
  | Pdfcrypt.NoEdit -> "No edit"
  | Pdfcrypt.NoPrint -> "No print"
  | Pdfcrypt.NoCopy -> "No copy"
  | Pdfcrypt.NoAnnot -> "No annotate"
  | Pdfcrypt.NoForms -> "No edit forms"
  | Pdfcrypt.NoExtract -> "No extract"
  | Pdfcrypt.NoAssemble -> "No assemble"
  | Pdfcrypt.NoHqPrint -> "No high-quality print"
(* The permissions reported by Pdfread.permissions for [pdf], each described
   by [string_of_permission] and joined with ", ". Gives "" when there are
   none. Every description is a non-empty string, so a plain join produces
   the same text as accumulating with a separator. *)
let getpermissions pdf =
  let descriptions = map string_of_permission (Pdfread.permissions pdf) in
  String.concat ", " descriptions
2013-08-20 16:32:57 +02:00
(* The list of Pdfcrypt permissions to be denied, according to the no_* flags
   currently set in [args]. The candidates are listed from no_hq_print down to
   no_edit so that the result has the same order as pushing each enabled ban,
   no_edit first, onto the front of a list. *)
let banlist_of_args () =
  let candidates =
    [(args.no_hq_print, Pdfcrypt.NoHqPrint);
     (args.no_assemble, Pdfcrypt.NoAssemble);
     (args.no_extract, Pdfcrypt.NoExtract);
     (args.no_forms, Pdfcrypt.NoForms);
     (args.no_annot, Pdfcrypt.NoAnnot);
     (args.no_copy, Pdfcrypt.NoCopy);
     (args.no_print, Pdfcrypt.NoPrint);
     (args.no_edit, Pdfcrypt.NoEdit)]
  in
  List.map
    (fun (_, ban) -> ban)
    (List.filter (fun (enabled, _) -> enabled) candidates)
(* If a file is encrypted, decrypt it using the owner password or, if not
present, the user password. If the user password is used, the operation to be
performed is checked to see if it's allowable under the permissions regime. *)
(* The bans. Each function has a list of bans. If any of these is present in the
bans list in the input file, the operation cannot proceed. Other operations
cannot proceed at all without owner password. *)
(* [banned banlist op] is true when [op] may not be carried out on a file whose
   permission restrictions are [banlist]. Operations fall into four groups:
   never allowed, allowed unless NoAssemble is set, allowed unless NoEdit is
   set, and always allowed. *)
let banned banlist op =
  let forbidden ban = mem ban banlist in
  match op with
  (* Never allowed. Combine pages is not allowed because we would not know
     where to get the -recrypt from -- the first or second file? *)
  | Decrypt | Encrypt | CombinePages _ -> true
  (* Document assembly. *)
  | AddBookmarks _ | PadBefore | PadAfter | PadEvery _ | PadMultiple _
  | PadMultipleBefore _
  | Merge | Split | SplitOnBookmarks _ | SplitMax _ | Spray
  | RotateContents _ | Rotate _
  | Rotateby _ | Upright | VFlip | HFlip | Impose _ | Chop _ | ChopHV _ ->
      forbidden Pdfcrypt.NoAssemble
  (* Content editing. *)
  | TwoUp | TwoUpStack | RemoveBookmarks | AddRectangle | RemoveText
  | Draft | Shift | ShiftBoxes | Scale | ScaleToFit | RemoveAttachedFiles
  | RemoveAnnotations | RemoveFonts | Crop | RemoveCrop | Trim | RemoveTrim
  | Bleed | RemoveBleed | Art | RemoveArt
  | CopyBox | MediaBox | HardBox _ | SetTrapped | SetUntrapped | Presentation
  | BlackText | BlackLines | BlackFills | CopyFont _ | StampOn _
  | StampUnder _ | StampAsXObject _
  | AddText _ | ScaleContents _ | AttachFile _ | ThinLines _ | RemoveClipping
  | RemoveAllText
  | Prepend _ | Postpend _ | Draw | ProcessImages ->
      forbidden Pdfcrypt.NoEdit
  (* Always allowed. *)
  | Fonts | Info | Metadata | PageInfo | CountPages
  | ListAttachedFiles | ListAnnotations
  | ListBookmarks | ImageResolution _ | ListImages | MissingFonts
  | PrintPageLabels | Clean | Compress | Decompress
  | ChangeId | CopyId _ | ListSpotColours | Version
  | DumpAttachedFiles | RemoveMetadata | EmbedMissingFonts
  | BookmarksOpenToLevel _ | CreatePDF
  | SetPageMode _ | SetNonFullScreenPageMode _ | HideToolbar _ | HideMenubar _
  | HideWindowUI _
  | FitWindow _ | CenterWindow _ | DisplayDocTitle _
  | RemoveId | OpenAtPageFit _ | OpenAtPage _ | OpenAtPageCustom _
  | SetPageLayout _
  | ShowBoxes | TrimMarks | CreateMetadata | SetMetadataDate _ | SetVersion _
  | SetAuthor _ | SetTitle _ | SetSubject _ | SetKeywords _ | SetCreate _
  | SetModify _ | SetCreator _ | SetProducer _ | RemoveDictEntry _
  | ReplaceDictEntry _ | PrintDictEntry _ | SetMetadata _
  | ExtractText | ExtractImages | ExtractFontFile _
  | AddPageLabels | RemovePageLabels | OutputJSON | OCGCoalesce
  | OCGRename | OCGList | OCGOrderAll | PrintFontEncoding _ | TableOfContents
  | Typeset _ | Composition _
  | TextWidth _ | SetAnnotations _ | CopyAnnotations _ | ExtractStream _
  | PrintObj _ ->
      false
2013-08-20 16:32:57 +02:00
(* Decide whether [op] may run on [pdf] given the restrictions in [banlist].
   args.debugforce overrides everything; an absent operation is always
   allowed. With args.debugcrypt set, the decision inputs are printed. *)
let operation_allowed pdf banlist op =
  if args.debugforce then true
  else
    match op with
    | Some operation ->
        if args.debugcrypt then begin
          Printf.printf "operation_allowed: op = %s\n" (string_of_op operation);
          Printf.printf "Permissions: %s\n" (getpermissions pdf)
        end;
        not (banned banlist operation)
    | None ->
        if args.debugcrypt then
          Printf.printf "operation is None, so allowed!\n";
        true (* Merge *) (* changed to allow it *)
2013-08-20 16:32:57 +02:00
(* Decrypt [pdf] if it is encrypted, returning it unchanged otherwise.

   The first argument is an input tuple; only its user password, owner
   password and "was decrypted with owner" flag are used. The owner password
   is tried first; on success both args.was_decrypted_with_owner and the
   input's flag are set. Otherwise the user password is tried, and the result
   is accepted only if [operation_allowed] permits [op] under the permissions
   that decryption returned. Every failure is reported with [soft_error].

   The function is not recursive, so the former [rec] has been dropped, and
   the final debug message now ends with a newline like the others. *)
let decrypt_if_necessary (_, _, user_pw, owner_pw, was_dec_with_owner, _) op pdf =
  if args.debugcrypt then
    begin match op with
    | None -> flprint "decrypt_if_necessary: op = None\n"
    | Some x -> Printf.printf "decrypt_if_necessary: op = %s\n" (string_of_op x)
    end;
  if not (Pdfcrypt.is_encrypted pdf) then pdf else
  match op with
  | Some (CombinePages _) ->
      (* This is a hack because we don't have support for recryption on combine
       * pages. This is prevented by permissions above, but in the case that the
       * owner password is blank (e.g christmas_tree_lights.pdf), we would end
       * up here. *)
      soft_error "Combine pages: both files must be unencrypted for this operation, or add -decrypt-force"
  | _ ->
      begin match Pdfcrypt.decrypt_pdf_owner owner_pw pdf with
      | Some pdf ->
          args.was_decrypted_with_owner <- true;
          was_dec_with_owner := true;
          if args.debugcrypt then Printf.printf "Managed to decrypt with owner password\n";
          pdf
      | None ->
          if args.debugcrypt then Printf.printf "Couldn't decrypt with owner password %s\n" owner_pw;
          let user_result =
            if args.debugcrypt then Printf.printf "call decrypt_pdf user\n";
            let r = Pdfcrypt.decrypt_pdf user_pw pdf in
            if args.debugcrypt then Printf.printf "returned from decrypt_pdf\n";
            r
          in
          begin match user_result with
          | Some pdf, permissions ->
              if args.debugcrypt then Printf.printf "Managed to decrypt with user password\n";
              if operation_allowed pdf permissions op
              then pdf
              else soft_error "User password cannot give permission for this operation. Supply owner or add -decrypt-force."
          | None, _ ->
              if args.debugcrypt then Printf.printf "Failed to decrypt with user password: raising soft_error\n";
              soft_error "Failed to decrypt file: wrong password?"
          end
      end
2013-08-20 16:32:57 +02:00
(* Output Page Count *)
(* Print the number of pages in [pdf] on its own line, using the fast page
   counter when args.fast is set. *)
let output_page_count pdf =
  let count_pages =
    if args.fast then Pdfpage.endpage_fast else Pdfpage.endpage
  in
  Printf.printf "%i\n" (count_pages pdf)
2013-08-20 16:32:57 +02:00
2019-07-08 13:44:27 +02:00
(* Refuse a second operation: if args.op already holds something other than
   Shift, log an explanation naming both operations and exit with code 1. *)
let detect_duplicate_op op =
  match args.op with
  | None | Some Shift -> ()
  | Some previous ->
      let message =
        Printf.sprintf "Operation %s already specified, so cannot specify operation %s.\nUse AND from Chapter 1 of the manual to chain commands together.\n"
          (string_of_op previous) (string_of_op op)
      in
      Pdfe.log message;
      exit 1
2013-08-20 16:32:57 +02:00
(* Make [op] the selected operation, after checking that no conflicting
   operation has already been chosen. The trailing unit argument lets a
   partial application be used as a thunk. *)
let setop op () =
  let () = detect_duplicate_op op in
  args.op <- Some op
(* Record [name] as the output file, and also push it onto spray_outputs. *)
let setout name =
  spray_outputs := name :: !spray_outputs;
  args.out <- File name
2013-08-20 16:32:57 +02:00
(* Set args.chunksize to [c]; anything not strictly positive is an error. *)
let setchunk c =
  if c <= 0
  then error "invalid chunk size"
  else args.chunksize <- c
(* Return [s] with every '-' replaced by " - " (a dash padded with a space on
   each side); all other characters are copied unchanged. Built directly with
   a Buffer instead of exploding the string into a char list and back. *)
let fixdashes s =
  let out = Buffer.create (String.length s + 8) in
  String.iter
    (function
      | '-' -> Buffer.add_string out " - "
      | c -> Buffer.add_char out c)
    s;
  Buffer.contents out
2024-01-16 15:54:45 +01:00
(* Read the image file [s], turn it into an in-memory PDF with
   Cpdfimage.image_of_input using reader [f], and push it onto args.inputs as
   an AlreadyInMemory input covering all pages. Also sets
   args.original_filename and args.create_objstm.

   The channel is now closed when the image reader raises as well as on
   success (previously it leaked on failure). A Sys_error is reported as
   "Image file not found"; other exceptions propagate unchanged. *)
let set_input_image f s =
  try
    let fh = open_in_bin s in
    let pdf =
      match Cpdfimage.image_of_input f (Pdfio.input_of_channel fh) with
      | pdf -> close_in_noerr fh; pdf
      | exception e -> close_in_noerr fh; raise e
    in
    args.original_filename <- s;
    args.create_objstm <- true;
    args.inputs <- (AlreadyInMemory (pdf, s), "all", "", "", ref false, None)::args.inputs
  with
    Sys_error _ -> error "Image file not found"
(* Optional value passed as ?global to Cpdfimage.obj_of_jbig2_data. *)
let jbig2_global = ref None
(* Add the PNG / JPEG file [s] as an input, via set_input_image. *)
let set_input_png s = set_input_image (fun () -> Cpdfimage.obj_of_png_data) s
let set_input_jpeg s = set_input_image (fun () -> Cpdfimage.obj_of_jpeg_data) s
(* Add the JBIG2 file [s] as an input, using the current jbig2_global, and
   switch on args.remove_duplicate_streams. *)
let set_input_jbig2 s =
set_input_image
(fun () -> Cpdfimage.obj_of_jbig2_data ?global:!jbig2_global) s;
args.remove_duplicate_streams <- true
2024-01-23 19:37:35 +01:00
let encrypt_to_collect = ref 0
(* Select encryption method [s]. Encrypt becomes the operation only when no
   operation is set yet (it could be additional to -split); any method name
   outside the supported list is an error. *)
let setmethod s =
  detect_duplicate_op Encrypt;
  begin match args.op with
  | None -> args.op <- Some Encrypt
  | Some _ -> ()
  end;
  if List.mem s ["40bit"; "128bit"; "AES"; "AES256"; "AES256ISO"]
  then args.crypt_method <- s
  else error ("Unsupported encryption method " ^ s)
2024-01-16 15:54:45 +01:00
2013-08-20 16:32:57 +02:00
(* Handle an anonymous (non-flag) command-line argument [s].

   - While encrypt_to_collect is 3, 2 or 1, [s] is taken as the encryption
     method, owner password and user password respectively, and the counter
     is decremented.
   - Otherwise "user=<pw>" / "owner=<pw>" set the password of the most recent
     input.
   - Otherwise, if [s] contains a '.', it is added as an input file; the
     suffixes .jbig2, .jpeg, .jpg and .png (matched ignoring ASCII case) go
     through the image readers.
   - Otherwise [s] is treated as the page range of the most recent input, or
     ignored with a warning when there is no input.

   Fix: a bare "user" or "owner" (no '=') used to call [tl] on an empty list
   when an input was present. It is now treated like any other argument. *)
let anon_fun s =
  (* True when [s] ends with [ext]; [ext] must be given in upper case. *)
  let has_suffix ext =
    let ls = String.length s and le = String.length ext in
    ls >= le && String.uppercase_ascii (String.sub s (ls - le) le) = ext
  in
  try
    match !encrypt_to_collect with
    | 3 -> setmethod s; decr encrypt_to_collect
    | 2 -> args.owner <- s; decr encrypt_to_collect
    | 1 -> args.user <- s; decr encrypt_to_collect
    | 0 ->
        let before, after = cleavewhile (neq '=') (explode s) in
        (* [after] starts at the first '=', or is empty if there is none. *)
        let password () =
          match after with
          | _ :: rest -> implode rest
          | [] -> raise Not_found
        in
        begin match implode before with
        | "user" ->
            begin match args.inputs with
            | [] -> ()
            | (a, b, _, e, f, g)::more ->
                args.inputs <- (a, b, password (), e, f, g)::more
            end
        | "owner" ->
            begin match args.inputs with
            | [] -> ()
            | (a, b, d, _, f, g)::more ->
                args.inputs <- (a, b, d, password (), f, g)::more
            end
        | _ -> raise Not_found
        end
    | _ -> assert false
  with
    Not_found ->
      try
        (* Raises Not_found when there is no '.', i.e. not a file name. *)
        ignore (String.index s '.');
        begin
          if has_suffix ".JBIG2" then set_input_jbig2 s
          else if has_suffix ".JPEG" || has_suffix ".JPG" then set_input_jpeg s
          else if has_suffix ".PNG" then set_input_png s
          else args.inputs <- (InFile s, "all", "", "", ref false, None)::args.inputs
        end;
        args.original_filename <- s
      with
        Not_found ->
          match args.inputs with
          | [] ->
              Pdfe.log (Printf.sprintf "Warning: '%s' ignored\n" s)
          | (a, _, d, e, f, g)::t ->
              args.inputs <- (a, fixdashes s, d, e, f, g)::t
2013-08-20 16:32:57 +02:00
(* If a password begins with a dash, we allow -pw=<password> too. The value is
   handled exactly like an anonymous argument. *)
let setdashpassword = anon_fun
2013-08-20 16:32:57 +02:00
(* Setting operations *)
(* Each of the next five functions selects a box operation and stores its
   argument string [s] in args.rectangle. *)
let setcrop s =
setop Crop ();
args.rectangle <- s
2013-08-20 16:32:57 +02:00
let settrim s =
setop Trim ();
args.rectangle <- s
let setbleed s =
setop Bleed ();
args.rectangle <- s
let setart s =
setop Art ();
args.rectangle <- s
2013-08-20 16:32:57 +02:00
let setmediabox s =
setop MediaBox ();
args.rectangle <- s
2013-08-20 16:32:57 +02:00
(* Select AddRectangle; its argument is stored in args.coord instead. *)
let setrectangle s =
setop AddRectangle ();
args.coord <- s
2013-08-20 16:32:57 +02:00
(* Select the Rotate operation; only 0, 90, 180 and 270 are accepted. *)
let setrotate i =
  match i with
  | 0 | 90 | 180 | 270 -> setop (Rotate i) ()
  | _ -> error "bad rotation"
(* Select the Rotateby operation; only 0, 90, 180 and 270 are accepted. *)
let setrotateby i =
  match i with
  | 0 | 90 | 180 | 270 -> setop (Rotateby i) ()
  | _ -> error "bad rotation"
(* Parse the "true"/"false" argument [b] of the viewer preference [name].
   Only the failure of [bool_of_string] is turned into the
   "<name>: must use true or false" Failure. The previous catch-all handler
   also wrapped [setop], so unrelated failures were reported under this
   message. *)
let parse_viewer_preference_bool name b =
  match bool_of_string b with
  | v -> v
  | exception Invalid_argument _ ->
      failwith (name ^ ": must use true or false")

(* Each function below selects the viewer-preference operation of the same
   name with the boolean parsed from [b]. *)
let hidetoolbar b =
  setop (HideToolbar (parse_viewer_preference_bool "HideToolBar" b)) ()
let hidemenubar b =
  setop (HideMenubar (parse_viewer_preference_bool "HideMenuBar" b)) ()
let hidewindowui b =
  setop (HideWindowUI (parse_viewer_preference_bool "HideWindowUI" b)) ()
let fitwindow b =
  setop (FitWindow (parse_viewer_preference_bool "FitWindow" b)) ()
let centerwindow b =
  setop (CenterWindow (parse_viewer_preference_bool "CenterWindow" b)) ()
let displaydoctitle b =
  setop (DisplayDocTitle (parse_viewer_preference_bool "DisplayDocTitle" b)) ()
2023-10-25 19:44:29 +02:00
(* Parse a size such as "500", "10kB", "3MiB" or "1GB" into a byte count.
   The unit suffix is matched case-insensitively: KB/MB/GB are powers of 1000
   and KiB/MiB/GiB powers of 1024; with no suffix the number is taken as-is.
   The numeric part goes through [int_of_string], which raises Failure when it
   is malformed or empty. Uses a direct suffix test instead of reversing a
   char list. *)
let read_file_size s =
  let upper = String.uppercase_ascii s in
  let len = String.length upper in
  let ends_with suffix =
    let k = String.length suffix in
    len >= k && String.sub upper (len - k) k = suffix
  in
  (* Numeric prefix left after removing [suffix], times [multiplier]. *)
  let scaled suffix multiplier =
    multiplier * int_of_string (String.sub upper 0 (len - String.length suffix))
  in
  if ends_with "GIB" then scaled "GIB" (1024 * 1024 * 1024)
  else if ends_with "GB" then scaled "GB" (1000 * 1000 * 1000)
  else if ends_with "MIB" then scaled "MIB" (1024 * 1024)
  else if ends_with "MB" then scaled "MB" (1000 * 1000)
  else if ends_with "KIB" then scaled "KIB" 1024
  else if ends_with "KB" then scaled "KB" 1000
  else int_of_string upper
(* Select SplitMax with the size parsed from [i] by read_file_size. *)
let setsplitmax i = setop (SplitMax (read_file_size i)) ()
2013-08-20 16:32:57 +02:00
(* Send output to stdout. *)
let setstdout () = args.out <- Stdout
(* Read from stdin. Note that this replaces the whole input list rather than
   adding to it. *)
let setstdin () = args.inputs <- [StdIn, "all", "", "", ref false, None]
2013-08-20 16:32:57 +02:00
(* Simple setters for the transition / presentation fields of args. *)
let settrans s = args.transition <- Some s
let setduration f = args.duration <- Some f
let setvertical () = args.horizontal <- false
let setoutward () = args.inward <- false
(* Only the directions 0, 90, 180, 270 and 315 are accepted. *)
let setdirection i =
args.direction <-
match i with
| 0 | 90 | 180 | 270 | 315 -> i
| _ -> error "Bad direction"
let seteffectduration f = args.effect_duration <- f
(* Operation selectors taking a single argument. *)
let setcopyid s = setop (CopyId s) ()
let setthinlines s = setop (ThinLines (Cpdfcoord.parse_single_number empty s)) ()
2013-08-20 16:32:57 +02:00
let setcopyannotations s = setop (CopyAnnotations s) ()
2023-01-13 07:30:46 +01:00
let setsetannotations s = setop (SetAnnotations s) ()
2013-08-20 16:32:57 +02:00
(* Each of these selects its operation and stores the argument string [s] in
   args.coord. *)
let setshift s =
setop Shift ();
args.coord <- s
2013-08-20 16:32:57 +02:00
2024-01-22 17:36:37 +01:00
let setshiftboxes s =
setop ShiftBoxes ();
args.coord <- s
2013-08-20 16:32:57 +02:00
let setscale s =
setop Scale ();
args.coord <- s
2013-08-20 16:32:57 +02:00
let setscaletofit s =
setop ScaleToFit ();
args.coord <- s
2013-08-20 16:32:57 +02:00
(* Add file [s] to the attach operation. Repeated use accumulates files
   (newest first) in one AttachFile operation; if a different operation is
   already selected, the duplicate-operation check is run. *)
let setattachfile s =
  match args.op with
  | None -> setop (AttachFile [s]) ()
  | Some (AttachFile earlier) -> args.op <- Some (AttachFile (s :: earlier))
  | Some _ -> detect_duplicate_op (AttachFile [s])
2013-08-20 16:32:57 +02:00
2016-11-13 15:02:09 +01:00
(* Store [f] in args.extract_text_font_size. *)
let setextracttextfontsize f =
args.extract_text_font_size <- Some f
2013-08-20 16:32:57 +02:00
(* Set args.fontsize. Any value that is not strictly positive is rejected,
   including zero, although the message says "Negative". *)
let setfontsize f =
2016-11-13 15:02:09 +01:00
if f > 0. then args.fontsize <- f else error "Negative font size specified"
2013-08-20 16:32:57 +02:00
(* Select the AddText operation with text [s]. *)
let setaddtext s =
setop (AddText s) ()
(* Parse [s] with Cpdfdrawcontrol.parse_color and store it in args.color. *)
let setcolor s =
args.color <- Cpdfdrawcontrol.parse_color s
2013-08-20 16:32:57 +02:00
(* Store [o] in args.opacity. *)
let setopacity o =
args.opacity <- o
(* Select the AddBookmarks operation with argument [s]. *)
let setaddbookmarks s =
setop (AddBookmarks s) ()
2021-10-26 18:32:36 +02:00
(* JSON variants: each selects the same operation as its non-JSON counterpart
   and additionally sets args.format_json. *)
let setaddbookmarksjson s =
setop (AddBookmarks s) ();
2021-10-27 19:55:52 +02:00
args.format_json <- true
2021-10-26 18:32:36 +02:00
2023-10-31 16:50:23 +01:00
let setlistfontsjson () =
setop Fonts ();
args.format_json <- true
let setinfojson () =
setop Info ();
args.format_json <- true
let setpageinfojson () =
setop PageInfo ();
args.format_json <- true
let setprintpagelabelsjson () =
setop PrintPageLabels ();
args.format_json <- true
2021-10-27 19:55:52 +02:00
let setlistbookmarksjson () =
2021-10-26 18:32:36 +02:00
setop ListBookmarks ();
2021-10-27 19:55:52 +02:00
args.format_json <- true
let setlistannotationsjson () =
2021-10-28 16:34:03 +02:00
setop ListAnnotations ();
2021-10-27 19:55:52 +02:00
args.format_json <- true
2021-10-26 18:32:36 +02:00
2013-08-20 16:32:57 +02:00
(* Select StampOn with argument [f]. If args.position still equals
   TopLeft (100., 100.), it is replaced by BottomLeft (0., 0.). *)
let setstampon f =
setop (StampOn f) ();
(* Due to an earlier bad decision (default position), we have this nasty hack *)
2023-04-07 16:31:21 +02:00
if args.position = Cpdfposition.TopLeft (100., 100.) then args.position <- Cpdfposition.BottomLeft (0., 0.)
2013-08-20 16:32:57 +02:00
(* As setstampon, but selects StampUnder. *)
let setstampunder f =
setop (StampUnder f) ();
2023-04-07 16:31:21 +02:00
if args.position = Cpdfposition.TopLeft (100., 100.) then args.position <- Cpdfposition.BottomLeft (0., 0.)
2013-08-20 16:32:57 +02:00
(* Select StampAsXObject; no position adjustment is made. *)
let setstampasxobject f =
setop (StampAsXObject f) ()
2013-08-20 16:32:57 +02:00
(* Select CombinePages with argument [f]. *)
let setcombinepages f =
setop (CombinePages f) ()
(* Each of these parses [s] as a coordinate pair (against the empty document)
   and stores the corresponding Cpdfposition value in args.position. The
   justification is left unchanged. *)
let setposcenter s =
let x, y = Cpdfcoord.parse_coordinate empty s in
2021-08-12 21:38:55 +02:00
args.position <- Cpdfposition.PosCentre (x, y)
2013-08-20 16:32:57 +02:00
let setposleft s =
let x, y = Cpdfcoord.parse_coordinate empty s in
2021-08-12 21:38:55 +02:00
args.position <- Cpdfposition.PosLeft (x, y)
2013-08-20 16:32:57 +02:00
let setposright s =
let x, y = Cpdfcoord.parse_coordinate empty s in
2021-08-12 21:38:55 +02:00
args.position <- Cpdfposition.PosRight (x, y)
2013-08-20 16:32:57 +02:00
(* Offsets for a corner position: [n] is parsed as a coordinate pair, and if
   that fails for any reason, as a single number used for both axes. This was
   previously repeated in each of the four corner setters. *)
let parse_corner_offsets n =
  match Cpdfcoord.parse_coordinate empty n with
  | (dx, dy) -> (dx, dy)
  | exception _ ->
      let d = Cpdfcoord.parse_single_number empty n in
      (d, d)

(* Store a position and its matching text justification together. Both
   arguments are fully evaluated before either field is written, so a parse
   failure leaves args untouched. *)
let set_position_justified position justification =
  args.position <- position;
  args.justification <- justification

(* Edge positions take a single number; corner positions take a coordinate
   pair or a single number; the last three ignore their argument. *)
let settop n =
  set_position_justified
    (Cpdfposition.Top (Cpdfcoord.parse_single_number empty n))
    Cpdfaddtext.CentreJustify

let settopleft n =
  let a, b = parse_corner_offsets n in
  set_position_justified (Cpdfposition.TopLeft (a, b)) Cpdfaddtext.LeftJustify

let settopright n =
  let a, b = parse_corner_offsets n in
  set_position_justified (Cpdfposition.TopRight (a, b)) Cpdfaddtext.RightJustify

let setleft n =
  set_position_justified
    (Cpdfposition.Left (Cpdfcoord.parse_single_number empty n))
    Cpdfaddtext.LeftJustify

let setbottomleft n =
  let a, b = parse_corner_offsets n in
  set_position_justified (Cpdfposition.BottomLeft (a, b)) Cpdfaddtext.LeftJustify

let setbottom n =
  set_position_justified
    (Cpdfposition.Bottom (Cpdfcoord.parse_single_number empty n))
    Cpdfaddtext.CentreJustify

let setbottomright n =
  let a, b = parse_corner_offsets n in
  set_position_justified (Cpdfposition.BottomRight (a, b)) Cpdfaddtext.RightJustify

let setright n =
  set_position_justified
    (Cpdfposition.Right (Cpdfcoord.parse_single_number empty n))
    Cpdfaddtext.RightJustify

let setdiagonal _ =
  set_position_justified Cpdfposition.Diagonal Cpdfaddtext.CentreJustify

let setreversediagonal _ =
  set_position_justified Cpdfposition.ReverseDiagonal Cpdfaddtext.CentreJustify

let setcenter _ =
  set_position_justified Cpdfposition.Centre Cpdfaddtext.CentreJustify
2015-07-26 13:31:43 +02:00
(* Calculate -bates automatically so that n is applied to the first page in
   the range. The first page is the smallest page number in the current
   input's page specification. *)
let setbatesrange n =
  let pages = Cpdfpagespec.parse_pagespec_without_pdf (get_pagespec ()) in
  let first_page = fold_left min max_int pages in
  args.bates <- n + 1 - first_page
2013-08-20 16:32:57 +02:00
(* Store [r] in args.pagerotation; only 90 and 270 are accepted. *)
let setpagerotation r =
match r with
| 90 | 270 -> args.pagerotation <- r
| _ -> error "Bad Page rotation. Try 90 or 270."
(* Add file [s] as an input covering all pages with blank passwords, and
   record it as args.original_filename. *)
let set_input s =
args.original_filename <- s;
args.inputs <- (InFile s, "all", "", "", ref false, None)::args.inputs
2013-08-20 16:32:57 +02:00
2021-10-01 13:16:55 +02:00
(* Read the JSON file [s] with Cpdfjson.of_input and push the resulting
   document onto args.inputs as an AlreadyInMemory input. Also sets
   args.original_filename and args.create_objstm.

   The channel is now closed when parsing raises too (it used to leak); the
   exception itself is re-raised unchanged. *)
let set_json_input s =
  args.original_filename <- s;
  args.create_objstm <- true;
  let fh = open_in_bin s in
  let pdf =
    match Cpdfjson.of_input (Pdfio.input_of_channel fh) with
    | pdf -> close_in fh; pdf
    | exception e -> close_in_noerr fh; raise e
  in
  args.inputs <- (AlreadyInMemory (pdf, s), "all", "", "", ref false, None)::args.inputs
2021-10-01 13:16:55 +02:00
2013-08-20 16:32:57 +02:00
(* Add every file in directory [s] as an input, in sorted name order. When
   args.idir_only_pdfs is set, only names longer than four characters that end
   in ".pdf" (ignoring ASCII case) are kept. The new inputs are pushed so that
   the last name ends up at the head of args.inputs. *)
let set_input_dir s =
  let is_pdf leaf =
    let len = String.length leaf in
    len > 4 && String.lowercase_ascii (String.sub leaf (len - 4) 4) = ".pdf"
  in
  let leaves = sort compare (leafnames_of_dir s) in
  let chosen =
    if args.idir_only_pdfs
    then option_map (fun leaf -> if is_pdf leaf then Some leaf else None) leaves
    else leaves
  in
  let as_input leaf =
    (InFile (s ^ Filename.dir_sep ^ leaf), "all", "", "", ref false, None)
  in
  args.inputs <- (rev (map as_input chosen)) @ args.inputs
2013-08-20 16:32:57 +02:00
(* Turn on the debug flags of the reader, writer, crypt and ops modules, and
   set args.debug. *)
let setdebug () =
  List.iter set
    [Pdfread.read_debug;
     Pdfwrite.write_debug;
     Pdfcrypt.crypt_debug;
     Pdfops.debug];
  args.debug <- true
(* Simple flag and value setters on args. *)
let setboxes () =
args.boxes <- true
let set_no_encrypt_metadata () =
args.encrypt_metadata <- false
let set_retain_numbering () =
args.retain_numbering <- true
let set_remove_duplicate_fonts () =
args.remove_duplicate_fonts <- true
(* The unit argument lets [setencoding enc] be used as a thunk. *)
let setencoding enc () =
args.encoding <- enc
let setscaletofitscale f =
args.scale <- f
(* Select ScaleContents with factor [f], after the duplicate-operation check,
   and reset args.position to Diagonal. *)
let setscalecontents f =
2019-07-08 13:44:27 +02:00
detect_duplicate_op (ScaleContents f);
2013-08-20 16:32:57 +02:00
args.op <- Some (ScaleContents f);
2021-08-12 21:38:55 +02:00
args.position <- Cpdfposition.Diagonal (* Will be center *)
2013-08-20 16:32:57 +02:00
2014-09-11 15:05:13 +02:00
(* Squeezing also switches on object stream creation. *)
let setsqueeze () =
2014-09-20 21:16:08 +02:00
args.squeeze <- true;
args.create_objstm <- true
2014-09-11 15:05:13 +02:00
(* Store [s] in args.creator / args.producer. *)
let setcreatoraswego s =
2015-01-22 20:16:56 +01:00
args.creator <- Some s
let setproduceraswego s =
2015-01-22 20:16:56 +01:00
args.producer <- Some s
(* These two assign args.op directly, without the duplicate-operation check
   that most operation setters in this file perform. *)
let setprepend s =
args.op <- Some (Prepend s)
let setpostpend s =
args.op <- Some (Postpend s)
2013-08-20 16:32:57 +02:00
(* Parsing the control file *)
(* Read characters up to the closing double quote. [prev] holds the characters
   read so far, in reverse. A backslash followed by a quote adds a literal
   quote. Returns the text and the remaining characters after the closing
   quote; running out of input also ends the text. *)
let rec getuntilendquote prev = function
| [] -> implode (rev prev), []
| '"'::t -> implode (rev prev), t
| '\\'::'"'::t -> getuntilendquote ('"'::prev) t
| h::t -> getuntilendquote (h::prev) t
(* Read an unquoted argument: characters up to the next whitespace (which is
   consumed) or end of input. [prev] is the reversed accumulator. *)
let rec getarg prev = function
| [] -> implode (rev prev), []
| h::t ->
if Pdf.is_whitespace h
then implode (rev prev), t
else getarg (h::prev) t
(* Split a character list into arguments: whitespace separates arguments, and
   an argument starting with a double quote runs to the closing quote. [args]
   is the reversed accumulator (it shadows the global args record here). *)
let rec parse_chars args = function
| [] -> rev args
| h::more when Pdf.is_whitespace h ->
parse_chars args more
| '"'::more ->
let this, rest = getuntilendquote [] more in
parse_chars (this::args) rest
| h::t ->
let this, rest = getarg [] (h::t) in
parse_chars (this::args) rest
(* Mutable list, initially empty; its use is outside this part of the file. *)
let control_args = ref []
(* Read the control file [name] and split its contents into arguments with
   parse_chars. The channel is now closed once the contents have been read,
   or if reading raises; previously it was never closed. *)
let parse_control_file name =
  let fh = open_in_bin name in
  let contents =
    match Pdfio.bytes_of_input_channel fh with
    | bytes -> close_in_noerr fh; bytes
    | exception e -> close_in_noerr fh; raise e
  in
  parse_chars [] (charlist_of_bytes contents)
(* Make anon_fun treat the next three anonymous arguments as the encryption
   method, owner password and user password. *)
let setencryptcollect () =
encrypt_to_collect := 3
(* Select CopyFont with argument [s], after the duplicate-operation check. *)
let setcopyfont s =
2019-07-08 13:44:27 +02:00
detect_duplicate_op (CopyFont s);
2013-08-20 16:32:57 +02:00
args.op <- Some (CopyFont s)
let setfontpage i =
args.copyfontpage <- i
let setcopyfontname s =
args.copyfontname <- Some s
(* Select PadEvery; [i] must be strictly positive. *)
let setpadevery i =
2019-07-08 13:44:27 +02:00
detect_duplicate_op (PadEvery i);
if i > 0 then
args.op <- Some (PadEvery i)
else
error "PadEvery: must be > 0"
2013-08-20 16:32:57 +02:00
2017-12-18 20:44:02 +01:00
let setpadwith filename =
args.padwith <- Some filename
2013-08-20 16:32:57 +02:00
(* Unlike setpadevery, these two do not range-check [i]. *)
let setpadmultiple i =
2019-07-08 13:44:27 +02:00
detect_duplicate_op (PadMultiple i);
2013-08-20 16:32:57 +02:00
args.op <- Some (PadMultiple i)
2019-07-01 16:35:17 +02:00
let setpadmultiplebefore i =
2019-07-08 13:44:27 +02:00
detect_duplicate_op (PadMultipleBefore i);
2019-07-01 16:35:17 +02:00
args.op <- Some (PadMultipleBefore i)
2013-08-20 16:32:57 +02:00
let setfast () =
args.fast <- true
(* Explicitly add a range. [spec] is always remembered in args.dashrange; if
   there is a current input, its page specification is replaced with [spec].
   With no inputs, nothing else happens. *)
let setrange spec =
  args.dashrange <- spec;
  match args.inputs with
  | [] -> ()
  | (x, _, c, d, e, f) :: more ->
      args.inputs <- (x, spec, c, d, e, f) :: more
2013-08-20 16:32:57 +02:00
(* Attach revision [n] to the most recent input. With no input yet, a warning
   is logged and the value is dropped. *)
let setrevision n =
match args.inputs with
(a, b, c, d, e, _)::more ->
args.inputs <- (a, b, c, d, e, Some n) :: more
2014-12-22 14:20:20 +01:00
| [] ->
2023-04-25 14:45:56 +02:00
Pdfe.log "Warning. -revision ignored. Put it after the filename.\n"
2013-08-20 16:32:57 +02:00
(* Select ImageResolution with threshold [f]. *)
let setimageresolution f =
2019-07-08 13:44:27 +02:00
detect_duplicate_op (ImageResolution f);
2013-08-20 16:32:57 +02:00
args.op <- Some (ImageResolution f)
2020-12-11 15:13:24 +01:00
(* Setters for the paths to external helper programs. *)
let setimpath p =
args.path_to_im <- p
2023-12-06 13:20:27 +01:00
let setconvertpath p =
args.path_to_convert <- p
2023-12-22 20:33:10 +01:00
let setjbig2encpath p =
args.path_to_jbig2enc <- p
2020-12-11 15:13:24 +01:00
let setp2ppath p =
args.path_to_p2p <- p
2013-08-20 16:32:57 +02:00
(* Giving the source box selects the CopyBox operation and stores [s] in
   args.frombox. *)
let setfrombox s =
2019-07-08 13:44:27 +02:00
detect_duplicate_op CopyBox;
2013-08-20 16:32:57 +02:00
args.op <- Some CopyBox;
args.frombox <- Some s
let settobox s =
args.tobox <- Some s
let setmediaboxifmissing () =
args.mediabox_if_missing <- true
let settopage s =
args.topage <- Some s
(* Set the user / owner password of a stdin input. These are errors unless
   the most recent input is StdIn. *)
let setstdinuser u =
match args.inputs with
| (StdIn, x, _, o, f, g)::t -> args.inputs <- (StdIn, x, u, o, f, g)::t
2013-08-20 16:32:57 +02:00
| _ -> error "-stdin-user: must follow -stdin"
let setstdinowner o =
match args.inputs with
| (StdIn, x, u, _, f, g)::t -> args.inputs <- (StdIn, x, u, o, f, g)::t
2014-11-17 19:48:17 +01:00
| _ -> error "-stdin-owner: must follow -stdin"
2013-08-20 16:32:57 +02:00
2013-10-02 16:29:53 +02:00
(* Select one of the "open at page" operations with argument [n], after the
   duplicate-operation check. *)
let setopenatpage n =
2019-07-08 13:44:27 +02:00
detect_duplicate_op (OpenAtPage n);
2013-10-02 16:29:53 +02:00
args.op <- Some (OpenAtPage n)
let setopenatpagefit n =
2019-07-08 13:44:27 +02:00
detect_duplicate_op (OpenAtPageFit n);
2013-10-02 16:29:53 +02:00
args.op <- Some (OpenAtPageFit n)
2023-04-18 15:42:17 +02:00
let setopenatpagecustom n =
detect_duplicate_op (OpenAtPageCustom n);
args.op <- Some (OpenAtPageCustom n)
2013-10-24 16:21:54 +02:00
(* Translate a label style name into the Pdfpagelabels constructor of the
   same name and store it in args.labelstyle. Unknown names are an error. *)
let setlabelstyle s =
let style =
match s with
| "DecimalArabic" -> Pdfpagelabels.DecimalArabic
| "UppercaseRoman" -> Pdfpagelabels.UppercaseRoman
| "LowercaseRoman" -> Pdfpagelabels.LowercaseRoman
| "UppercaseLetters" -> Pdfpagelabels.UppercaseLetters
| "LowercaseLetters" -> Pdfpagelabels.LowercaseLetters
2014-09-18 15:27:07 +02:00
| "NoLabelPrefixOnly" -> Pdfpagelabels.NoLabelPrefixOnly
2013-10-24 16:21:54 +02:00
| _ -> error "Unknown label style"
in
2014-09-18 15:27:07 +02:00
args.labelstyle <- style
2013-10-24 16:21:54 +02:00
(* Simple setters for the remaining page-label fields and two other fields. *)
let setlabelprefix s =
args.labelprefix <- Some s
2014-09-18 16:40:22 +02:00
let setlabelstartval i =
args.labelstartval <- i
let setlabelsprogress () =
args.labelsprogress <- true
2014-10-02 20:57:06 +02:00
let setcpdflin s =
args.cpdflin <- Some s
2014-10-15 18:51:15 +02:00
let setrecrypt () =
args.recrypt <- true
2014-10-15 14:48:39 +02:00
2015-01-07 19:29:11 +01:00
(* Select RemoveDictEntry with argument [s]. *)
let setremovedictentry s =
2019-07-08 13:44:27 +02:00
detect_duplicate_op (RemoveDictEntry s);
2015-01-07 19:29:11 +01:00
args.op <- Some (RemoveDictEntry s)
2015-01-07 21:29:39 +01:00
(* Optional string set by setsqueezelogto; its use is outside this part of
   the file. *)
let logto = ref None
let setsqueezelogto s =
logto := Some s
(* Set the stay_on_error flag. *)
let setstayonerror () =
set stay_on_error
2022-09-26 21:38:16 +02:00
(* Store [s] in args.embedstd14. *)
let setembedstd14 s =
args.embedstd14 <- Some s
2015-04-13 15:17:48 +02:00
2023-10-05 17:45:11 +02:00
(* Install the callback through which Cpdfdrawcontrol sets or clears
   args.embedstd14. *)
let () =
  Cpdfdrawcontrol.setembedstd14 :=
    (fun b dir -> args.embedstd14 <- (if b then Some dir else None))
2017-05-19 20:10:49 +02:00
(* Select HardBox for [box]. *)
let sethardbox box =
2019-07-08 13:44:27 +02:00
detect_duplicate_op (HardBox box);
2017-05-19 20:10:49 +02:00
args.op <- Some (HardBox box)
(* Simple flag setters. *)
let setalsosetxml () =
args.alsosetxml <- true
let setjustsetxml () =
args.justsetxml <- true
2019-06-29 16:03:22 +02:00
(* Select SetMetadataDate with date [d]. *)
let setsetmetadatadate d =
2019-07-08 13:44:27 +02:00
detect_duplicate_op (SetMetadataDate d);
2019-06-29 16:03:22 +02:00
args.op <- Some (SetMetadataDate d)
let setgsmalformed () =
args.gs_malformed <- true
2019-07-03 15:40:32 +02:00
let setmergeaddbookmarks () =
args.merge_add_bookmarks <- true
let setmergeaddbookmarksusetitles () =
args.merge_add_bookmarks_use_titles <- true
2019-07-06 18:55:26 +02:00
(* Select BookmarksOpenToLevel with level [l]. *)
let setbookmarksopentolevel l =
2019-07-08 13:44:27 +02:00
detect_duplicate_op (BookmarksOpenToLevel l);
2019-07-06 18:55:26 +02:00
args.op <- Some (BookmarksOpenToLevel l)
let setcreatepdfpages i =
args.createpdf_pages <- i
(* Parse [s] as a width/height pair and store it as a paper size in points. *)
let setcreatepdfpapersize s =
args.createpdf_pagesize <-
let w, h = Cpdfcoord.parse_coordinate (Pdf.empty ()) s in
Pdfpaper.make Pdfunits.PdfPoint w h
2021-10-18 19:19:59 +02:00
(* Both select Impose (with true and false respectively) and store [s] in
   args.coord. *)
let setimpose s =
setop (Impose true) ();
args.coord <- s
let setimposexy s =
setop (Impose false) ();
args.coord <- s
2023-11-15 18:34:14 +01:00
(* Parse [s] as a coordinate pair and select Chop with both values truncated
   to integers. *)
let setchop s =
let x, y = Cpdfcoord.parse_coordinate empty s in
setop (Chop (int_of_float x, int_of_float y)) ()
2024-02-05 15:01:16 +01:00
(* Select ChopHV; the boolean is false for setchopv and true for setchoph. *)
let setchopv x =
setop (ChopHV (false, x)) ()
let setchoph y =
setop (ChopHV (true, y)) ()
2021-10-28 18:06:46 +02:00
(* Select the ReplaceDictEntry / PrintDictEntry operation with argument [s]. *)
let setreplacedictentry s =
setop (ReplaceDictEntry s) ()
2021-10-29 16:09:21 +02:00
let setprintdictentry s =
setop (PrintDictEntry s) ()
2021-10-28 18:06:46 +02:00
(* Parse [s] as JSON, convert it to a PDF object and store it as the
   replacement value. Any exception is reported through [error]. *)
let setreplacedictentryvalue s =
2021-10-29 16:09:21 +02:00
try
2023-02-17 16:30:39 +01:00
let pdfobj = Cpdfjson.object_of_json (Cpdfyojson.Safe.from_string s) in
2021-10-29 16:09:21 +02:00
args.replace_dict_entry_value <- pdfobj
with
e -> error (Printf.sprintf "Failed to parse replacement value: %s\n" (Printexc.to_string e))
2021-10-28 18:06:46 +02:00
(* As above, but stores the object as the optional search term. *)
let setdictentrysearch s =
2021-10-29 16:09:21 +02:00
try
2023-02-17 16:30:39 +01:00
let pdfobj = Cpdfjson.object_of_json (Cpdfyojson.Safe.from_string s) in
2021-10-29 16:09:21 +02:00
args.dict_entry_search <- Some pdfobj
with
e -> error (Printf.sprintf "Failed to parse search term: %s\n" (Printexc.to_string e))
2021-10-28 18:06:46 +02:00
2021-11-01 16:40:33 +01:00
(* Operation selectors taking a single argument. *)
let setprintfontencoding s =
setop (PrintFontEncoding s) ()
2021-11-19 01:32:35 +01:00
let settypeset s =
setop (Typeset s) ()
2021-12-02 00:50:04 +01:00
(* Table-of-contents and miscellaneous flag setters. *)
let settableofcontentstitle s =
args.toc_title <- s
2021-12-10 13:58:30 +01:00
let settocnobookmark () =
args.toc_bookmark <- false
2021-12-15 14:01:51 +01:00
let setidironlypdfs () =
args.idir_only_pdfs <- true
2022-01-02 16:18:55 +01:00
let setnowarnrotate () =
args.no_warn_rotate <- true
2023-07-18 20:52:54 +02:00
(* Unused for now *)
2022-09-09 19:59:46 +02:00
(* Translate an encoding name into the Pdftext constructor of the same name
   and store it in args.fontencoding. Unknown names are an error. *)
let setfontttfencoding s =
2022-09-21 17:10:48 +02:00
args.fontencoding <-
2022-09-09 19:59:46 +02:00
match s with
| "MacRomanEncoding" -> Pdftext.MacRomanEncoding
| "WinAnsiEncoding" -> Pdftext.WinAnsiEncoding
| "StandardEncoding" -> Pdftext.StandardEncoding
| _ -> error "Unknown encoding"
(* Log the only accepted form of the -gs-malformed-force command line and
   exit with code 1. *)
let whingemalformed () =
2023-04-25 14:45:56 +02:00
Pdfe.log "Command line must be of exactly the form\ncpdf <infile> -gs <path> -gs-malformed-force -o <outfile>\n";
exit 1
2022-12-16 17:49:59 +01:00
(* Add drawing operation [o] via Cpdfdrawcontrol.addop. A FontPack operation
   first sets Cpdfdrawcontrol.fontpack_initialised; this happens before the
   drawing-mode check. It is an error unless the current operation is Draw. *)
let addop o =
begin match o with Cpdfdraw.FontPack _ -> set Cpdfdrawcontrol.fontpack_initialised | _ -> () end;
begin match args.op with Some Draw -> () | _ -> error "Need to be in drawing mode for this." end;
Cpdfdrawcontrol.addop o
2022-12-23 15:29:47 +01:00
2023-07-17 14:38:35 +02:00
(* Work out how [font] is to be embedded.

   - StandardFont: when args.embedstd14 names a directory, the substitute TTF
     listed in [fontnames] is loaded from it (any failure is reported through
     [error]); otherwise a pre-made font pack is built for the standard font.
   - OtherFont: the font is taken to exist already under its name.
   - EmbeddedFont: looked up in Cpdfdrawcontrol.ttfs; as a side effect
     args.fontname is set to the stored font name. An unknown name is an
     error. *)
let embed_font_inner font =
  match font with
  | StandardFont f ->
      begin match args.embedstd14 with
      | Some dirname ->
          begin try
            let filename = hd (List.assoc f fontnames) in
            let fontfile =
              Pdfio.bytes_of_string (contents_of_file (Filename.concat dirname filename))
            in
            let fontname = Filename.remove_extension filename in
            Cpdfembed.EmbedInfo {fontfile; fontname; encoding = args.fontencoding}
          with
            e -> error (Printf.sprintf "Can't load font for embedding: %s\n" (Printexc.to_string e))
          end
      | None ->
          Cpdfembed.PreMadeFontPack
            (Cpdfembed.fontpack_of_standardfont (Pdftext.StandardFont (f, args.fontencoding)))
      end
  | OtherFont _ ->
      Cpdfembed.ExistingNamedFont
  | EmbeddedFont name ->
      begin match Hashtbl.find Cpdfdrawcontrol.ttfs name with
      | (fontname, embedded) ->
          args.fontname <- fontname;
          embedded
      | exception Not_found -> error (Printf.sprintf "Font %s not found" name)
      end
2023-07-07 15:34:51 +02:00
2023-07-17 14:38:35 +02:00
(* Embedding decision for the currently selected font. *)
let embed_font () = embed_font_inner args.font

(* Install the callbacks Cpdfdrawcontrol uses to embed the current font and
   to switch to drawing mode. *)
let () =
  Cpdfdrawcontrol.embed_font := embed_font;
  Cpdfdrawcontrol.setdrawing := (fun () -> args.op <- Some Draw)
2023-07-14 15:40:59 +02:00
(* Select the font called [f].

   A name registered in Cpdfdrawcontrol.ttfs selects that embedded font and
   nothing more. Otherwise [f] is tried as a standard font name; failing
   that, it must be empty or begin with '/' and is taken as the name of an
   existing font. In these last two cases, when in drawing mode, a FontPack
   operation is also added. *)
let setfont f =
  match Hashtbl.find Cpdfdrawcontrol.ttfs f with
  | (fontname, _) ->
      args.font <- EmbeddedFont f;
      args.fontname <- fontname
  | exception Not_found ->
      (* convert from written PDF representation to internal PDF string e.g # sequences *)
      let convert written =
        match Pdfread.lex_name (Pdfio.input_of_string written) with
        | Pdfgenlex.LexName s -> s
        | _ -> assert false
      in
      begin match Pdftext.standard_font_of_name ("/" ^ f) with
      | Some standard ->
          args.font <- StandardFont standard;
          args.fontname <- f
      | None ->
          if f <> "" && f.[0] <> '/' then error "Font not found";
          args.font <- OtherFont (convert f);
          args.fontname <- convert f
      end;
      (* If drawing, add the font pack as an op. *)
      begin match args.op with
      | Some Draw -> addop (Cpdfdraw.FontPack (f, embed_font (), null_hash ()))
      | _ -> ()
      end
2023-07-14 15:40:59 +02:00
(* Register a TrueType font from a specification of the form
   <name>=<filename>. Used for -load-ttf.

   The file is read and stored in Cpdfdrawcontrol.ttfs under <name>, together
   with a font name taken from the file's base name without its extension.
   If the selected operation is Draw, a FontPack operation for the font is
   added as well.

   Only the first '=' separates the two parts, so file names which themselves
   contain '=' are accepted. [error] is called when the specification has no
   '=' or when the file cannot be read; in the latter case the underlying
   exception is included in the message. *)
let loadttf n =
  let name, filename =
    match String.split_on_char '=' n with
    | name :: (_ :: _ as rest) -> name, String.concat "=" rest
    | _ -> error "loadttf: bad file specification. Should be <name>=<filename>"
  in
  (* Only reading the file is guarded, so unrelated failures further down are
     not misreported as a font loading problem. *)
  let fontfile =
    try Pdfio.bytes_of_string (contents_of_file filename) with
    | e ->
        error
          (Printf.sprintf "loadttf: could not load TTF %s: %s" filename
             (Printexc.to_string e))
  in
  let fontname = Filename.remove_extension (Filename.basename filename) in
  Hashtbl.replace
    Cpdfdrawcontrol.ttfs
    name
    (fontname, Cpdfembed.EmbedInfo {fontfile; fontname; encoding = args.fontencoding});
  (* If drawing, add the font pack as an op. *)
  match args.op with
  | Some Draw ->
      addop (Cpdfdraw.FontPack (fontname, embed_font_inner (EmbeddedFont name), null_hash ()))
  | _ -> ()
2023-07-17 14:53:09 +02:00
2023-10-04 16:59:52 +02:00
(* Make loadttf available through the Cpdfdrawcontrol.loadttf reference.
   NOTE(review): presumably this lets Cpdfdrawcontrol call back into this
   module without depending on it - confirm. *)
let () = Cpdfdrawcontrol.loadttf := loadttf
2023-04-25 14:45:56 +02:00
(* Replace the Pdfe logger with one that writes each message to standard
   output and flushes immediately. *)
let setstderrtostdout () =
  let log_to_stdout msg =
    output_string stdout msg;
    flush stdout
  in
  Pdfe.logger := log_to_stdout
2023-05-02 16:04:35 +02:00
(* Make TextWidth the selected operation, with [text] as its argument. *)
let settextwidth text = args.op <- Some (TextWidth text)
2023-07-14 17:27:53 +02:00
(* Make Draw the selected operation. *)
let setdraw () = args.op <- Some Draw
2023-07-14 17:27:53 +02:00
2023-11-02 19:49:15 +01:00
(* Make ExtractFontFile the selected operation, with [spec] as its
   argument. *)
let setextractfontfile spec = args.op <- Some (ExtractFontFile spec)
(* Give Cpdfdrawcontrol access to the font name and size held in [args]:
   the getters read args.fontname and args.fontsize, the size setter writes
   args.fontsize, and the name setter goes through setfont. *)
let () =
  Cpdfdrawcontrol.getfontname := (fun () -> args.fontname);
  Cpdfdrawcontrol.getfontsize := (fun () -> args.fontsize);
  Cpdfdrawcontrol.setfontname := setfont;
  Cpdfdrawcontrol.setfontsize := (fun size -> args.fontsize <- size)
2023-11-09 12:25:19 +01:00
(* Select the ListImages operation and switch output to JSON format. *)
let setlistimagesjson () =
  let () = setop ListImages () in
  args.format_json <- true
2023-12-04 17:32:12 +01:00
(* Read the file [path] and keep its contents in [jbig2_global]. *)
let set_jbig2_global path =
  let data = Pdfio.bytes_of_string (contents_of_file path) in
  jbig2_global := Some data

(* Reset [jbig2_global] to None. *)
let clear_jbig2_global () = jbig2_global := None
2023-12-06 13:20:27 +01:00
(* Store [quality] in args.jpegquality (the value given to -jpeg-to-jpeg). *)
let setjpegquality quality = args.jpegquality <- quality

(* Store [quality] in args.jpegqualitylossless. *)
let setjpegqualitylossless quality = args.jpegqualitylossless <- quality
2023-12-22 17:45:53 +01:00
(* Store [meth] in args.onebppmethod. *)
let set1bppmethod meth = args.onebppmethod <- meth
2023-12-24 14:54:21 +01:00
(* Setters for the thresholds held in [args]; each one simply stores its
   argument in the corresponding field. *)
let setpixelthreshold threshold = args.pixel_threshold <- threshold

let setlengththreshold threshold = args.length_threshold <- threshold

let setpercentagethreshold threshold = args.percentage_threshold <- threshold
2024-01-04 12:43:27 +01:00
(* Store [threshold] in args.dpi_threshold. *)
let setdpithreshold threshold = args.dpi_threshold <- threshold
2023-12-31 12:59:48 +01:00
(* Store [factor] in args.resample_factor. *)
let setlosslessresample factor = args.resample_factor <- factor

(* Store the negation of [dpi] in the same args.resample_factor field.
   NOTE(review): presumably a negative value marks a dpi target rather than
   a scale factor - confirm against the code that reads the field. *)
let setlosslessresampledpi dpi = args.resample_factor <- ~-.dpi
2024-01-01 20:09:40 +01:00
(* Turn on args.resample_interpolate. *)
let setresampleinterpolate () = args.resample_interpolate <- true
2023-12-31 12:59:48 +01:00
2024-01-12 16:00:28 +01:00
(* Store [threshold] in args.jbig2_lossy_threshold. *)
let setjbig2_lossy_threshold threshold = args.jbig2_lossy_threshold <- threshold
2023-12-28 17:18:25 +01:00
(* Switch on Cpdfimage.debug_image_processing (bound to
   -process-images-info in the option table). *)
let setprocessimagesinfo () = Cpdfimage.debug_image_processing := true
2024-01-17 19:37:58 +01:00
(* Make ExtractStream the selected operation, with [arg] as its argument. *)
let setextractstream arg = args.op <- Some (ExtractStream arg)

(* As setextractstream, and additionally turn on
   args.extract_stream_decompress. *)
let setextractstreamdecomp arg =
  setextractstream arg;
  args.extract_stream_decompress <- true

(* Make PrintObj the selected operation, with [arg] as its argument. *)
let setprintobj arg = args.op <- Some (PrintObj arg)
2013-08-20 16:32:57 +02:00
(* Parse a control file, make an argv, and then make Arg parse it. *)
let rec make_control_argv_and_parse filename =
control_args := !control_args @ parse_control_file filename
and specs =
[("-version",
Arg.Unit (setop Version),
" Print the cpdf version number");
("-o",
Arg.String setout,
" Set the output file, if appropriate");
("-i",
Arg.String set_input,
" Add an input file");
2023-01-11 05:12:51 +01:00
("-png",
Arg.String set_input_png,
" Load from a PNG file, converting to PDF");
("-jpeg",
Arg.String set_input_jpeg,
" Load from a JPEG file, converting to PDF");
2023-12-04 14:39:56 +01:00
("-jbig2",
Arg.String set_input_jbig2,
" Load from a JBIG2 fragment, converting to PDF");
2023-12-04 17:32:12 +01:00
("-jbig2-global",
Arg.String set_jbig2_global,
" Load a JBIG2 global stream");
("-jbig2-global-clear",
Arg.Unit clear_jbig2_global,
" Forget any JBIG2 global stream");
2013-08-20 16:32:57 +02:00
("-idir",
Arg.String set_input_dir,
" Add a directory of files");
2021-12-15 14:01:51 +01:00
("-idir-only-pdfs",
Arg.Unit setidironlypdfs,
" Have -idir ignore files not ending in .pdf");
2021-05-20 17:53:35 +02:00
("-pw",
Arg.String setdashpassword,
" Supply a password explicitly -pw=<password>");
2013-08-20 16:32:57 +02:00
("-stdin",
Arg.Unit setstdin,
" Read input from standard input");
("-stdin-owner",
Arg.String setstdinowner,
" Owner password for -stdin");
("-stdin-user",
Arg.String setstdinuser,
" User password for -stdin");
("-stdout",
Arg.Unit setstdout,
" Send result to standard output");
("-error-on-malformed",
Arg.Set Pdfread.error_on_malformed,
" Do not try to read malformed files");
2013-08-20 16:32:57 +02:00
("-range",
Arg.String setrange,
" Explicitly add a range");
2021-10-16 16:47:41 +02:00
("-collate",
2024-01-23 19:14:00 +01:00
Arg.Unit (fun () -> args.collate <- true),
2021-10-16 16:47:41 +02:00
" Collate ranges when merging");
("-revision",
Arg.Int setrevision,
2016-11-04 17:46:08 +01:00
"");
2013-08-20 16:32:57 +02:00
("-change-id",
Arg.Unit (setop ChangeId),
" Change the file's /ID tag");
("-no-preserve-objstm",
2024-01-23 19:22:09 +01:00
Arg.Unit (fun () -> args.preserve_objstm <- false),
2013-08-20 16:32:57 +02:00
" Don't preserve object streams");
("-create-objstm",
2024-01-23 19:22:09 +01:00
Arg.Unit (fun () -> args.create_objstm <- true),
2013-08-20 16:32:57 +02:00
" Create object streams anew");
("-keep-version",
2024-01-23 19:37:35 +01:00
Arg.Unit (fun () -> args.keepversion <- true),
2013-08-20 16:32:57 +02:00
" Don't change the version number");
("-l",
2024-01-23 15:32:31 +01:00
Arg.Unit (fun () -> args.linearize <- true),
2014-12-01 19:12:02 +01:00
" Linearize output file");
("-keep-l",
2024-01-23 15:32:31 +01:00
Arg.Unit (fun () -> args.keeplinearize <- true),
2014-12-01 19:12:02 +01:00
" Linearize if the input file was linearized");
2014-10-02 20:57:06 +02:00
("-cpdflin",
Arg.String setcpdflin,
" Set location of 'cpdflin'");
2014-10-15 14:48:39 +02:00
("-recrypt",
2014-10-15 18:51:15 +02:00
Arg.Unit setrecrypt,
2014-10-15 14:48:39 +02:00
" Keep this file's encryption when writing");
2013-08-20 16:32:57 +02:00
("-raw",
2021-12-19 13:55:06 +01:00
Arg.Unit (setencoding Cpdfmetadata.Raw),
2013-08-20 16:32:57 +02:00
" Do not process text");
("-stripped",
2021-12-19 13:55:06 +01:00
Arg.Unit (setencoding Cpdfmetadata.Stripped),
2013-08-20 16:32:57 +02:00
" Process text by simple stripping to ASCII");
("-utf8",
2021-12-19 13:55:06 +01:00
Arg.Unit (setencoding Cpdfmetadata.UTF8),
2013-08-20 16:32:57 +02:00
" Process text by conversion to UTF8 Unicode");
("-fast",
Arg.Unit setfast,
" Speed over correctness with malformed documents");
("-control",
Arg.String make_control_argv_and_parse,
2013-10-09 15:34:50 +02:00
" Use a control file. Deprecated. Use -args.");
2019-10-21 13:06:04 +02:00
("-args",
2021-10-12 19:40:47 +02:00
Arg.Unit (fun () -> ()),
2019-10-21 13:06:04 +02:00
" Get arguments from a file.");
2013-08-20 16:32:57 +02:00
("-merge",
Arg.Unit (setop Merge),
" Merge a number of files into one");
("-retain-numbering",
Arg.Unit set_retain_numbering,
" Don't renumber pages when merging");
2019-07-03 15:40:32 +02:00
("-merge-add-bookmarks",
Arg.Unit setmergeaddbookmarks,
" Add bookmarks for each file to merged file");
("-merge-add-bookmarks-use-titles",
Arg.Unit setmergeaddbookmarksusetitles,
" Use title of document rather than filename");
("-no-process-struct-trees",
2024-01-23 19:46:09 +01:00
Arg.Unit (fun () -> args.process_struct_trees <- false),
" Do not process structure trees");
2013-08-20 16:32:57 +02:00
("-remove-duplicate-fonts",
Arg.Unit set_remove_duplicate_fonts,
" Remove duplicate fonts when merging");
("-split",
Arg.Unit (setop Split),
" Split a file into individual pages");
("-chunk",
Arg.Int setchunk,
" Set chunk size for -split (default 1)");
("-split-bookmarks",
2024-01-23 15:32:31 +01:00
Arg.Int (fun i -> setop (SplitOnBookmarks i) ()),
2013-08-20 16:32:57 +02:00
" Split a file at bookmarks at a given level");
2023-10-25 19:44:29 +02:00
("-split-max",
Arg.String setsplitmax,
" Split a file to files of a given size");
2023-10-30 19:30:49 +01:00
("-spray",
Arg.Unit (setop Spray),
" Split a file by alternating pages");
2013-08-20 16:32:57 +02:00
("-scale-page",
Arg.String setscale,
" -scale-page \"sx sy\" scales by (sx, sy)");
("-scale-to-fit",
Arg.String setscaletofit,
" -scale-to-fit \"x y\" scales to page size (x, y)");
("-scale-contents",
Arg.Float setscalecontents,
2022-09-01 17:46:24 +02:00
" Scale contents by the given factor");
2013-08-20 16:32:57 +02:00
("-scale-to-fit-scale",
Arg.Float setscaletofitscale,
" -scale-to-fit-scale (1.0 = 100%)");
("-shift",
Arg.String setshift,
" -shift \"dx dy\" shifts the chosen pages");
2024-01-22 17:36:37 +01:00
("-shift-boxes",
Arg.String setshiftboxes,
2024-01-25 18:51:48 +01:00
" -shift-boxes \"dx dy\" shifts boxes on the chosen pages");
2013-08-20 16:32:57 +02:00
("-rotate",
Arg.Int setrotate,
" Set rotation of pages to 0, 90, 180, 270");
("-rotateby",
Arg.Int setrotateby,
" Rotate pages by 90, 180 or 270 degrees");
("-rotate-contents",
2024-01-23 15:32:31 +01:00
Arg.Float (fun f -> setop (RotateContents f) ()),
2013-08-20 16:32:57 +02:00
" Rotate contents of pages");
("-upright",
Arg.Unit (setop Upright),
" Make pages upright");
2022-01-02 16:18:55 +01:00
("-prerotate",
2024-01-23 19:22:09 +01:00
Arg.Unit (fun () -> args.prerotate <- true),
2022-01-02 16:18:55 +01:00
" Calls -upright on pages before modifying them, if required");
("-no-warn-rotate",
Arg.Unit setnowarnrotate,
" Do not warn on pages of PDFs which are not upright");
2013-08-20 16:32:57 +02:00
("-hflip",
Arg.Unit (setop HFlip),
" Flip pages horizontally");
("-vflip",
Arg.Unit (setop VFlip),
" Flip pages vertically");
("-crop",
Arg.String setcrop,
2023-08-03 14:21:24 +02:00
" Crop specified pages (synonym for -cropbox)");
("-cropbox",
Arg.String setcrop,
2023-08-03 14:21:24 +02:00
" Crop specified pages");
("-artbox",
Arg.String setart,
" Set art box for specified pages");
("-bleedbox",
Arg.String setbleed,
" Set bleed box for specified pages");
("-trimbox",
Arg.String settrim,
" Set trim box for specified pages");
2017-05-19 20:10:49 +02:00
("-hard-box",
Arg.String sethardbox,
" Hard crop specified pages to the given box");
2019-07-15 12:52:14 +02:00
("-show-boxes",
Arg.Unit (setop ShowBoxes),
" Show boxes by adding rectangles to pages");
2019-07-15 14:42:32 +02:00
("-trim-marks",
Arg.Unit (setop TrimMarks),
" Add trim marks");
2013-08-20 16:32:57 +02:00
("-remove-crop",
Arg.Unit (setop RemoveCrop),
" Remove cropping on specified pages");
("-remove-cropbox",
Arg.Unit (setop RemoveCrop),
" Synonym for -remove-crop");
("-remove-trimbox",
Arg.Unit (setop RemoveTrim),
" Remove trim box on specified pages");
("-remove-bleedbox",
Arg.Unit (setop RemoveBleed),
" Remove bleed box on specified pages");
("-remove-artbox",
Arg.Unit (setop RemoveArt),
" Remove art box on specified pages");
2013-08-20 16:32:57 +02:00
("-frombox", Arg.String setfrombox, " Set box to copy from");
("-tobox", Arg.String settobox, " Set box to copy to");
2015-01-21 13:20:49 +01:00
("-mediabox-if-missing",
Arg.Unit setmediaboxifmissing,
" If copy from box missing, substitute media box");
2013-08-20 16:32:57 +02:00
("-mediabox",
Arg.String setmediabox,
" Set media box on specified pages");
("-encrypt",
Arg.Unit setencryptcollect,
" Encrypt a document");
("-decrypt",
Arg.Unit (setop Decrypt),
" Decrypt a file");
2021-10-12 19:40:47 +02:00
("-decrypt-force",
2024-01-23 19:46:09 +01:00
Arg.Unit (fun () -> args.debugforce <- true),
2021-10-12 19:40:47 +02:00
" Decrypt a file even without password");
2024-01-23 19:37:35 +01:00
("-no-edit", Arg.Unit (fun () -> args.no_edit <- true) , " No edits");
("-no-print", Arg.Unit (fun () -> args.no_print <- true), " No printing");
("-no-copy", Arg.Unit (fun () -> args.no_copy <- true), " No copying");
("-no-annot", Arg.Unit (fun () -> args.no_annot <- true), " No annotations");
("-no-forms", Arg.Unit (fun () -> args.no_forms <- true), " No forms");
("-no-extract", Arg.Unit (fun () -> args.no_extract <- true), " No extracting");
("-no-assemble", Arg.Unit (fun () -> args.no_assemble <- true), " No assembling");
("-no-hq-print", Arg.Unit (fun () -> args.no_hq_print <- true), " No high quality printing");
2013-08-20 16:32:57 +02:00
("-no-encrypt-metadata",
Arg.Unit set_no_encrypt_metadata,
" Don't encrypt metadata (AES only)");
("-decompress",
Arg.Unit (setop Decompress),
" Decompress");
("-compress",
Arg.Unit (setop Compress),
" Compress streams, leaving metadata alone");
("-remove-duplicate-streams",
2024-01-23 19:22:09 +01:00
Arg.Unit (fun () -> args.remove_duplicate_streams <- true),
2013-08-20 16:32:57 +02:00
"");
("-list-bookmarks",
Arg.Unit (setop ListBookmarks),
" List Bookmarks");
2021-10-26 18:32:36 +02:00
("-list-bookmarks-json",
Arg.Unit setlistbookmarksjson,
" List Bookmarks in JSON format");
2013-08-20 16:32:57 +02:00
("-remove-bookmarks",
Arg.Unit (setop RemoveBookmarks),
" Remove bookmarks from a file");
("-add-bookmarks",
Arg.String setaddbookmarks,
" Add bookmarks from the given file");
2021-10-26 18:32:36 +02:00
("-add-bookmarks-json",
Arg.String setaddbookmarksjson,
" Add bookmarks from the given file in JSON format");
2019-07-06 18:55:26 +02:00
("-bookmarks-open-to-level",
Arg.Int setbookmarksopentolevel,
" Open bookmarks to this level (0 = all closed)");
2013-08-20 16:32:57 +02:00
("-presentation",
Arg.Unit (setop Presentation),
" Make a presentation");
("-trans",
Arg.String settrans,
" Set the transition method for -presentation");
("-duration",
Arg.Float setduration,
" Set the display duration for -presentation");
("-vertical",
Arg.Unit setvertical,
" Set dimension for Split and Blinds styles");
("-outward",
Arg.Unit setoutward,
" Set direction for Split and Box styles");
("-direction",
Arg.Int setdirection,
" Set direction for Wipe and Glitter styles");
("-effect-duration",
Arg.Float seteffectduration,
" Set the effect duration in seconds");
("-stamp-on",
Arg.String setstampon,
" Stamp a file on some pages of another");
("-stamp-under",
Arg.String setstampunder,
" Stamp a file under some pages of another");
2014-10-08 15:52:55 +02:00
("-scale-stamp-to-fit",
2024-01-23 19:22:09 +01:00
Arg.Unit (fun () -> args.scale_stamp_to_fit <- true),
2014-10-08 15:52:55 +02:00
" Scale the stamp to fit the page");
2013-08-20 16:32:57 +02:00
("-combine-pages",
Arg.String setcombinepages,
" Combine two files by merging individual pages");
("-add-text",
Arg.String setaddtext,
" Superimpose text on the given range of pages");
("-remove-text",
Arg.Unit (setop RemoveText),
" Remove text previously added by cpdf");
("-add-rectangle",
Arg.String setrectangle,
2016-11-08 19:15:04 +01:00
" Add a rectangle to the page");
2013-08-20 16:32:57 +02:00
("-bates",
2024-01-23 19:37:35 +01:00
Arg.Int (fun n -> args.bates <- n),
2013-08-20 16:32:57 +02:00
" Set the base bates number");
2015-07-26 13:31:43 +02:00
("-bates-at-range",
Arg.Int setbatesrange,
" Set the base bates number at first page in range");
2015-07-17 17:34:47 +02:00
("-bates-pad-to",
2024-01-23 19:37:35 +01:00
Arg.Int (fun n -> args.batespad <- Some n),
2015-07-17 17:34:47 +02:00
" Pad the bates number with leading zeroes to width");
2013-08-20 16:32:57 +02:00
("-font",
Arg.String setfont,
" Set the font");
2022-09-21 17:10:48 +02:00
("-font-size",
Arg.Float setfontsize,
" Set the font size");
("-load-ttf",
Arg.String loadttf,
" Use a TrueType font");
2022-09-26 21:38:16 +02:00
("-embed-std14",
Arg.String setembedstd14,
" Embed standard 14 fonts");
2013-08-20 16:32:57 +02:00
("-color",
Arg.String setcolor,
" Set the color");
("-opacity",
Arg.Float setopacity,
" Set the text opacity");
("-outline",
2024-01-23 19:22:09 +01:00
Arg.Unit (fun () -> args.outline <- true),
2013-08-20 16:32:57 +02:00
" Use outline mode for text");
("-linewidth",
2024-01-23 19:22:09 +01:00
Arg.Float (fun f -> args.linewidth <- f),
2013-08-20 16:32:57 +02:00
" Set line width for outline text");
("-pos-center",
Arg.String setposcenter,
" Set position relative to center of baseline");
("-pos-left",
Arg.String setposleft,
" Set position relative to left of baseline");
("-pos-right",
Arg.String setposright,
" Set position relative to right of baseline");
("-top",
Arg.String settop,
" Set position relative to center top of page");
("-topleft",
Arg.String settopleft,
" Set position relative to top left of page");
("-topright",
Arg.String settopright,
" Set position relative to top right of page");
("-left",
Arg.String setleft,
" Set position relative to center left of page");
("-bottomleft",
Arg.String setbottomleft,
" Set position relative to bottom left of page");
("-bottom",
Arg.String setbottom,
" Set position relative to center bottom of page");
("-bottomright",
Arg.String setbottomright,
" Set position relative to bottom right of page");
("-right",
Arg.String setright,
" Set position relative to center right of page");
("-diagonal",
Arg.Unit setdiagonal,
" Place text diagonally across page");
("-reverse-diagonal",
Arg.Unit setreversediagonal,
" Place text diagonally across page from top left");
("-center",
Arg.Unit setcenter,
" Place text in the center of the page");
2013-08-20 16:32:57 +02:00
("-justify-left",
2023-05-11 22:54:23 +02:00
Arg.Unit (fun () -> args.justification <- Cpdfaddtext.LeftJustify),
2013-08-20 16:32:57 +02:00
" Justify multiline text left");
("-justify-right",
2023-05-11 22:54:23 +02:00
Arg.Unit (fun () -> args.justification <- Cpdfaddtext.RightJustify),
2013-08-20 16:32:57 +02:00
" Justify multiline text right");
("-justify-center",
2023-05-11 22:54:23 +02:00
Arg.Unit (fun () -> args.justification <- Cpdfaddtext.CentreJustify),
2019-10-21 12:54:34 +02:00
" Justify multiline text center");
2013-08-20 16:32:57 +02:00
("-underneath",
2023-05-11 22:54:23 +02:00
Arg.Unit (fun () -> args.underneath <- true),
2013-08-20 16:32:57 +02:00
" Text stamp is underneath content");
("-line-spacing",
2023-05-11 22:54:23 +02:00
Arg.Float (fun f -> args.linespacing <- f),
2013-08-20 16:32:57 +02:00
" Line spacing (1 is normal)");
("-midline",
2023-05-11 22:54:23 +02:00
Arg.Unit (fun () -> args.midline <- true),
2013-08-20 16:32:57 +02:00
" Adjust text to midline rather than baseline");
2015-01-20 16:50:36 +01:00
("-topline",
2023-05-11 22:54:23 +02:00
Arg.Unit (fun () -> args.topline <- true),
2015-01-20 16:50:36 +01:00
" Adjust text to topline rather than baseline");
2013-08-20 16:32:57 +02:00
("-relative-to-cropbox",
2023-05-11 22:54:23 +02:00
Arg.Unit (fun () -> args.relative_to_cropbox <- true),
2013-08-20 16:32:57 +02:00
" Add text relative to Crop Box not Media Box");
2019-07-02 19:20:05 +02:00
("-embed-missing-fonts",
Arg.Unit (setop EmbedMissingFonts),
" Embed missing fonts by calling gs");
2013-08-20 16:32:57 +02:00
("-twoup",
Arg.Unit (setop TwoUp),
" Put 2 pages onto one");
("-twoup-stack",
Arg.Unit (setop TwoUpStack),
" Stack 2 pages onto one twice the size");
2021-10-18 19:19:59 +02:00
("-impose",
Arg.String setimpose,
" Impose onto given page size");
("-impose-xy",
Arg.String setimposexy,
" Impose x by y (zero means unlimited)");
("-impose-columns",
2024-01-23 19:14:00 +01:00
Arg.Unit (fun () -> args.impose_columns <- true),
2021-10-18 19:19:59 +02:00
" Impose in columns rather than rows");
("-impose-rtl",
2024-01-23 19:14:00 +01:00
Arg.Unit (fun () -> args.impose_rtl <- true),
2023-11-29 11:33:07 +01:00
" Impose right-to-left");
2021-10-18 19:19:59 +02:00
("-impose-btt",
2024-01-23 19:14:00 +01:00
Arg.Unit (fun () -> args.impose_btt <- true),
2023-11-29 11:33:07 +01:00
" Impose bottom-to-top");
2021-10-18 19:19:59 +02:00
("-impose-margin",
2024-01-23 19:14:00 +01:00
Arg.Float (fun f -> args.impose_margin <- f),
2021-10-18 19:19:59 +02:00
" Add margin around whole imposed page");
("-impose-spacing",
2024-01-23 19:14:00 +01:00
Arg.Float (fun f -> args.impose_spacing <- f),
2021-10-18 19:19:59 +02:00
" Add spacing around each imposed page");
("-impose-linewidth",
2024-01-23 19:14:00 +01:00
Arg.Float (fun f -> args.impose_linewidth <- f),
2021-10-18 19:19:59 +02:00
" Imposition divider line width (0=none)");
2023-11-15 18:34:14 +01:00
("-chop",
Arg.String setchop,
" Chop x by y");
2024-02-05 15:01:16 +01:00
("-chop-h",
Arg.Float setchoph,
" Chop horizontally");
("-chop-v",
Arg.Float setchopv,
" Chop horizontally");
2023-11-29 11:33:07 +01:00
("-chop-columns",
2024-01-23 19:14:00 +01:00
Arg.Unit (fun () -> args.impose_columns <- true),
2023-11-29 11:33:07 +01:00
" Chop in columns rather than rows");
("-chop-rtl",
2024-01-23 19:14:00 +01:00
Arg.Unit (fun () -> args.impose_rtl <- true),
2023-11-29 11:33:07 +01:00
" Chop right-to-left");
("-chop-btt",
2024-01-23 19:14:00 +01:00
Arg.Unit (fun () -> args.impose_btt <- true),
2023-11-29 11:33:07 +01:00
" Chop bottom-to-top");
2013-08-20 16:32:57 +02:00
("-pad-before",
Arg.Unit (setop PadBefore),
" Add a blank page before the given pages");
("-pad-after",
Arg.Unit (setop PadAfter),
" Add a blank page after the given pages");
("-pad-every",
Arg.Int setpadevery,
" Add a blank page after every n pages");
2017-12-18 20:44:02 +01:00
("-pad-with",
Arg.String setpadwith,
" Use a given PDF instead of a blank page");
2013-08-20 16:32:57 +02:00
("-pad-multiple",
Arg.Int setpadmultiple,
" Pad the document to a multiple of n pages");
2019-07-01 16:35:17 +02:00
("-pad-multiple-before",
Arg.Int setpadmultiplebefore,
" Pad the document at beginning to a multiple of n pages");
2013-08-20 16:32:57 +02:00
("-list-annotations",
Arg.Unit (setop ListAnnotations),
" List annotations");
2021-10-27 19:55:52 +02:00
("-list-annotations-json",
Arg.Unit setlistannotationsjson,
" List annotations in JSON format");
2013-08-20 16:32:57 +02:00
("-copy-annotations",
Arg.String setcopyannotations,
" Copy annotations from given file");
("-remove-annotations",
Arg.Unit (setop RemoveAnnotations),
" Remove annotations");
2023-01-13 07:30:46 +01:00
("-set-annotations",
Arg.String setsetannotations,
" Set annotations from JSON file");
2013-08-20 16:32:57 +02:00
("-list-fonts",
Arg.Unit (setop Fonts),
" Output font list");
2023-10-31 16:50:23 +01:00
("-list-fonts-json",
Arg.Unit setlistfontsjson,
" Output font list in JSON format");
2013-08-20 16:32:57 +02:00
("-info",
Arg.Unit (setop Info),
" Output file information");
2023-10-31 16:50:23 +01:00
("-info-json",
Arg.Unit setinfojson,
" Output file information in JSON format");
2013-08-20 16:32:57 +02:00
("-page-info",
Arg.Unit (setop PageInfo),
2019-10-21 12:54:34 +02:00
" Output page information");
2023-10-31 16:50:23 +01:00
("-page-info-json",
Arg.Unit setpageinfojson,
" Output page information in JSON format");
2013-08-20 16:32:57 +02:00
("-set-author",
2024-01-23 15:32:31 +01:00
Arg.String (fun s -> setop (SetAuthor s) ()),
2013-08-20 16:32:57 +02:00
" Set Author");
("-set-title",
2024-01-23 15:32:31 +01:00
Arg.String (fun s -> setop (SetTitle s) ()),
2013-08-20 16:32:57 +02:00
" Set Title");
("-set-subject",
2024-01-23 15:32:31 +01:00
Arg.String (fun s -> setop (SetSubject s) ()),
2013-08-20 16:32:57 +02:00
" Set Subject");
("-set-keywords",
2024-01-23 15:32:31 +01:00
Arg.String (fun s -> setop (SetKeywords s) ()),
2013-08-20 16:32:57 +02:00
" Set Keywords");
("-set-create",
2024-01-23 15:32:31 +01:00
Arg.String (fun s -> setop (SetCreate s) ()),
2013-08-20 16:32:57 +02:00
" Set Creation date");
("-set-modify",
2024-01-23 15:32:31 +01:00
Arg.String (fun s -> setop (SetModify s) ()),
2013-08-20 16:32:57 +02:00
" Set Modification date");
("-set-creator",
2024-01-23 15:32:31 +01:00
Arg.String (fun s -> setop (SetCreator s) ()),
2013-08-20 16:32:57 +02:00
" Set Creator");
("-set-producer",
2024-01-23 15:32:31 +01:00
Arg.String (fun s -> setop (SetProducer s) ()),
2013-08-20 16:32:57 +02:00
" Set Producer");
("-set-trapped",
Arg.Unit (setop SetTrapped),
" Mark as trapped");
("-set-untrapped",
Arg.Unit (setop SetUntrapped),
" Mark as not trapped");
2019-07-28 13:35:51 +02:00
("-also-set-xmp",
Arg.Unit setalsosetxml,
2019-07-28 13:35:51 +02:00
" Also set XMP metadata");
("-just-set-xmp",
Arg.Unit setjustsetxml,
2019-07-28 13:35:51 +02:00
" Just set XMP metadata, not old-fashioned metadata");
2019-07-01 15:40:22 +02:00
("-create-metadata",
Arg.Unit (setop CreateMetadata),
2019-10-21 12:54:34 +02:00
" Create XMP metadata from scratch.");
2013-08-20 16:32:57 +02:00
("-set-page-layout",
2024-01-23 15:32:31 +01:00
Arg.String (fun s -> setop (SetPageLayout s) ()),
2013-08-20 16:32:57 +02:00
" Set page layout upon document opening");
("-set-page-mode",
2024-01-23 15:32:31 +01:00
Arg.String (fun s -> setop (SetPageMode s) ()),
2013-08-20 16:32:57 +02:00
" Set page mode upon document opening");
2023-06-02 14:05:42 +02:00
("-set-non-full-screen-page-mode",
2024-01-23 15:32:31 +01:00
Arg.String (fun s -> setop (SetNonFullScreenPageMode s) ()),
2023-04-17 20:55:11 +02:00
" Set non full screen page mode if page mode is FullScreen");
2013-10-02 16:29:53 +02:00
("-open-at-page",
Arg.String setopenatpage,
2013-10-02 16:29:53 +02:00
" Set initial page");
("-open-at-page-fit",
Arg.String setopenatpagefit,
2023-06-02 14:05:42 +02:00
" Set initial page, scaling to fit");
2023-04-18 15:42:17 +02:00
("-open-at-page-custom",
Arg.String setopenatpagecustom,
2023-06-02 14:05:42 +02:00
" Set initial page, with custom scaling");
2013-08-20 16:32:57 +02:00
("-set-metadata",
2024-01-23 15:32:31 +01:00
Arg.String (fun s -> setop (SetMetadata s) ()),
2013-08-20 16:32:57 +02:00
" Set metadata to the contents of a file");
("-print-metadata",
Arg.Unit (setop Metadata),
" Output metadata information");
("-remove-metadata",
Arg.Unit (setop RemoveMetadata),
" Remove document metadata");
2019-06-29 16:03:22 +02:00
("-set-metadata-date",
Arg.String setsetmetadatadate,
" Set the XMP metadata date property");
2013-08-20 16:32:57 +02:00
("-hide-toolbar",
Arg.String hidetoolbar,
" Hide the viewer's toolbar");
("-hide-menubar",
Arg.String hidemenubar,
" Hide the viewer's menubar");
("-hide-window-ui",
Arg.String hidewindowui,
" Hide the viewer's scroll bars etc.");
("-fit-window",
Arg.String fitwindow,
" Resize document's window to fit size of page");
("-center-window",
Arg.String centerwindow,
" Position window in the center of screen");
("-display-doc-title",
Arg.String displaydoctitle,
" Display document's title in the title bar");
("-pages",
Arg.Unit (setop CountPages),
" Count pages");
("-list-attached-files",
Arg.Unit (setop ListAttachedFiles),
" List attached files");
("-dump-attachments",
Arg.Unit (setop DumpAttachedFiles),
2019-07-14 14:50:48 +02:00
" Dump attachments to disk");
2013-08-20 16:32:57 +02:00
("-attach-file",
Arg.String setattachfile,
" Attach a file");
("-to-page",
Arg.String settopage,
" Attach file to given page instead of document");
("-remove-files",
Arg.Unit (setop RemoveAttachedFiles),
" Remove embedded attached document-level files");
2023-11-09 12:25:19 +01:00
("-list-images",
Arg.Unit (setop ListImages),
" List images");
("-list-images-json",
Arg.Unit setlistimagesjson,
" List images in JSON format");
("-list-images-used",
2023-11-09 12:25:19 +01:00
Arg.Unit (fun () -> setop (ImageResolution max_float) ()),
" List images at point of use");
("-list-images-used-json",
2023-11-09 12:25:19 +01:00
Arg.Unit (fun () -> args.format_json <- true; setop (ImageResolution max_float) ()),
" List images at point of use in JSON format");
2013-08-20 16:32:57 +02:00
("-image-resolution",
Arg.Float setimageresolution,
2023-11-09 12:25:19 +01:00
" List images at point of use under a given dpi");
("-image-resolution-json",
Arg.Float (fun f -> setimageresolution f; args.format_json <- true),
" List images at point of use under a given dpi");
2013-08-20 16:32:57 +02:00
("-copy-font",
Arg.String setcopyfont,
" Copy a named font");
("-copy-font-page",
Arg.Int setfontpage,
" Set the page a copied font is drawn from");
("-copy-font-name",
Arg.String setcopyfontname,
" Set the name of the font to copy");
2019-07-31 18:32:18 +02:00
("-remove-fonts",
Arg.Unit (setop RemoveFonts),
" Remove embedded fonts");
2013-08-20 16:32:57 +02:00
("-missing-fonts",
Arg.Unit (setop MissingFonts),
" List missing fonts");
("-remove-id",
Arg.Unit (setop RemoveId),
" Remove the file's /ID tag");
("-draft",
Arg.Unit (setop Draft),
" Remove images from the file");
2019-07-09 17:31:45 +02:00
("-draft-remove-only",
2024-01-23 19:09:15 +01:00
Arg.String (fun s -> args.removeonly <- Some s),
2019-07-09 17:31:45 +02:00
" Only remove named image");
2013-08-20 16:32:57 +02:00
("-boxes",
Arg.Unit setboxes,
" Add crossed boxes to -draft option");
2019-07-11 18:19:40 +02:00
("-remove-all-text",
Arg.Unit (setop RemoveAllText),
" Remove all text");
2013-08-20 16:32:57 +02:00
("-blacktext",
Arg.Unit (setop BlackText),
" Blacken document text");
("-blacklines",
Arg.Unit (setop BlackLines),
" Blacken lines in document");
("-blackfills",
Arg.Unit (setop BlackFills),
" Blacken fills in document");
("-thinlines",
Arg.String setthinlines,
" Set minimum line thickness to the given width");
2016-11-09 16:42:47 +01:00
("-remove-clipping",
Arg.Unit (setop RemoveClipping),
" Remove clipping paths");
2013-08-20 16:32:57 +02:00
("-clean",
Arg.Unit (setop Clean),
" Garbage-collect a file");
("-set-version",
2024-01-23 15:32:31 +01:00
Arg.Int (fun i -> setop (SetVersion i) ()),
2013-08-20 16:32:57 +02:00
" Set PDF version number");
("-copy-id-from",
Arg.String setcopyid,
" Copy one file's ID tag to another");
("-print-page-labels",
Arg.Unit (setop PrintPageLabels),
" Print page labels");
2023-10-31 16:50:23 +01:00
("-print-page-labels-json",
Arg.Unit setprintpagelabelsjson,
" Print page labels in JSON format");
("-remove-page-labels",
Arg.Unit (setop RemovePageLabels),
" Remove page labels");
("-add-page-labels",
2013-10-24 16:21:54 +02:00
Arg.Unit (setop AddPageLabels),
" Add or replace page labels");
2013-10-24 16:21:54 +02:00
("-label-style",
Arg.String setlabelstyle,
" Set label style (default DecimalArabic)");
("-label-prefix",
Arg.String setlabelprefix,
" Set label prefix (default none)");
2014-09-18 16:40:22 +02:00
("-label-startval",
Arg.Int setlabelstartval,
" Set label start value (default 1)");
("-labels-progress",
Arg.Unit setlabelsprogress,
" Label start value progresses with multiple ranges");
2015-01-07 19:29:11 +01:00
("-remove-dict-entry",
Arg.String setremovedictentry,
" Remove an entry from all dictionaries");
2021-10-28 18:06:46 +02:00
("-replace-dict-entry",
Arg.String setreplacedictentry,
" Remove an entry from all dictionaries");
("-replace-dict-entry-value",
Arg.String setreplacedictentryvalue,
" Replacement value for -replace-dict-entry");
("-dict-entry-search",
Arg.String setdictentrysearch,
" Search string for -remove-dict-entry and -replace-dict-entry");
2021-10-29 16:09:21 +02:00
("-print-dict-entry",
Arg.String setprintdictentry,
" Print dictionary values of a given key");
2015-01-22 20:16:56 +01:00
("-producer",
Arg.String setproduceraswego,
2015-01-22 20:16:56 +01:00
" Change the /Producer entry in the /Info dictionary");
("-creator",
Arg.String setcreatoraswego,
2015-01-22 20:16:56 +01:00
" Change the /Creator entry in the /Info dictionary");
2016-11-08 19:15:04 +01:00
("-list-spot-colors",
2016-11-03 18:11:08 +01:00
Arg.Unit (setop ListSpotColours),
2016-11-08 19:15:04 +01:00
" List spot colors");
("-create-pdf",
Arg.Unit (setop CreatePDF),
" Create a new PDF");
("-create-pdf-pages",
Arg.Int setcreatepdfpages,
" Number of pages for new PDF");
("-create-pdf-papersize",
Arg.String setcreatepdfpapersize,
" Paper size for new PDF");
("-prepend-content",
Arg.String setprepend,
" Prepend content to page");
("-postpend-content",
Arg.String setpostpend,
" Postpend content to page");
2020-12-11 20:01:02 +01:00
("-gs",
2024-01-23 19:22:09 +01:00
Arg.String (fun s -> args.path_to_ghostscript <- s),
2020-12-11 20:01:02 +01:00
" Path to gs executable");
("-gs-malformed",
Arg.Unit setgsmalformed,
" Also try to reconstruct malformed files with gs");
("-gs-quiet",
2024-01-23 19:09:15 +01:00
Arg.Unit (fun () -> args.gs_quiet <- true),
2020-12-11 20:01:02 +01:00
" Make gs go into quiet mode");
2021-05-20 17:53:35 +02:00
("-gs-malformed-force",
Arg.Unit whingemalformed,
" See manual for usage.");
2020-12-11 20:01:02 +01:00
("-im",
Arg.String setimpath,
2024-01-10 19:39:00 +01:00
" Path to magick executable");
2020-12-11 20:01:02 +01:00
("-p2p",
Arg.String setp2ppath,
" Path to pnmtopng executable");
("-extract-images",
Arg.Unit (setop ExtractImages),
" Extract images to file");
2020-12-20 16:41:52 +01:00
("-dedup",
2024-01-23 19:14:00 +01:00
Arg.Unit (fun () -> args.dedup <- true),
2020-12-20 16:41:52 +01:00
" Deduplicate extracted images fully");
("-dedup-perpage",
2024-01-23 19:14:00 +01:00
Arg.Unit (fun () -> args.dedup_per_page <- true),
2020-12-20 16:41:52 +01:00
" Deduplicate extracted images per page only");
2023-12-06 13:20:27 +01:00
("-process-images",
Arg.Unit (setop ProcessImages),
" Process images within PDF");
2023-12-28 17:18:25 +01:00
("-process-images-info",
Arg.Unit setprocessimagesinfo,
" Show info when processing images");
2023-12-06 13:20:27 +01:00
("-convert",
Arg.String setconvertpath,
" Path to convert executable");
2023-12-22 20:33:10 +01:00
("-jbig2enc",
Arg.String setjbig2encpath,
" Path to jbig2enc executable");
2023-12-06 13:20:27 +01:00
("-jpeg-to-jpeg",
Arg.Float setjpegquality,
2023-12-06 13:20:27 +01:00
" Set JPEG quality for existing JPEGs");
("-lossless-to-jpeg",
Arg.Float setjpegqualitylossless,
2023-12-06 13:20:27 +01:00
" Set JPEG quality for existing lossless images");
2023-12-22 17:45:53 +01:00
("-1bpp-method",
Arg.String set1bppmethod,
" Set 1bpp compression method for existing images");
2024-01-12 16:00:28 +01:00
("-jbig2-lossy-threshold",
Arg.Float setjbig2_lossy_threshold,
" Set jbig2enc lossy threshold");
2023-12-24 14:54:21 +01:00
("-pixel-threshold",
Arg.Int setpixelthreshold,
" Only process images with more pixels than this");
("-length-threshold",
Arg.Int setlengththreshold,
" Only process images with data longer than this");
("-percentage-threshold",
Arg.Float setpercentagethreshold,
" Only substitute lossy image when smaller than this");
2024-01-04 12:43:27 +01:00
("-dpi-threshold",
Arg.Float setdpithreshold,
2024-01-04 12:43:27 +01:00
" Only process image when always higher than this dpi");
2023-12-31 12:59:48 +01:00
("-lossless-resample",
Arg.Float setlosslessresample,
2023-12-31 12:59:48 +01:00
" Resample lossless images to given part of original");
("-lossless-resample-dpi",
Arg.Float setlosslessresampledpi,
" Resample lossless images to given DPI");
2023-12-31 12:59:48 +01:00
("-resample-interpolate",
2024-01-01 20:09:40 +01:00
Arg.Unit setresampleinterpolate,
2023-12-31 12:59:48 +01:00
" Interpolate when resampling");
2021-05-20 17:53:35 +02:00
("-squeeze",
Arg.Unit setsqueeze,
" Squeeze");
("-squeeze-log-to",
Arg.String setsqueezelogto,
" Squeeze log location");
("-squeeze-no-pagedata",
2024-01-23 19:09:15 +01:00
Arg.Unit (fun () -> args.squeeze_pagedata <- false),
2021-05-20 17:53:35 +02:00
" Don't recompress pages");
("-squeeze-no-recompress",
2024-01-23 19:09:15 +01:00
Arg.Unit (fun () -> args.squeeze_recompress <- false),
2021-05-20 17:53:35 +02:00
" Don't recompress streams");
("-output-json",
Arg.Unit (setop OutputJSON),
" Export PDF file as JSON data");
("-output-json-parse-content-streams",
2024-01-23 19:09:15 +01:00
Arg.Unit (fun () -> args.jsonparsecontentstreams <- true),
2021-05-20 17:53:35 +02:00
" Parse content streams");
("-output-json-no-stream-data",
2024-01-23 19:09:15 +01:00
Arg.Unit (fun () -> args.jsonnostreamdata <- true),
2021-05-20 17:53:35 +02:00
" Skip stream data for brevity");
2021-10-04 19:38:36 +02:00
("-output-json-decompress-streams",
2024-01-23 19:09:15 +01:00
Arg.Unit (fun () -> args.jsondecompressstreams <- true),
2021-10-04 19:38:36 +02:00
" Skip stream data for brevity");
2021-12-30 16:25:24 +01:00
("-output-json-clean-strings",
2024-01-23 19:09:15 +01:00
Arg.Unit (fun () -> args.jsoncleanstrings <- true),
2021-12-30 16:25:24 +01:00
" Convert UTF16BE strings to PDFDocEncoding when possible");
2021-10-01 13:16:55 +02:00
("-j",
Arg.String set_json_input,
2021-10-21 19:10:47 +02:00
" Load a PDF JSON file");
2021-05-20 17:53:35 +02:00
("-ocg-list",
Arg.Unit (setop OCGList),
" List optional content groups");
("-ocg-rename",
Arg.Unit (setop OCGRename),
" Rename optional content group");
("-ocg-rename-from",
2024-01-23 19:09:15 +01:00
Arg.String (fun s -> args.ocgrenamefrom <- s),
2021-05-20 17:53:35 +02:00
" Rename from (with -ocg-rename)");
("-ocg-rename-to",
2024-01-23 19:09:15 +01:00
Arg.String (fun s -> args.ocgrenameto <- s),
2021-05-20 17:53:35 +02:00
" Rename to (with -ocg-rename)");
("-ocg-order-all",
Arg.Unit (setop OCGOrderAll),
" Repair /Order so all OCGs listed ");
("-ocg-coalesce-on-name",
Arg.Unit (setop OCGCoalesce),
" Coalesce OCGs with like name");
("-stamp-as-xobject",
Arg.String setstampasxobject,
2021-06-21 16:56:02 +02:00
" Stamp a file as a form xobject in another");
2021-11-03 17:05:53 +01:00
("-print-font-table",
2021-11-01 16:40:33 +01:00
Arg.String setprintfontencoding,
2021-11-03 17:05:53 +01:00
" Print the /ToUnicode table for a given font, if present.");
("-print-font-table-page",
2021-11-01 16:40:33 +01:00
Arg.Int setfontpage,
2021-11-03 17:05:53 +01:00
" Set page for -print-font-table");
2023-11-02 19:49:15 +01:00
("-extract-font",
Arg.String setextractfontfile,
" Extract a font");
2021-11-19 01:32:35 +01:00
("-table-of-contents",
Arg.Unit (setop TableOfContents),
" Typeset a table of contents from bookmarks");
2021-12-10 13:58:30 +01:00
("-toc-title",
2021-12-02 00:50:04 +01:00
Arg.String settableofcontentstitle,
" Set (or clear if empty) the TOC title");
2021-12-10 13:58:30 +01:00
("-toc-no-bookmark",
Arg.Unit settocnobookmark,
" Don't add the table of contents to the bookmarks");
2021-11-19 01:32:35 +01:00
("-typeset",
Arg.String settypeset,
" Typeset a text file as a PDF");
2023-03-03 17:02:16 +01:00
("-composition",
Arg.Unit (setop (Composition false)),
" Show composition of PDF");
("-composition-json",
Arg.Unit (setop (Composition true)),
" Show composition of PDF in JSON format");
2023-05-02 16:04:35 +02:00
("-text-width",
Arg.String settextwidth,
" Find width of a line of text");
2023-07-14 17:27:53 +02:00
("-draw", Arg.Unit setdraw, " Begin drawing");
("-rect", Arg.String Cpdfdrawcontrol.addrect, " Draw rectangle");
("-to", Arg.String Cpdfdrawcontrol.addto, " Move to");
("-line", Arg.String Cpdfdrawcontrol.addline, " Add line to");
("-bez", Arg.String Cpdfdrawcontrol.addbezier, " Add Bezier curve to path");
("-bez23", Arg.String Cpdfdrawcontrol.addbezier23, " Add Bezier v-op to path");
("-bez13", Arg.String Cpdfdrawcontrol.addbezier13, " Add Bezier y-op to path");
("-circle", Arg.String Cpdfdrawcontrol.addcircle, " Add circle to path");
("-strokecol", Arg.String Cpdfdrawcontrol.setstroke, " Set stroke colour");
("-fillcol", Arg.String Cpdfdrawcontrol.setfill, " Set fill colour");
("-stroke", Arg.Unit Cpdfdrawcontrol.stroke, " Stroke path");
("-fill", Arg.Unit Cpdfdrawcontrol.fill, " Fill path");
("-filleo", Arg.Unit Cpdfdrawcontrol.fillevenodd, " Fill path, even odd");
("-strokefill", Arg.Unit Cpdfdrawcontrol.strokefill, " Stroke and fill path");
("-strokefilleo", Arg.Unit Cpdfdrawcontrol.strokefillevenodd, " Stroke and fill path, even odd");
("-clip", Arg.Unit Cpdfdrawcontrol.clip, " Clip");
("-clipeo", Arg.Unit Cpdfdrawcontrol.clipevenodd, " Clip, even odd");
("-close", Arg.Unit Cpdfdrawcontrol.closepath, " Close path");
("-thick", Arg.String Cpdfdrawcontrol.setthickness, " Set stroke thickness");
("-cap", Arg.String Cpdfdrawcontrol.setcap, " Set cap");
("-join", Arg.String Cpdfdrawcontrol.setjoin, " Set join");
("-miter", Arg.String Cpdfdrawcontrol.setmiter, " Set miter limit");
("-dash", Arg.String Cpdfdrawcontrol.setdash, " Set dash pattern");
("-push", Arg.Unit Cpdfdrawcontrol.push, " Push graphics stack");
("-pop", Arg.Unit Cpdfdrawcontrol.pop, " Pop graphics stack");
("-matrix", Arg.String Cpdfdrawcontrol.setmatrix, " Append to graphics matrix");
("-mtrans", Arg.String Cpdfdrawcontrol.setmtranslate, " Translate the graphics matrix");
("-mrot", Arg.String Cpdfdrawcontrol.setmrotate, " Rotate the graphics matrix");
("-mscale", Arg.String Cpdfdrawcontrol.setmscale, " Scale the graphics matrix");
("-mshearx", Arg.String Cpdfdrawcontrol.setmshearx, " Shear the graphics matrix in X");
("-msheary", Arg.String Cpdfdrawcontrol.setmsheary, " Shear the graphics matrix in Y");
("-xobj-bbox", Arg.String Cpdfdrawcontrol.xobjbbox, " Specify the bounding box for xobjects");
("-xobj", Arg.String Cpdfdrawcontrol.startxobj, " Begin saving a sequence of graphics operators");
("-end-xobj", Arg.Unit Cpdfdrawcontrol.endxobj, " End saving a sequence of graphics operators");
("-use", Arg.String Cpdfdrawcontrol.usexobj, " Use a saved sequence of graphics operators");
("-draw-jpeg", Arg.String Cpdfdrawcontrol.addjpeg, " Load a JPEG from file and name it");
("-draw-png", Arg.String Cpdfdrawcontrol.addpng, " Load a PNG from file and name it");
("-image", Arg.String Cpdfdrawcontrol.addimage, " Draw an image which has already been loaded");
("-fill-opacity", Arg.Float Cpdfdrawcontrol.addopacity, " Set opacity");
("-stroke-opacity", Arg.Float Cpdfdrawcontrol.addsopacity, " Set stroke opacity");
("-bt", Arg.Unit Cpdfdrawcontrol.addbt, " Begin text");
("-et", Arg.Unit Cpdfdrawcontrol.addet, " End text");
("-text", Arg.String Cpdfdrawcontrol.addtext, " Draw text");
("-stext", Arg.String Cpdfdrawcontrol.addspecialtext, " Draw text with %specials");
("-leading", Arg.Float (fun f -> Cpdfdrawcontrol.addop (Cpdfdraw.Leading f)), " Set leading");
("-charspace", Arg.Float (fun f -> Cpdfdrawcontrol.addop (Cpdfdraw.CharSpace f)), " Set character spacing");
("-wordspace", Arg.Float (fun f -> Cpdfdrawcontrol.addop (Cpdfdraw.WordSpace f)), " Set word space");
("-textscale", Arg.Float (fun f -> Cpdfdrawcontrol.addop (Cpdfdraw.TextScale f)), " Set text scale");
("-rendermode", Arg.Int (fun i -> Cpdfdrawcontrol.addop (Cpdfdraw.RenderMode i)), " Set text rendering mode");
("-rise", Arg.Float (fun f -> Cpdfdrawcontrol.addop (Cpdfdraw.Rise f)), " Set text rise");
("-nl", Arg.Unit (fun () -> Cpdfdrawcontrol.addop Cpdfdraw.Newline), " New line");
("-newpage", Arg.Unit Cpdfdrawcontrol.addnewpage, " Move to a fresh page");
2024-01-17 19:37:58 +01:00
("-extract-stream", Arg.Int setextractstream, " Extract a stream");
("-extract-stream-decompress", Arg.Int setextractstreamdecomp, "Extract a stream, decompressing");
("-obj", Arg.Int setprintobj, "Print object");
2020-02-26 17:24:27 +01:00
(* These items are undocumented *)
2013-08-20 16:32:57 +02:00
("-debug", Arg.Unit setdebug, "");
2024-01-23 19:46:09 +01:00
("-debug-crypt", Arg.Unit (fun () -> args.debugcrypt <- true), "");
("-debug-force", Arg.Unit (fun () -> args.debugforce <- true), "");
("-debug-malformed", Arg.Set Pdfread.debug_always_treat_malformed, "");
2023-04-25 14:45:56 +02:00
("-debug-stderr-to-stdout", Arg.Unit setstderrtostdout, "");
2023-04-12 20:37:30 +02:00
("-stay-on-error", Arg.Unit setstayonerror, "");
(* These items are unfinished *)
2016-11-09 19:15:23 +01:00
("-extract-text", Arg.Unit (setop ExtractText), "");
2016-11-13 15:02:09 +01:00
("-extract-text-font-size", Arg.Float setextracttextfontsize, "");
2016-11-10 16:03:34 +01:00
]
2013-08-20 16:32:57 +02:00
(* Usage banner text: command syntax, licence notice, the version string
   assembled from major_version, minor_version and version_date, the purchase
   URL, and a short explanation of input names and page ranges. It ends with
   the heading under which the individual operations are listed. *)
and usage_msg =
"Syntax: cpdf [<operation>] <input files> [-o <output file>]\n\n\
2013-08-20 16:32:57 +02:00
This is a copyrighted, commercial program, and may NOT be freely copied.\n\n\
Version " ^ string_of_int major_version ^ "." ^ string_of_int minor_version ^ " " ^ version_date ^ "\n\n\
2023-10-17 14:43:19 +02:00
To buy, visit https://www.coherentpdf.com/\n\n\
2013-08-20 16:32:57 +02:00
Input names are distinguished by containing a '.' and may be\n\
followed by a page range specification, for instance \"1,2,3\"\n\
or \"1-6,9-end\" or \"even\" or \"odd\" or \"reverse\".\n\nOperations (See \
manual for full details):\n"
(* Reading and writing *)
2014-10-14 20:36:57 +02:00
(* [filesize name] is the length in bytes of the file [name], or 0 when the
   file cannot be opened or measured. The channel is closed on every path,
   including when measuring the length fails. *)
let filesize name =
  try
    let ch = open_in_bin name in
    Fun.protect
      ~finally:(fun () -> close_in_noerr ch)
      (fun () -> in_channel_length ch)
  with
    (* Deliberately best-effort: any failure is reported as size 0. *)
    _ -> 0
2013-08-20 16:32:57 +02:00
(* Repair a malformed PDF by running it through Ghostscript's pdfwrite device.
   Used when CPDF itself cannot read the file. The repaired copy is written to
   a fresh temporary file (registered in [tempfiles]) whose name is returned.
   Exits with code 2 if no gs path was given or if gs reports failure. *)
let mend_pdf_file_with_ghostscript filename =
  if args.path_to_ghostscript = "" then
    begin
      Pdfe.log "Please supply path to gs with -gs\n";
      exit 2
    end
  else
    begin
      Pdfe.log "CPDF could not mend. Attempting to mend file with gs\n";
      let repaired = Filename.temp_file "cpdf" ".pdf" in
      tempfiles := repaired :: !tempfiles;
      let quiet_flag = if args.gs_quiet then ["-dQUIET"] else [] in
      let gs_arguments =
        quiet_flag
        @ ["-dNOPAUSE"; "-sDEVICE=pdfwrite"; "-sOUTPUTFILE=" ^ repaired; "-dBATCH"; filename]
      in
      let command_line =
        Filename.quote_command args.path_to_ghostscript gs_arguments
      in
      if Sys.command command_line = 0 then
        begin
          Pdfe.log "Succeeded!\n";
          repaired
        end
      else
        begin
          Pdfe.log "Could not fix malformed PDF file, even with gs\n";
          exit 2
        end
    end
2019-07-02 15:10:42 +02:00
(* Raised by pdf_of_stdin when the PDF arriving on standard input cannot be
   read. Carries the bytes captured from stdin (empty if capture itself did not
   complete) so that a caller can write them to a file and attempt a repair. *)
exception StdInBytes of bytes
(* Read a complete PDF from standard input and parse it.

   [user_pw] is always handed to the reader; [owner_pw] only when non-empty.
   [revision] is passed straight through to the reader.

   Raises [StdInBytes b] when reading or parsing fails, where [b] holds the
   bytes captured from stdin (empty if the failure came before capture
   finished). [Cpdferror.SoftError] (bad owner or user password) is re-raised
   unchanged, as the file-based readers do, so that a password problem is not
   mistaken for a malformed file. *)
let pdf_of_stdin ?revision user_pw owner_pw =
  let rbytes = ref (mkbytes 0) in
  try
    let user_pw = Some user_pw
    and owner_pw = if owner_pw = "" then None else Some owner_pw in
    let o, bytes = Pdfio.input_output_of_bytes 16384 in
    try
      (* Copy stdin until End_of_file; the loop never finishes normally. *)
      while true do o.Pdfio.output_char (input_char stdin) done;
      Pdf.empty ()
    with
      End_of_file ->
        let thebytes = Pdfio.extract_bytes_from_input_output o bytes in
        (* Remember the raw data so it can be reported if parsing fails. *)
        rbytes := thebytes;
        let i = Pdfio.input_of_bytes thebytes in
        pdfread_pdf_of_input ?revision user_pw owner_pw i
  with
  | Cpdferror.SoftError _ as e -> raise e (* Bad owner or user password *)
  | _ -> raise (StdInBytes !rbytes)
2014-10-15 18:51:15 +02:00
2019-07-02 16:50:36 +02:00
(* Load the PDF named by the first entry of args.inputs.

   decrypt   - when true (the default) the result is passed through
               decrypt_if_necessary together with [op].
   fail      - set on the retry made after a Ghostscript repair, so that a
               second read failure exits instead of looping.
   read_lazy - choose the lazy channel reader rather than the whole-file reader
               for file inputs.

   Side effects: sets args.was_encrypted; when a repair is attempted, replaces
   the head of args.inputs with the repaired temporary file; prints the initial
   file size when args.squeeze is set.
   Raises Arg.Bad when args.inputs is empty. *)
let rec get_single_pdf ?(decrypt=true) ?(fail=false) op read_lazy =
2019-07-02 15:10:42 +02:00
(* Called before attempting a repair: if this is already the retry, give up. *)
let failout () =
if fail then begin
(* Reconstructed with ghostscript, but then we couldn't read it even then. Do not loop. *)
2023-04-25 14:45:56 +02:00
Pdfe.log "Failed to read gs-reconstructed PDF even though gs succeeded\n";
2019-07-02 15:10:42 +02:00
exit 2
end
in
(* Final failure path when -gs-malformed is not in use. Distinguishes a file
   which cannot be opened at all from one which is merely unreadable as PDF. *)
let warn_gs () =
2019-07-12 15:53:55 +02:00
begin match args.inputs with
(InFile inname, _, _, _, _, _)::_ ->
begin try ignore (close_in (open_in_bin inname)) with _ ->
2023-04-25 14:45:56 +02:00
Pdfe.log (Printf.sprintf "File %s does not exist\n" inname);
2019-07-12 15:53:55 +02:00
exit 2
end
| _ -> ()
end;
2023-04-25 14:45:56 +02:00
Pdfe.log "get_single_pdf: failed to read malformed PDF file. Consider using -gs-malformed\n";
2019-07-02 15:10:42 +02:00
exit 2
in
2014-10-15 18:51:15 +02:00
match args.inputs with
| (InFile inname, x, u, o, y, revision) as input::more ->
2014-10-15 18:51:15 +02:00
if args.squeeze then
Printf.printf "Initial file size is %i bytes\n" (filesize inname);
let pdf =
try
if read_lazy then
pdfread_pdf_of_channel_lazy ?revision (optstring u) (optstring o) (open_in_bin inname)
else
pdfread_pdf_of_file ?revision (optstring u) (optstring o) inname
with
2021-10-02 13:22:59 +02:00
| Cpdferror.SoftError _ as e -> raise e (* Bad owner or user password *)
| _ ->
if args.gs_malformed then
begin
2019-07-02 15:10:42 +02:00
failout ();
let newname = mend_pdf_file_with_ghostscript inname in
(* Point the first input at the repaired file and try once more. *)
(* NOTE(review): the retry does not forward ~decrypt, so it runs with the
   default (true) - confirm this is intended. *)
args.inputs <- (InFile newname, x, u, o, y, revision)::more;
get_single_pdf ~fail:true op read_lazy
end
else
2019-07-02 15:10:42 +02:00
warn_gs ()
in
args.was_encrypted <- Pdfcrypt.is_encrypted pdf;
2019-07-02 16:50:36 +02:00
if decrypt then decrypt_if_necessary input op pdf else pdf
2019-07-02 15:10:42 +02:00
| (StdIn, x, u, o, y, revision) as input::more ->
let pdf =
try pdf_of_stdin ?revision u o with
StdInBytes b ->
if args.gs_malformed then
begin
2019-07-02 15:10:42 +02:00
failout ();
(* Save the captured stdin bytes to a temporary file so gs can read them. *)
let inname = Filename.temp_file "cpdf" ".pdf" in
tempfiles := inname::!tempfiles;
let fh = open_out_bin inname in
Pdfio.bytes_to_output_channel fh b;
close_out fh;
let newname = mend_pdf_file_with_ghostscript inname in
args.inputs <- (InFile newname, x, u, o, y, revision)::more;
get_single_pdf ~fail:true op read_lazy
end
2019-07-02 15:10:42 +02:00
else
warn_gs ()
2014-10-15 18:51:15 +02:00
in
args.was_encrypted <- Pdfcrypt.is_encrypted pdf;
2019-07-02 16:50:36 +02:00
if decrypt then decrypt_if_necessary input op pdf else pdf
2023-12-05 13:20:03 +01:00
(* A PDF already held in memory is returned as it is. *)
| (AlreadyInMemory (pdf, s), _, _, _, _, _)::_ -> pdf
2014-10-15 18:51:15 +02:00
| _ ->
raise (Arg.Bad "cpdf: No input specified.\n")
(* As get_single_pdf, but with ~decrypt:false and no operation (None), so the
   loaded PDF is not passed through decrypt_if_necessary. *)
let get_single_pdf_nodecrypt read_lazy =
2019-07-02 16:50:36 +02:00
get_single_pdf ~decrypt:false None read_lazy
(* Table of PDFs already loaded by get_pdf_from_input_kind, keyed by file name. *)
let filenames = null_hash ()
2019-08-16 16:16:21 +02:00
(* [squeeze_logto filename x] appends the string [x] to the log file
   [filename], creating the file if it does not exist. The channel is closed
   even when positioning or writing fails; the original exception is then
   re-raised. *)
let squeeze_logto filename x =
  let fh = open_out_gen [Open_wronly; Open_creat] 0o666 filename in
  let append () =
    (* Position at the current end of file so existing log text is kept. *)
    seek_out fh (out_channel_length fh);
    output_string fh x
  in
  match append () with
  | () -> close_out fh
  | exception e -> close_out_noerr fh; raise e
2019-07-02 16:50:36 +02:00
(* Load the PDF described by the input kind [ik], which belongs to [input].

   File inputs are memoized on the file name in [filenames], so each file is
   loaded only once.

   read_lazy - use the lazy channel reader for file inputs.
   decrypt   - when true (the default) pass the result through
               decrypt_if_necessary together with [op].
   fail      - set on the retry made after a Ghostscript repair, so that a
               second read failure exits instead of looping.

   Side effects: sets args.was_encrypted; when args.squeeze is set, records the
   size of a file input in [initial_file_size] and reports it (to the squeeze
   log if one is configured, else to standard output).

   Diagnostics about an unreadable file are sent through Pdfe.log, like every
   other message in this function, rather than being printed on standard
   output where they could be mixed into a PDF being written there. *)
let rec get_pdf_from_input_kind ?(read_lazy=false) ?(decrypt=true) ?(fail=false) ((_, x, u, o, y, revision) as input) op ik =
  (* Called before attempting a repair: if this is already the retry, give up. *)
  let failout () =
    if fail then begin
      (* Reconstructed with ghostscript, but then we couldn't read it even then. Do not loop. *)
      Pdfe.log "Failed to read gs-reconstructed PDF even though gs succeeded\n";
      exit 2
    end
  in
  (* Final failure path when -gs-malformed is not in use. Distinguishes a file
     which cannot be opened at all from one which is unreadable as PDF. *)
  let warn_gs () =
    begin match input with
      (InFile inname, _, _, _, _, _) ->
        begin try ignore (close_in (open_in_bin inname)) with _ ->
          Pdfe.log (Printf.sprintf "File %s does not exist\n" inname);
          exit 2
        end
    | _ -> ()
    end;
    Pdfe.log "get_pdf_from_input_kind: failed to read malformed PDF file. Consider using -gs-malformed\n";
    exit 2
  in
  match ik with
  | AlreadyInMemory (pdf, _) -> pdf
  | InFile s ->
      if args.squeeze then
        begin
          let size = filesize s in
          initial_file_size := size;
          let str = Printf.sprintf "Initial file size is %i bytes\n" size in
          begin match !logto with
          | None -> print_string str
          | Some filename -> squeeze_logto filename str
          end
        end;
      begin try Hashtbl.find filenames s with
        Not_found ->
          let pdf =
            try
              if read_lazy then
                pdfread_pdf_of_channel_lazy ?revision (optstring u) (optstring o) (open_in_bin s)
              else
                pdfread_pdf_of_file ?revision (optstring u) (optstring o) s
            with
            | Cpdferror.SoftError _ as e -> raise e (* Bad owner or user password *)
            | e ->
                Pdfe.log (Printf.sprintf "%s\n" (Printexc.to_string e));
                if args.gs_malformed then
                  begin
                    failout ();
                    let newname = mend_pdf_file_with_ghostscript s in
                    get_pdf_from_input_kind ~fail:true (InFile newname, x, u, o, y, revision) op (InFile newname)
                  end
                else
                  warn_gs ()
          in
          args.was_encrypted <- Pdfcrypt.is_encrypted pdf;
          let pdf = if decrypt then decrypt_if_necessary input op pdf else pdf in
          Hashtbl.add filenames s pdf;
          pdf
      end
  | StdIn ->
      let pdf =
        try pdf_of_stdin ?revision u o with
          StdInBytes b ->
            if args.gs_malformed then
              begin
                failout ();
                (* Save the captured stdin bytes to a temporary file for gs. *)
                let inname = Filename.temp_file "cpdf" ".pdf" in
                tempfiles := inname::!tempfiles;
                let fh = open_out_bin inname in
                Pdfio.bytes_to_output_channel fh b;
                close_out fh;
                let newname = mend_pdf_file_with_ghostscript inname in
                get_pdf_from_input_kind ~fail:true (InFile newname, x, u, o, y, revision) op (InFile newname)
              end
            else
              warn_gs ()
      in
      args.was_encrypted <- Pdfcrypt.is_encrypted pdf;
      if decrypt then decrypt_if_necessary input op pdf else pdf
2014-10-15 18:51:15 +02:00
2015-01-22 20:16:56 +01:00
(* [unescape_octals s] decodes backslash escapes in [s]:
   - a backslash followed by exactly three octal digits (0-7) whose value is
     at most 255 becomes the character with that code;
   - a doubled backslash becomes a single backslash;
   - everything else is copied unchanged.
   A backslash followed by digits which do not form a valid octal byte (a digit
   8 or 9, or a value above 255) is left as written instead of raising. *)
let unescape_octals s =
  let len = String.length s in
  let out = Buffer.create len in
  let is_octal c = c >= '0' && c <= '7' in
  let rec scan i =
    if i < len then
      let c = s.[i] in
      if
        c = '\\' && i + 3 < len
        && is_octal s.[i + 1] && is_octal s.[i + 2] && is_octal s.[i + 3]
        && int_of_string ("0o" ^ String.sub s (i + 1) 3) <= 255
      then begin
        Buffer.add_char out (Char.chr (int_of_string ("0o" ^ String.sub s (i + 1) 3)));
        scan (i + 4)
      end
      else if c = '\\' && i + 1 < len && s.[i + 1] = '\\' then begin
        Buffer.add_char out '\\';
        scan (i + 2)
      end
      else begin
        Buffer.add_char out c;
        scan (i + 1)
      end
  in
  scan 0;
  Buffer.contents out
(* Prepare a command-line string for storage in the PDF. With the Raw encoding
   only octal escapes are decoded; with any other encoding the string is
   converted from UTF8 to a PDFDocString. *)
let process s =
  match args.encoding with
  | Cpdfmetadata.Raw -> unescape_octals s
  | _ -> Pdftext.pdfdocstring_of_utf8 s
(* Set the /Producer entry of [pdf]'s document information to [s] (after
   [process]ing it). The value returned by set_pdf_info is discarded. *)
let set_producer s pdf =
  let producer_entry = ("/Producer", Pdf.String (process s), 0) in
  ignore (Cpdfmetadata.set_pdf_info producer_entry pdf)
2015-01-22 20:16:56 +01:00
(* Set the /Creator entry of [pdf]'s document information to [s] (after
   [process]ing it). The value returned by set_pdf_info is discarded. *)
let set_creator s pdf =
  let creator_entry = ("/Creator", Pdf.String (process s), 0) in
  ignore (Cpdfmetadata.set_pdf_info creator_entry pdf)
2015-01-22 20:16:56 +01:00
(* Write [pdf] to the file [outname], honouring the options held in [args].

   encryption    - passed to Pdfwrite.pdf_to_file_options when the file is not
                   being re-encrypted with its original settings.
   is_decompress - when true, object streams are written uncompressed.
   mk_id         - passed through to Pdfwrite.pdf_to_file_options.

   Steps, in order: apply producer / creator overrides; write the file (to a
   temporary name if it is to be linearized); run the external linearizer if
   requested; report the final size when args.squeeze is set.

   Calls soft_error when the input was encrypted but may not be written
   (no -recrypt, no force flag, not decrypted with the owner password), and
   when the linearizer fails. *)
let really_write_pdf ?(encryption = None) ?(is_decompress=false) mk_id pdf outname =
2015-01-22 20:16:56 +01:00
if args.producer <> None then set_producer (unopt args.producer) pdf;
(* In a non-commercial build, stamp the producer unless the user is setting
   one explicitly. *)
if noncomp &&
(match args.op with Some (SetProducer _) -> false | _ -> match args.producer with None -> true | _ -> false)
then
set_producer "cpdf non-commercial use only. To buy: https://coherentpdf.com/" pdf;
2015-01-22 20:16:56 +01:00
if args.creator <> None then set_creator (unopt args.creator) pdf;
2023-04-25 14:45:56 +02:00
if args.debugcrypt then Printf.printf "really_write_pdf\n";
2014-12-02 16:12:28 +01:00
let will_linearize =
args.linearize || args.keeplinearize && pdf.Pdf.was_linearized
in
2014-10-02 14:32:30 +02:00
(* When linearizing, write to a temporary file first; the linearizer then
   produces [outname] from it. *)
let outname' =
2014-12-02 16:12:28 +01:00
if will_linearize then Filename.temp_file "cpdflin" ".pdf" else outname
2014-10-02 14:32:30 +02:00
in
if args.debugcrypt then
2014-11-19 18:36:02 +01:00
Printf.printf "args.recrypt = %b, args.was_encrypted = %b\n"
args.recrypt args.was_encrypted;
2014-10-24 18:24:29 +02:00
begin
if args.recrypt && args.was_encrypted then
begin
2014-11-19 18:36:02 +01:00
if args.debugcrypt then
Printf.printf "Recrypting in really_write_pdf\n";
2014-10-30 18:00:00 +01:00
match args.inputs with
[] -> raise (Pdf.PDFError "no input in recryption")
| (_, _, user_pw, owner_pw, _, _)::_ ->
2014-11-19 18:36:02 +01:00
(* Prefer the owner password of the first input when one was given. *)
let best_password =
if owner_pw <> "" then owner_pw else user_pw
in
2014-11-02 15:59:37 +01:00
Pdfwrite.pdf_to_file_options
~preserve_objstm:args.preserve_objstm
~generate_objstm:args.create_objstm
~compress_objstm:(not is_decompress)
~recrypt:(Some best_password)
2023-04-23 22:00:46 +02:00
None mk_id pdf outname'
2014-10-24 18:24:29 +02:00
end
else
begin
2018-03-20 16:53:25 +01:00
if args.debugforce || not args.was_encrypted || args.was_decrypted_with_owner then
begin
2014-11-19 18:36:02 +01:00
if args.debugcrypt then
Printf.printf "Pdf to file in really_write_pdf\n";
Pdfwrite.pdf_to_file_options
~preserve_objstm:args.preserve_objstm
~generate_objstm:args.create_objstm
~compress_objstm:(not is_decompress)
2023-04-23 22:00:46 +02:00
encryption mk_id pdf outname'
end
else
2014-11-19 18:36:02 +01:00
soft_error
2021-10-12 19:58:37 +02:00
"You must supply -recrypt here, or add -decrypt-force, or provide the owner password."
2014-10-24 18:24:29 +02:00
end
2014-10-15 18:51:15 +02:00
end;
2014-10-14 20:36:57 +02:00
begin
2014-12-02 16:12:28 +01:00
if will_linearize then
2021-12-18 17:14:31 +01:00
let cpdflin = find_cpdflin args.cpdflin in
2014-11-19 18:36:02 +01:00
match args.inputs with
(* NOTE(review): this message says "recryption" although this is the
   linearization step - looks copied from the branch above; confirm. *)
[] -> raise (Pdf.PDFError "no input in recryption")
| (_, _, user_pw, owner_pw, _, _)::_ ->
2014-11-19 18:36:02 +01:00
let best_password =
if owner_pw <> "" then owner_pw else user_pw
in
let code =
2021-12-18 17:14:31 +01:00
call_cpdflin cpdflin outname' outname best_password
2014-11-19 18:36:02 +01:00
in
if code > 0 then
begin
(* Linearizer failed: fall back to the unlinearized temporary file by
   moving it into place, then report the failure. *)
begin try Sys.remove outname with _ -> () end;
Sys.rename outname' outname;
soft_error
"Linearizer failed with above error. \
File written without linearization."
end
else
(* Linearizer succeeded: the temporary file is no longer needed. *)
begin try Sys.remove outname' with _ -> () end;
2014-10-14 20:36:57 +02:00
end;
(* Report the size of the written file relative to !initial_file_size, on
   standard output or in the squeeze log if one is configured. *)
if args.squeeze then
let s = filesize outname in
2019-08-16 16:16:21 +02:00
begin
let str =
Printf.sprintf
"Final file size is %i bytes, %.2f%% of original.\n"
s
((float s /. float !initial_file_size) *. 100.)
in
match !logto with
| None -> print_string str
| Some filename -> squeeze_logto filename str
end
2014-10-02 14:32:30 +02:00
2014-10-02 15:19:05 +02:00
(* Deliver [pdf] to the destination selected by args.out.

   encryption    - encryption to apply, if any; forwarded to really_write_pdf.
   is_decompress - when true the recompress / squeeze pass is skipped.
   mk_id         - forwarded to really_write_pdf.

   With no output specified the PDF is appended to [output_pdfs]. For a file
   it is written directly. For standard output it is written to a temporary
   file, which is then copied to stdout and removed.

   When object streams are being created and the version may be changed, the
   minor version of a 1.x document is raised to at least 5 first. *)
let write_pdf ?(encryption = None) ?(is_decompress=false) mk_id pdf =
  if args.debugcrypt then Printf.printf "write_pdf\n";
  let must_bump_version =
    args.create_objstm && not (args.keepversion || pdf.Pdf.major > 1)
  in
  if must_bump_version then pdf.Pdf.minor <- max pdf.Pdf.minor 5;
  (* Shared by the file and stdout paths when no encryption is requested. *)
  let recompress_and_squeeze () =
    ignore (Cpdfsqueeze.recompress_pdf pdf);
    if args.squeeze then
      Cpdfsqueeze.squeeze ~pagedata:args.squeeze_pagedata ?logto:!logto pdf
  in
  match args.out with
  | NoOutputSpecified ->
      output_pdfs =| pdf
  | File outname ->
      begin match encryption with
      | Some _ ->
          really_write_pdf ~encryption ~is_decompress mk_id pdf outname
      | None ->
          if not is_decompress then recompress_and_squeeze ();
          Pdf.remove_unreferenced pdf;
          really_write_pdf ~is_decompress mk_id pdf outname
      end
  | Stdout ->
      let temp = Filename.temp_file "cpdfstdout" ".pdf" in
      begin match encryption with
      | Some _ -> ()
      | None ->
          if not is_decompress then
            begin
              recompress_and_squeeze ();
              Pdf.remove_unreferenced pdf
            end
      end;
      really_write_pdf ~encryption ~is_decompress mk_id pdf temp;
      let temp_file = open_in_bin temp in
      let discard_temp () =
        try close_in temp_file; Sys.remove temp with
          e -> Pdfe.log (Printf.sprintf "Failed to remove temp file %s (%s)\n" temp (Printexc.to_string e))
      in
      try
        (* Copy until End_of_file; the loop never finishes normally. *)
        while true do output_char stdout (input_char temp_file) done;
        assert false
      with
        End_of_file ->
          discard_temp ();
          flush stdout (*r For Windows *)
2013-08-20 16:32:57 +02:00
(* Find the stem of a filename: the base name of [s] with its final '.' and
   everything after it removed. A base name containing no '.' yields the empty
   string. *)
let stem s =
  let base = Filename.basename s in
  match String.rindex_opt base '.' with
  | Some dot -> String.sub base 0 dot
  | None -> ""
(* Write one output file per element of [pagenums], each element being the
   list of page numbers of [main_pdf] which make up that part.

   names     - if non-empty, the output name of part n is the n-th of these;
               otherwise names are built by Cpdfbookmarks.name_of_spec from
               [spec], the part number, the stem of [original_filename] and
               the first and last page of the part.
   enc       - encryption for each part, if any.
   sq        - squeeze each part before writing.
   pdf_pages - not used by this function. *)
let fast_write_split_pdfs
  ?(names=[]) enc splitlevel original_filename sq spec main_pdf pagenums pdf_pages
=
  let bookmarks = Pdfmarks.read_bookmarks main_pdf in
  let write_part index part_pages =
    let part = Pdfpage.pdf_of_pages main_pdf part_pages in
    let first_page, last_page = extremes part_pages in
    let outname =
      match names with
      | [] ->
          Cpdfbookmarks.name_of_spec
            args.encoding bookmarks main_pdf splitlevel spec index
            (stem original_filename) first_page last_page
      | _ -> List.nth names (index - 1)
    in
    Pdf.remove_unreferenced part;
    if sq then Cpdfsqueeze.squeeze ~pagedata:args.squeeze_pagedata ?logto:!logto part;
    really_write_pdf ~encryption:enc (enc <> None) part outname
  in
  iter2 write_part (indx pagenums) pagenums
(* The page numbers targeted by the bookmarks of [pdf] at the given [level],
   in bookmark order, with duplicates removed. *)
let bookmark_pages level pdf =
  let refnums = Pdf.page_reference_numbers pdf in
  (* Lookup table built once and handed to pagenumber_of_target. *)
  let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
  let page_if_at_level mark =
    if mark.Pdfmarks.level = level
    then Some (Pdfpage.pagenumber_of_target ~fastrefnums pdf mark.Pdfmarks.target)
    else None
  in
  let marks = Pdfmarks.read_bookmarks pdf in
  setify_preserving_order (option_map page_if_at_level marks)
(* Split [pdf] into parts at the bookmarks of the given [level] and write each
   part out. The split point for a bookmark is the page before its target;
   points which fall outside 1..endpage are discarded. *)
let split_at_bookmarks
  enc original_filename ~squeeze level spec pdf
=
  let pdf_pages = Pdfpage.pages_of_pagetree pdf in
  let out_of_range n = not (n > 0 && n <= Pdfpage.endpage pdf) in
  let split_points = lose out_of_range (map pred (bookmark_pages level pdf)) in
  let chunks = splitat split_points (indx pdf_pages) in
  fast_write_split_pdfs
    enc level original_filename squeeze spec pdf chunks pdf_pages
(* Split [pdf] into consecutive parts of [chunksize] pages and write each part
   out. The [linearize], [cpdflin], [preserve_objstm] and [create_objstm]
   arguments are accepted but not used in this function. *)
let split_pdf
  enc original_filename
  chunksize linearize ~cpdflin ~preserve_objstm ~create_objstm ~squeeze
  spec pdf
=
  let pdf_pages = Pdfpage.pages_of_pagetree pdf in
  let chunks = splitinto chunksize (indx pdf_pages) in
  fast_write_split_pdfs
    enc 0 original_filename squeeze spec pdf chunks pdf_pages
2013-08-20 16:32:57 +02:00
2023-10-26 14:46:51 +02:00
(* [split_max_fits pdf s p q] is true when pages [p]..[q] of [pdf], written
   out as a file of their own, occupy at most [s] bytes.

   The pages are written to a temporary file through write_pdf by pointing
   args.out at it for the duration of the write. args.out is restored and the
   temporary file removed on every path, including when writing or measuring
   raises. Calls [error] if [q < p]. *)
let split_max_fits pdf s p q =
  if q < p then error "split_max_fits" else
  let filename = Filename.temp_file "cpdf" "sm" in
  let remove_temp () = try Sys.remove filename with _ -> () in
  let measure () =
    let range = ilist p q in
    let newpdf = Pdfpage.pdf_of_pages ~retain_numbering:args.retain_numbering pdf range in
    let saved_out = args.out in
    args.out <- File filename;
    Fun.protect
      ~finally:(fun () -> args.out <- saved_out)
      (fun () -> write_pdf false newpdf);
    let fh = open_in_bin filename in
    Fun.protect
      ~finally:(fun () -> close_in_noerr fh)
      (fun () -> in_channel_length fh)
  in
  match measure () with
  | size -> remove_temp (); size <= s
  | exception e -> remove_temp (); raise e
(* Binary search over the candidate last pages [p]..[q] for the largest one
   such that pages [b]..candidate fit within [s] bytes (as judged by
   split_max_fits). Returns that page number, or -1 if not even the smallest
   candidate fits. *)
let rec split_max_search pdf s b p q =
  if p <> q then
    begin
      (* First page of the upper half of the remaining interval. *)
      let upper_start = (q + p) / 2 + 1 in
      if split_max_fits pdf s b upper_start
      then split_max_search pdf s b upper_start q
      else split_max_search pdf s b p (upper_start - 1)
    end
  else if split_max_fits pdf s b q then q
  else -1
2023-10-26 14:46:51 +02:00
(* Split [pdf] into consecutive page ranges, each as long as possible while
   still fitting within [s] bytes when written, then write all the parts.
   If some page cannot be made to fit even alone, a message is printed on
   standard error and the program exits with code 2 before anything is
   written. *)
let split_max enc original_filename ~squeeze output_spec s pdf =
  let endpage = Pdfpage.endpage pdf in
  (* [gather acc first] collects ranges starting at page [first]; [acc] holds
     the ranges found so far, most recent first. *)
  let rec gather acc first =
    if first > endpage then rev acc else
    let last_fitting = split_max_search pdf s first first endpage in
    if last_fitting = -1 then
      begin
        Printf.eprintf "Failed to make small enough split at page %i. No files written.\n" first;
        exit 2
      end
    else
      gather (ilist first last_fitting :: acc) (last_fitting + 1)
  in
  let ranges = gather [] 1 in
  fast_write_split_pdfs enc 0 original_filename squeeze output_spec pdf ranges (Pdfpage.pages_of_pagetree pdf)
2023-10-25 19:15:19 +02:00
2013-08-20 16:32:57 +02:00
(* A human-readable description of the encryption method reported by
   Pdfread.what_encryption for [pdf]. *)
let getencryption pdf =
  match Pdfread.what_encryption pdf with
  | None -> "Not encrypted"
  | Some kind ->
      begin match kind with
      | Pdfwrite.AlreadyEncrypted -> "Not encrypted"
      | Pdfwrite.PDF40bit -> "40bit"
      | Pdfwrite.PDF128bit -> "128bit"
      | Pdfwrite.AES128bit metadata ->
          if metadata
          then "128bit AES, Metadata encrypted"
          else "128bit AES, Metadata not encrypted"
      | Pdfwrite.AES256bit metadata ->
          if metadata
          then "256bit AES, Metadata encrypted"
          else "256bit AES, Metadata not encrypted"
      | Pdfwrite.AES256bitISO metadata ->
          if metadata
          then "256bit AES ISO, Metadata encrypted"
          else "256bit AES ISO, Metadata not encrypted"
      end
(* Make [stamp] have as many pages as [main]. If the stamp has more pages it
   is truncated; otherwise it is extended by repeating its last page. *)
let equalize_pages_extend main stamp =
  let stamp_count = Pdfpage.endpage stamp in
  let main_count = Pdfpage.endpage main in
  let stamp_pages = Pdfpage.pages_of_pagetree stamp in
  let adjusted_pages =
    if stamp_count > main_count
    then take stamp_pages main_count
    else stamp_pages @ many (last stamp_pages) (main_count - stamp_count)
  in
  Pdfpage.change_pages true stamp adjusted_pages
2020-01-30 11:20:33 +01:00
(* Write [pdf] in JSON form to [output] using [Cpdfjson.to_output]. The
   formatting options (UTF8 or not, content-stream parsing, omission or
   decompression of stream data, string cleaning) are taken from [args].
   - [NoOutputSpecified]: reported through [error].
   - [Stdout]: written to standard output.
   - [File filename]: written to a freshly opened file, which is closed
     afterwards. *)
let write_json output pdf =
  (* The single place where the JSON writer is invoked, shared by both the
     stdout and the file case. *)
  let emit out =
    Cpdfjson.to_output
      out
      ~utf8:(args.encoding = Cpdfmetadata.UTF8)
      ~parse_content:args.jsonparsecontentstreams
      ~no_stream_data:args.jsonnostreamdata
      ~decompress_streams:args.jsondecompressstreams
      ~clean_strings:args.jsoncleanstrings
      pdf
  in
    match output with
    | NoOutputSpecified ->
        error "-output-json: no output name specified"
    | Stdout ->
        emit (Pdfio.output_of_channel stdout)
    | File filename ->
        let f = open_out filename in
          (* Close the channel on failure too, so the descriptor is not leaked
             when the writer raises. On success a plain [close_out] is used so
             that flush/close errors are still reported. *)
          begin match emit (Pdfio.output_of_channel f) with
          | () -> close_out f
          | exception e -> close_out_noerr f; raise e
          end
2021-10-16 16:47:41 +02:00
(* Interleave the page ranges of several inputs, one page at a time. Given
   parallel lists of names, pdfs and page ranges, the result is again three
   parallel lists, but with one entry per page: first the first page of every
   input (in input order), then the second page of every input, and so on.
   Inputs whose range is exhausted are skipped in later rounds. Each entry's
   range is a singleton list. *)
let collate (names, pdfs, ranges) =
  let exhausted (_, _, range) =
    match range with [] -> true | _::_ -> false
  in
  (* Take the next page (if any) from one input. Both accumulators are built
     in reverse order. *)
  let deal_one (emitted, remaining) ((name, pdf, range) as input) =
    match range with
    | [] -> (emitted, input::remaining)
    | page::rest -> ((name, pdf, [page])::emitted, (name, pdf, rest)::remaining)
  in
  (* Run whole rounds over the inputs until no input has pages left. *)
  let rec deal_rounds emitted inputs =
    if List.for_all exhausted inputs then rev emitted else
      let emitted, remaining = List.fold_left deal_one (emitted, []) inputs in
        deal_rounds emitted (rev remaining)
  in
    split3 (deal_rounds [] (combine3 names pdfs ranges))
2022-01-02 16:18:55 +01:00
(* Log a warning through [Pdfe.log] when [args.prerotate] is not set and
   [Cpdfpage.alluprightonly range pdf] is false. The upright check is only
   performed when [args.prerotate] is not set. *)
let warn_prerotate range pdf =
  let needs_warning =
    not (args.prerotate || Cpdfpage.alluprightonly range pdf)
  in
    if needs_warning then
      Pdfe.log
        "Some pages in the range have non-zero rotation. \
         Consider adding -prerotate or pre-processing with -upright. \
         To silence this warning use -no-warn-rotate\n"
2022-01-02 16:18:55 +01:00
(* Apply [Cpdfpage.upright] to the pages of [pdf] in [range], passing on the
   [args.fast] setting. *)
let prerotate range pdf =
  let fast = args.fast in
    Cpdfpage.upright ~fast range pdf
2023-02-20 20:39:42 +01:00
(* Log a warning when -merge-add-bookmarks-use-titles has been given without
   -merge-add-bookmarks. *)
let check_bookmarks_mistake () =
  match args.merge_add_bookmarks_use_titles, args.merge_add_bookmarks with
  | true, false ->
      Pdfe.log "Warning: -merge-add-bookmarks-use-titles is for use with -merge-add-bookmarks\n"
  | true, true | false, _ -> ()
2023-02-20 20:39:42 +01:00
2023-02-28 17:55:35 +01:00
(* Log a warning when the output is a file whose name is exactly the name of
   one of the input files. Only the name strings are compared. *)
let check_clashing_output_name () =
  let reads_from outname = function
    | (InFile inname, _, _, _, _, _) -> outname = inname
    | _ -> false
  in
    match args.out with
    | File outname when List.exists (reads_from outname) args.inputs ->
        Pdfe.log "Warning: output file name clashes with input file name. Malformed file may result.\n"
    | _ -> ()
2023-10-30 18:55:19 +01:00
(* Build the optional [Pdfwrite] encryption record from the command-line
   settings in [args]. Returns [None] when [args.crypt_method] is the empty
   string; otherwise the method name is translated to the corresponding
   [Pdfwrite] encryption method and combined with the owner and user passwords
   and the permissions given by [banlist_of_args]. *)
let build_enc () =
  let encryption_method_of_name = function
    | "40bit" -> Pdfwrite.PDF40bit
    | "128bit" -> Pdfwrite.PDF128bit
    | "AES" -> Pdfwrite.AES128bit args.encrypt_metadata
    | "AES256" -> Pdfwrite.AES256bit args.encrypt_metadata
    | "AES256ISO" -> Pdfwrite.AES256bitISO args.encrypt_metadata
    | _ -> assert false (* Pre-checked *)
  in
    match args.crypt_method with
    | "" -> None
    | name ->
        Some
          {Pdfwrite.encryption_method = encryption_method_of_name name;
           Pdfwrite.owner_password = args.owner;
           Pdfwrite.user_password = args.user;
           Pdfwrite.permissions = banlist_of_args ()}
2024-01-18 17:20:51 +01:00
(* Write the data of stream object [objnum] of [pdf] to the destination given
   by [args.out]. When [decomp] is true the stream is first decoded with
   [Pdfcodec.decode_pdfstream_until_unknown]. If, after that, the object is not
   a stream holding its data, the result of [mkbytes 0] is written instead.
   Nothing is written when no output has been specified. *)
let extract_stream pdf decomp objnum =
  let obj = Pdf.lookup_obj pdf objnum in
  (* NOTE(review): presumably forces the stream data to be loaded, so that the
     [Pdf.Got] case below applies -- confirm against Pdf.getstream. *)
  Pdf.getstream obj;
  if decomp then Pdfcodec.decode_pdfstream_until_unknown pdf obj;
  let data =
    match obj with
    | Pdf.Stream {contents = (_, Pdf.Got x)} -> x
    | _ -> mkbytes 0
  in
    match args.out with
    | NoOutputSpecified ->
        ()
    | Stdout ->
        output_string stdout (Pdfio.string_of_bytes data)
    | File outname ->
        let fh = open_out_bin outname in
          (* Close the channel on failure too, so the descriptor is not leaked
             when writing raises. On success a plain [close_out] is used so
             that flush/close errors are still reported. *)
          begin match output_string fh (Pdfio.string_of_bytes data) with
          | () -> close_out fh
          | exception e -> close_out_noerr fh; raise e
          end
2024-01-17 19:37:58 +01:00
(* Print object [objnum] of [pdf] on standard output, rendered with
   [Pdfwrite.string_of_pdf] and shown as a quoted, escaped string followed by
   a newline. Object number 0 selects the trailer dictionary. *)
let print_obj pdf objnum =
  let selected =
    match objnum with
    | 0 -> pdf.Pdf.trailerdict
    | n -> Pdf.lookup_obj pdf n
  in
    selected |> Pdfwrite.string_of_pdf |> Printf.printf "%S\n"
2024-01-17 19:37:58 +01:00
2014-10-14 20:45:10 +02:00
(* Main function *)
2013-08-20 16:32:57 +02:00
let go () =
2023-02-20 20:39:42 +01:00
check_bookmarks_mistake ();
2023-02-28 17:55:35 +01:00
check_clashing_output_name ();
2013-08-20 16:32:57 +02:00
match args.op with
| Some Version ->
flprint
("cpdf Version " ^ string_of_int major_version ^ "." ^ string_of_int minor_version ^ " " ^ version_date ^ "\n")
| None | Some Merge ->
begin match args.out, args.inputs with
| _, (_::_ as inputs) ->
2014-10-28 19:40:56 +01:00
let op = match inputs with [_] -> None | _ -> Some Merge in
let names, ranges, rotations, _, _, _ = split6 inputs in
2014-10-28 19:40:56 +01:00
let pdfs = map2 (fun i -> get_pdf_from_input_kind i op) inputs names in
2013-08-20 16:32:57 +02:00
(* If at least one file had object streams and args.preserve_objstm is true, set -objstm-create *)
if args.preserve_objstm then
iter
(fun pdf ->
if Hashtbl.length pdf.Pdf.objects.Pdf.object_stream_ids > 0
then args.create_objstm <- true)
pdfs;
begin match pdfs with
| [pdf] ->
2023-04-12 20:20:47 +02:00
if hd ranges <> "all" then
2014-10-14 20:36:57 +02:00
let range = parse_pagespec pdf (hd ranges) in
let newpdf = Pdfpage.pdf_of_pages ~retain_numbering:args.retain_numbering pdf range in
write_pdf false newpdf
2013-08-20 16:32:57 +02:00
else
write_pdf false pdf
| _ ->
(* We check permissions. A merge is allowed if each file
2014-11-05 15:05:20 +01:00
included was (a) not encrypted (detected by the absence of
saved encryption information in the PDF, or (b) decrypted using
the owner password (stored in the input) *)
if
2018-03-20 16:43:15 +01:00
(not args.debugforce) &&
(not
2014-11-05 15:05:20 +01:00
(fold_left ( && ) true
(map2
(fun (_, _, _, _, was_dec_with_owner, _) pdf ->
2014-11-05 15:05:20 +01:00
!was_dec_with_owner || pdf.Pdf.saved_encryption = None)
inputs
2018-03-20 16:43:15 +01:00
pdfs)))
2014-11-05 15:05:20 +01:00
then
2021-10-12 19:58:37 +02:00
soft_error "Merge requires the owner password for all encrypted files, or -decrypt-force."
2014-11-05 15:05:20 +01:00
else
2019-07-03 15:40:32 +02:00
let pdfs =
if args.merge_add_bookmarks then
2020-03-04 19:50:32 +01:00
map2
2021-12-21 16:06:40 +01:00
(fun filename pdf -> Cpdfbookmarks.add_bookmark_title filename args.merge_add_bookmarks_use_titles pdf)
2020-03-04 19:50:32 +01:00
(map (function InFile s -> s | StdIn -> "" | AlreadyInMemory _ -> "") names)
2019-07-03 15:40:32 +02:00
pdfs
else
pdfs
in
2014-11-05 15:05:20 +01:00
(* If args.keep_this_id is set, change the ID to the one from the kept one *)
let rangenums = map2 parse_pagespec pdfs ranges in
2021-10-16 16:47:41 +02:00
(* At this point, we have the information for collation. *)
let names = map string_of_input_kind names in
let names, pdfs, rangenums =
(if args.collate then collate else Fun.id) (names, pdfs, rangenums)
in
2014-11-05 15:05:20 +01:00
let outpdf =
2019-07-03 15:40:32 +02:00
Pdfmerge.merge_pdfs
args.retain_numbering args.remove_duplicate_fonts ~struct_hierarchy:args.process_struct_trees names pdfs rangenums
2014-11-05 15:05:20 +01:00
in
2023-12-05 13:56:56 +01:00
if args.remove_duplicate_streams then Pdfmerge.remove_duplicate_fonts outpdf; (* JBIG2 Globals *)
2014-11-05 15:05:20 +01:00
write_pdf false outpdf
2013-08-20 16:32:57 +02:00
end
| _ ->
match args.op with
| Some Merge ->
error "Merge: Must specify one output and at least one input"
| None ->
error "Must specify one output and at least one input"
| _ -> assert false
end
| Some (CopyFont fromfile) ->
begin match args.inputs, args.out with
| (_, pagespec, u, o, _, _)::_, _ ->
2013-08-20 16:32:57 +02:00
let pdf = get_single_pdf (Some (CopyFont fromfile)) false
and frompdf = pdfread_pdf_of_file (optstring u) (optstring o) fromfile in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2013-08-20 16:32:57 +02:00
let copyfontname =
match args.copyfontname with
| Some x -> x
| None -> failwith "copy_font: no font name given"
in
2021-10-18 17:22:38 +02:00
let outpdf = Cpdffont.copy_font frompdf copyfontname args.copyfontpage range pdf in
2013-08-20 16:32:57 +02:00
write_pdf true outpdf
| _ -> error "copyfont: bad command line"
end
| Some RemoveFonts ->
begin match args.inputs, args.out with
| (_, pagespec, _, _, _, _)::_, _ ->
2013-08-20 16:32:57 +02:00
let pdf = get_single_pdf (Some RemoveFonts) false in
2021-11-12 23:09:49 +01:00
write_pdf true (Cpdffont.remove_fonts pdf)
2013-08-20 16:32:57 +02:00
| _ -> error "remove fonts: bad command line"
end
2023-11-02 19:49:15 +01:00
| Some (ExtractFontFile spec) ->
2013-08-20 16:32:57 +02:00
begin match args.inputs, args.out with
2023-11-02 19:49:15 +01:00
| (_, pagespec, u, o, _, _)::_, File filename ->
let pdf = get_single_pdf (Some (ExtractFontFile spec)) false in
begin match String.split_on_char ',' spec with
| [pnum; name] ->
begin try Cpdffont.extract_fontfile (int_of_string pnum) name filename pdf with
Failure _ (*"int_of_string"*) -> error "extract font: bad page number"
end
| _ -> error "extract font: bad specification"
end
2013-08-20 16:32:57 +02:00
| _ -> error "extract fontfile: bad command line"
end
| Some CountPages ->
2019-07-02 16:50:36 +02:00
begin match args.inputs with
[(ik, _, _, _, _, _) as input] ->
2019-07-02 17:50:39 +02:00
let pdf = get_pdf_from_input_kind ~read_lazy:true ~decrypt:false input (Some CountPages) ik in
2019-07-02 16:50:36 +02:00
output_page_count pdf
| _ -> raise (Arg.Bad "CountPages: must have a single input file only")
end
2013-08-20 16:32:57 +02:00
| Some Clean ->
2019-10-21 12:28:40 +02:00
let pdf' = get_single_pdf (Some Clean) false in
write_pdf false pdf'
2013-08-20 16:32:57 +02:00
| Some Info ->
2019-10-26 15:10:03 +02:00
let pdf, inname, input =
match args.inputs with
| (InFile inname, _, u, o, _, _) as input::_ ->
pdfread_pdf_of_channel_lazy (optstring u) (optstring o) (open_in_bin inname), inname, input
| (StdIn, _, u, o, _, _) as input::_ -> pdf_of_stdin u o, "", input
2023-12-05 13:20:03 +01:00
| (AlreadyInMemory (pdf, _), _, _, _, _, _) as input::_ -> pdf, "", input
2019-10-26 15:10:03 +02:00
| _ -> raise (Arg.Bad "cpdf: No input specified.\n")
in
2023-11-01 15:55:14 +01:00
let json = ref [] in
if args.format_json
then json =| ("Encryption", `String (getencryption pdf))
else Printf.printf "Encryption: %s\n" (getencryption pdf);
if args.format_json
then json =| ("Permissions", `List (map (fun p -> `String (string_of_permission p)) (Pdfread.permissions pdf)))
else Printf.printf "Permissions: %s\n" (getpermissions pdf);
2019-10-26 15:10:03 +02:00
if inname <> "" then
2023-11-01 15:55:14 +01:00
let lin = Pdfread.is_linearized (Pdfio.input_of_channel (open_in_bin inname)) in
if args.format_json then
json =| ("Linearized", `Bool lin) else Printf.printf "Linearized: %b\n" lin;
let objstm = length (list_of_hashtbl pdf.Pdf.objects.Pdf.object_stream_ids) > 0 in
if args.format_json
then json =| ("Object streams", `Bool objstm)
else Printf.printf "Object streams: %b\n" objstm;
let ida, idb =
match Pdf.lookup_direct pdf "/ID" pdf.Pdf.trailerdict with
| Some (Pdf.Array [Pdf.String s; Pdf.String s']) ->
(Pdfwrite.make_hex_pdf_string s, Pdfwrite.make_hex_pdf_string s')
| _ -> "", ""
in
let fixid s = implode (rev (tl (rev (tl (explode s))))) in
if args.format_json
then json =| ("ID", if ida ^ idb = "" then `Null else `List [`String (fixid ida); `String (fixid idb)])
2024-02-14 16:05:46 +01:00
else (if ida ^ idb = "" then Printf.printf "ID: None\n" else Printf.printf "ID: %s %s\n" ida idb);
2019-10-26 15:10:03 +02:00
let pdf = decrypt_if_necessary input (Some Info) pdf in
2023-11-01 15:55:14 +01:00
if args.format_json then
2023-11-01 21:47:52 +01:00
begin
Cpdfmetadata.output_info ~json Cpdfmetadata.UTF8 pdf;
Cpdfmetadata.output_xmp_info ~json Cpdfmetadata.UTF8 pdf;
flprint (Cpdfyojson.Safe.pretty_to_string (`Assoc (rev !json)))
end
2023-11-01 15:55:14 +01:00
else
begin
Cpdfmetadata.output_info args.encoding pdf;
Cpdfmetadata.output_xmp_info args.encoding pdf
end
2013-08-20 16:32:57 +02:00
| Some PageInfo ->
begin match args.inputs, args.out with
| (_, pagespec, _, _, _, _)::_, _ ->
let pdf = get_single_pdf args.op true in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2023-10-31 17:51:57 +01:00
Cpdfpage.output_page_info ~json:args.format_json pdf range
| _ -> error "list-bookmarks: bad command line"
end
2013-08-20 16:32:57 +02:00
| Some Metadata ->
2021-12-19 13:55:06 +01:00
Cpdfmetadata.print_metadata (get_single_pdf (Some Metadata) true)
2013-08-20 16:32:57 +02:00
| Some Fonts ->
2021-11-03 17:51:28 +01:00
begin match args.inputs, args.out with
| (_, pagespec, _, _, _, _)::_, _ ->
let pdf = get_single_pdf (Some Fonts) true in
let range = parse_pagespec_allow_empty pdf pagespec in
2023-10-31 17:23:20 +01:00
Cpdffont.print_fonts ~json:args.format_json pdf range
2021-11-03 17:51:28 +01:00
| _ -> error "-list-fonts: bad command line"
end
2013-08-20 16:32:57 +02:00
| Some ListBookmarks ->
begin match args.inputs, args.out with
| (_, pagespec, _, _, _, _)::_, _ ->
2013-08-20 16:32:57 +02:00
let pdf = get_single_pdf args.op true in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2021-12-21 14:44:46 +01:00
Cpdfbookmarks.list_bookmarks ~json:args.format_json args.encoding range pdf (Pdfio.output_of_channel stdout);
2013-08-20 16:32:57 +02:00
flush stdout
| _ -> error "list-bookmarks: bad command line"
end
| Some Crop ->
begin match args.inputs, args.out with
| (_, pagespec, _, _, _, _)::_, _ ->
let pdf = get_single_pdf (Some Crop) false in
let xywhlist = Cpdfcoord.parse_rectangles pdf args.rectangle in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2021-12-22 10:31:39 +01:00
let pdf = Cpdfpage.crop_pdf xywhlist pdf range in
2013-08-20 16:32:57 +02:00
write_pdf false pdf
| _ -> error "crop: bad command line"
end
| Some Art ->
begin match args.inputs, args.out with
| (_, pagespec, _, _, _, _)::_, _ ->
let pdf = get_single_pdf (Some Art) false in
let xywhlist = Cpdfcoord.parse_rectangles pdf args.rectangle in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2021-12-22 10:31:39 +01:00
let pdf = Cpdfpage.crop_pdf ~box:"/ArtBox" xywhlist pdf range in
write_pdf false pdf
2019-08-11 20:39:12 +02:00
| _ -> error "art: bad command line"
end
| Some Bleed ->
begin match args.inputs, args.out with
| (_, pagespec, _, _, _, _)::_, _ ->
let pdf = get_single_pdf (Some Bleed) false in
let xywhlist = Cpdfcoord.parse_rectangles pdf args.rectangle in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2021-12-22 10:31:39 +01:00
let pdf = Cpdfpage.crop_pdf ~box:"/BleedBox" xywhlist pdf range in
write_pdf false pdf
2019-08-11 20:39:12 +02:00
| _ -> error "bleed: bad command line"
end
| Some Trim ->
begin match args.inputs, args.out with
| (_, pagespec, _, _, _, _)::_, _ ->
let pdf = get_single_pdf (Some Trim) false in
let xywhlist = Cpdfcoord.parse_rectangles pdf args.rectangle in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2021-12-22 10:31:39 +01:00
let pdf = Cpdfpage.crop_pdf ~box:"/TrimBox" xywhlist pdf range in
write_pdf false pdf
2019-08-11 20:39:12 +02:00
| _ -> error "trim: bad command line"
end
2013-08-20 16:32:57 +02:00
| Some MediaBox ->
begin match args.inputs, args.out with
| (_, pagespec, _, _, _, _)::_, _ ->
let pdf = get_single_pdf (Some MediaBox) false in
let xywhlist = Cpdfcoord.parse_rectangles pdf args.rectangle in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2021-12-22 09:58:56 +01:00
let pdf = Cpdfpage.set_mediabox xywhlist pdf range in
2013-08-20 16:32:57 +02:00
write_pdf false pdf
| _ -> error "set media box: bad command line"
end
2017-05-19 20:10:49 +02:00
| Some (HardBox box) ->
begin match args.inputs, args.out with
| (_, pagespec, _, _, _, _)::_, _ ->
let pdf = get_single_pdf (Some (HardBox box)) false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2021-12-22 09:58:56 +01:00
let pdf = Cpdfpage.hard_box pdf range box args.mediabox_if_missing args.fast in
2017-05-19 20:10:49 +02:00
write_pdf false pdf
| _ -> error "hard box: bad command line"
end
2013-08-20 16:32:57 +02:00
| Some CopyBox ->
begin match args.inputs, args.out with
| (_, pagespec, _, _, _, _)::_, _ ->
2013-08-20 16:32:57 +02:00
let pdf = get_single_pdf (Some CopyBox) false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2013-08-20 16:32:57 +02:00
let f, t =
begin match args.frombox, args.tobox with
| Some f, Some t -> f, t
| _ -> error "Copy box: no tobox or no frombox specified"
end
in
2021-12-22 10:31:39 +01:00
let pdf = Cpdfpage.copy_box f t args.mediabox_if_missing pdf range in
2013-08-20 16:32:57 +02:00
write_pdf false pdf
| _ -> error "Copy Box: bad command line"
end
| Some Decompress ->
let pdf = get_single_pdf (Some Decompress) false in
2013-08-20 16:32:57 +02:00
Pdf.iter_stream
(function stream ->
try Pdfcodec.decode_pdfstream_until_unknown pdf stream with
2023-04-25 14:45:56 +02:00
e -> Pdfe.log (Printf.sprintf "Decode failure: %s. Carrying on...\n" (Printexc.to_string e)); ())
2013-08-20 16:32:57 +02:00
pdf;
2020-11-11 15:26:48 +01:00
write_pdf ~is_decompress:true false pdf
2013-08-20 16:32:57 +02:00
| Some Compress ->
let pdf = get_single_pdf (Some Compress) false in
if args.remove_duplicate_streams then
Pdfmerge.remove_duplicate_fonts pdf;
2021-12-18 17:26:33 +01:00
write_pdf false (Cpdfsqueeze.recompress_pdf pdf)
2013-08-20 16:32:57 +02:00
| Some RemoveCrop ->
begin match args.inputs, args.out with
| (_, pagespec, _, _, _, _)::_, _ ->
2013-08-20 16:32:57 +02:00
let pdf = get_single_pdf (Some RemoveCrop) false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2021-12-22 09:58:56 +01:00
let pdf = Cpdfpage.remove_cropping_pdf pdf range in
2013-08-20 16:32:57 +02:00
write_pdf false pdf
| _ -> error "remove-crop: bad command line"
end
| Some RemoveArt ->
begin match args.inputs, args.out with
| (_, pagespec, _, _, _, _)::_, _ ->
let pdf = get_single_pdf (Some RemoveArt) false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2021-12-22 09:58:56 +01:00
let pdf = Cpdfpage.remove_art_pdf pdf range in
write_pdf false pdf
| _ -> error "remove-crop: bad command line"
end
| Some RemoveTrim ->
begin match args.inputs, args.out with
| (_, pagespec, _, _, _, _)::_, _ ->
let pdf = get_single_pdf (Some RemoveTrim) false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2021-12-22 09:58:56 +01:00
let pdf = Cpdfpage.remove_trim_pdf pdf range in
write_pdf false pdf
| _ -> error "remove-crop: bad command line"
end
| Some RemoveBleed ->
begin match args.inputs, args.out with
| (_, pagespec, _, _, _, _)::_, _ ->
let pdf = get_single_pdf (Some RemoveBleed) false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2021-12-22 09:58:56 +01:00
let pdf = Cpdfpage.remove_bleed_pdf pdf range in
write_pdf false pdf
| _ -> error "remove-crop: bad command line"
end
2013-08-20 16:32:57 +02:00
| Some (Rotate _) | Some (Rotateby _) ->
begin match args.inputs, args.out with
| (_, pagespec, _, _, _, _)::_, _ ->
2013-08-20 16:32:57 +02:00
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2013-08-20 16:32:57 +02:00
let rotate =
match args.op with
2021-12-22 09:58:56 +01:00
| Some (Rotate i) -> Cpdfpage.rotate_pdf i
| Some (Rotateby i) -> Cpdfpage.rotate_pdf_by i
2013-08-20 16:32:57 +02:00
| _ -> assert false
in
let pdf = rotate pdf range in
write_pdf false pdf
| _ -> error "rotate: bad command line"
end
| Some (RotateContents a) ->
begin match args.inputs, args.out with
| (_, pagespec, _, _, _, _)::_, _ ->
2013-08-20 16:32:57 +02:00
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2021-12-22 09:58:56 +01:00
let pdf = Cpdfpage.rotate_contents ~fast:args.fast a pdf range in
2013-08-20 16:32:57 +02:00
write_pdf false pdf
| _ -> error "rotate-contents: bad command line"
end
| Some Upright ->
begin match args.inputs, args.out with
| (_, pagespec, _, _, _, _)::_, _ ->
2013-08-20 16:32:57 +02:00
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2021-12-22 09:58:56 +01:00
let pdf = Cpdfpage.upright ~fast:args.fast range pdf in
2013-08-20 16:32:57 +02:00
write_pdf false pdf
| _ -> error "rotate-contents: bad command line"
end
| Some ((VFlip | HFlip) as flip) ->
begin match args.inputs, args.out with
| (_, pagespec, _, _, _, _)::_, _ ->
2013-08-20 16:32:57 +02:00
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2013-08-20 16:32:57 +02:00
let pdf =
if flip = VFlip
2021-12-22 10:31:39 +01:00
then Cpdfpage.vflip_pdf ~fast:args.fast pdf range
else Cpdfpage.hflip_pdf ~fast:args.fast pdf range
2013-08-20 16:32:57 +02:00
in
write_pdf false pdf
| _ -> error "flip: bad command line"
end
| Some ((SetAuthor _ | SetTitle _ | SetSubject _ | SetKeywords _
| SetCreate _ | SetModify _ | SetCreator _ | SetProducer _
| SetTrapped | SetUntrapped) as op) ->
let key, value, version =
2021-12-19 13:55:06 +01:00
let f s = if args.encoding <> Cpdfmetadata.Raw then Pdftext.pdfdocstring_of_utf8 s else unescape_octals s in
2013-08-20 16:32:57 +02:00
match op with
| SetAuthor s -> "/Author", Pdf.String (f s), 0
| SetTitle s -> "/Title", Pdf.String (f s), 1
| SetSubject s -> "/Subject", Pdf.String (f s), 1
| SetKeywords s -> "/Keywords", Pdf.String (f s), 1
2021-12-19 13:55:06 +01:00
| SetCreate s -> "/CreationDate", Pdf.String (Cpdfmetadata.expand_date s), 0
| SetModify s -> "/ModDate", Pdf.String (Cpdfmetadata.expand_date s), 0
2013-08-20 16:32:57 +02:00
| SetCreator s -> "/Creator", Pdf.String (f s), 0
| SetProducer s -> "/Producer", Pdf.String (f s), 0
| SetTrapped -> "/Trapped", Pdf.Boolean true, 3
| SetUntrapped -> "/Trapped", Pdf.Boolean false, 3
| _ -> assert false
in
let pdf = get_single_pdf args.op false in
let version = if args.keepversion || pdf.Pdf.major > 1 then pdf.Pdf.minor else version in
2019-06-28 16:01:28 +02:00
write_pdf false
2021-12-19 13:55:06 +01:00
(Cpdfmetadata.set_pdf_info
2019-06-28 16:01:28 +02:00
~xmp_also:args.alsosetxml
~xmp_just_set:args.justsetxml
(key, value, version) pdf)
| Some (SetMetadataDate date) ->
2021-12-19 13:55:06 +01:00
write_pdf false (Cpdfmetadata.set_metadata_date (get_single_pdf args.op false) date)
2013-08-20 16:32:57 +02:00
| Some ((HideToolbar _ | HideMenubar _ | HideWindowUI _
| FitWindow _ | CenterWindow _ | DisplayDocTitle _) as op) ->
begin match args.out with
| _ ->
let key, value, version =
match op with
| HideToolbar s -> "/HideToolbar", Pdf.Boolean s, 0
| HideMenubar s -> "/HideMenubar", Pdf.Boolean s, 0
| HideWindowUI s -> "/HideWindowUI", Pdf.Boolean s, 0
| FitWindow s -> "/FitWindow", Pdf.Boolean s, 0
| CenterWindow s -> "/CenterWindow", Pdf.Boolean s, 0
| DisplayDocTitle s -> "/DisplayDocTitle", Pdf.Boolean s, 4
| _ -> assert false
in
let pdf = get_single_pdf args.op false in
let version = if args.keepversion || pdf.Pdf.major > 1 then pdf.Pdf.minor else version in
2021-12-19 13:55:06 +01:00
write_pdf false (Cpdfmetadata.set_viewer_preference (key, value, version) pdf)
2013-08-20 16:32:57 +02:00
end
| Some (OpenAtPage str) ->
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf str in
let n = match range with [x] -> x | _ -> error "open_at_page: range does not specify single page" in
2021-12-19 13:55:06 +01:00
write_pdf false (Cpdfmetadata.set_open_action pdf false n)
| Some (OpenAtPageFit str) ->
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf str in
2023-04-18 15:42:17 +02:00
let n = match range with [x] -> x | _ -> error "open_at_page_fit: range does not specify single page" in
2021-12-19 13:55:06 +01:00
write_pdf false (Cpdfmetadata.set_open_action pdf true n)
2023-04-18 15:42:17 +02:00
| Some (OpenAtPageCustom dest) ->
let pdf = get_single_pdf args.op false in
write_pdf false (Cpdfmetadata.set_open_action ~dest pdf true 1)
2013-08-20 16:32:57 +02:00
| Some (SetMetadata metadata_file) ->
2021-12-19 13:55:06 +01:00
write_pdf false (Cpdfmetadata.set_metadata args.keepversion metadata_file (get_single_pdf args.op false))
2013-08-20 16:32:57 +02:00
| Some (SetVersion v) ->
let pdf = get_single_pdf args.op false in
2019-08-01 15:34:45 +02:00
let pdf =
if v >= 10
then {pdf with Pdf.major = 2; Pdf.minor = v - 10}
else {pdf with Pdf.major = 1; Pdf.minor = v}
in
write_pdf false pdf
2013-08-20 16:32:57 +02:00
| Some (SetPageLayout s) ->
2021-12-19 13:55:06 +01:00
write_pdf false (Cpdfmetadata.set_page_layout (get_single_pdf args.op false) s)
2013-08-20 16:32:57 +02:00
| Some (SetPageMode s) ->
2021-12-19 13:55:06 +01:00
write_pdf false (Cpdfmetadata.set_page_mode (get_single_pdf args.op false) s)
2023-04-17 20:55:11 +02:00
| Some (SetNonFullScreenPageMode s) ->
write_pdf false (Cpdfmetadata.set_non_full_screen_page_mode (get_single_pdf args.op false) s)
2013-08-20 16:32:57 +02:00
| Some Split ->
begin match args.inputs, args.out with
| [(f, ranges, _, _, _, _)], File output_spec ->
let pdf = get_single_pdf args.op true in
2023-10-30 18:55:19 +01:00
let enc = build_enc () in
args.create_objstm <- args.preserve_objstm;
split_pdf
enc args.original_filename args.chunksize args.linearize ~cpdflin:args.cpdflin
~preserve_objstm:args.preserve_objstm ~create_objstm:args.preserve_objstm (*yes--always create if preserving *)
~squeeze:args.squeeze output_spec pdf
2013-08-20 16:32:57 +02:00
| _, Stdout -> error "Can't split to standard output"
| _, NoOutputSpecified -> error "Split: No output format specified"
| _ -> error "Split: bad parameters"
end
| Some (SplitOnBookmarks level) ->
begin match args.out with
| File output_spec ->
let pdf = get_single_pdf args.op false in
2023-10-30 18:55:19 +01:00
let enc = build_enc () in
args.create_objstm <- args.preserve_objstm;
split_at_bookmarks
enc args.original_filename ~squeeze:args.squeeze level output_spec pdf
| Stdout -> error "Can't split to standard output"
| NoOutputSpecified -> error "Split: No output format specified"
end
2023-10-25 19:15:19 +02:00
| Some (SplitMax s) ->
begin match args.out with
| File output_spec ->
let pdf = get_single_pdf args.op false in
2023-10-30 18:55:19 +01:00
let enc = build_enc () in
args.create_objstm <- args.preserve_objstm;
split_max enc args.original_filename ~squeeze:args.squeeze output_spec s pdf
2023-10-25 19:15:19 +02:00
| Stdout -> error "Can't split to standard output"
| NoOutputSpecified -> error "Split: No output format specified"
end
2023-10-30 17:36:41 +01:00
| Some Spray ->
2023-10-30 20:10:22 +01:00
begin match args.inputs, args.out with
| (_, pagespec, _, _, _, _)::_, File output_spec ->
2023-10-30 18:55:19 +01:00
let pdf = get_single_pdf args.op false in
2023-10-30 20:10:22 +01:00
let range = ref (parse_pagespec pdf pagespec) in
2023-10-30 18:55:19 +01:00
let enc = build_enc () in
2023-10-30 20:10:22 +01:00
let pagenums = map ref (many [] (length !spray_outputs)) in
let n = ref 0 in
while !range <> [] do
List.nth pagenums (!n mod (length !spray_outputs)) =| hd !range;
range := tl !range;
n += 1;
done;
let names = rev !spray_outputs in
iter (fun x -> if !x = [] then error "Spray: must have at least one page for each output") pagenums;
2023-10-30 18:55:19 +01:00
args.create_objstm <- args.preserve_objstm;
2023-10-30 20:10:22 +01:00
fast_write_split_pdfs ~names enc 0 args.original_filename args.squeeze output_spec pdf (map rev (map (!) pagenums)) (Pdfpage.pages_of_pagetree pdf)
| _, Stdout -> error "Can't spray to standard output"
| _, NoOutputSpecified -> error "Spray: No output format specified"
| _, _ -> error "Spray: no input"
2023-10-30 18:55:19 +01:00
end
2013-08-20 16:32:57 +02:00
| Some Presentation ->
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
2013-08-20 16:32:57 +02:00
let pdf' =
2021-12-17 17:12:03 +01:00
Cpdfpresent.presentation
2013-08-20 16:32:57 +02:00
range
args.transition args.duration args.horizontal
args.inward args.direction args.effect_duration pdf
in
pdf.Pdf.minor <- if args.keepversion || pdf.Pdf.major > 1 then pdf.Pdf.minor else max pdf.Pdf.minor 1;
2013-08-20 16:32:57 +02:00
write_pdf false pdf'
| Some ChangeId ->
2014-11-18 15:57:18 +01:00
if args.recrypt then
soft_error "Cannot recrypt with change id: an id is part of encryption information";
2013-08-20 16:32:57 +02:00
begin match args.inputs, args.out with
| [(k, _, _, _, _, _) as input], File s ->
2013-08-20 16:32:57 +02:00
let pdf = get_pdf_from_input_kind input args.op k in
2014-10-02 15:19:05 +02:00
write_pdf true pdf
| [(k, _, _, _, _, _) as input], Stdout ->
2013-08-20 16:32:57 +02:00
let pdf = get_pdf_from_input_kind input args.op k in
2014-10-02 15:19:05 +02:00
write_pdf true pdf
2013-08-20 16:32:57 +02:00
| _ -> error "ChangeId: exactly one input file and output file required."
end
| Some RemoveId ->
2014-11-18 15:57:18 +01:00
if args.recrypt then
soft_error "Cannot recrypt with remove id: an id is part of encryption information";
2013-08-20 16:32:57 +02:00
let pdf = get_single_pdf args.op false in
pdf.Pdf.trailerdict <- Pdf.remove_dict_entry pdf.Pdf.trailerdict "/ID";
write_pdf false pdf
| Some (CopyId getfrom) ->
2014-11-18 15:57:18 +01:00
if args.recrypt then
2014-11-18 16:16:11 +01:00
soft_error "Cannot recrypt with copy id: an id is part of encryption information";
2013-08-20 16:32:57 +02:00
begin match args.inputs with
| [(k, _, u, o, _, _) as input] ->
2013-08-20 16:32:57 +02:00
let pdf =
2021-12-19 13:55:06 +01:00
Cpdfmetadata.copy_id
2013-08-20 16:32:57 +02:00
args.keepversion
(pdfread_pdf_of_file (optstring u) (optstring o) getfrom)
(get_pdf_from_input_kind input args.op k)
in
write_pdf false pdf
| _ -> error "copy-id: No input file specified"
end
| Some (ThinLines w) ->
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
2021-12-22 10:31:39 +01:00
write_pdf false (Cpdftweak.thinlines range w pdf)
2013-08-20 16:32:57 +02:00
| Some BlackText ->
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
2021-12-22 10:31:39 +01:00
write_pdf false (Cpdftweak.blacktext args.color range pdf)
2013-08-20 16:32:57 +02:00
| Some BlackLines ->
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
2021-12-22 10:31:39 +01:00
write_pdf false (Cpdftweak.blacklines args.color range pdf)
2013-08-20 16:32:57 +02:00
| Some BlackFills ->
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
2021-12-22 10:31:39 +01:00
write_pdf false (Cpdftweak.blackfills args.color range pdf)
2013-08-20 16:32:57 +02:00
| Some RemoveAnnotations ->
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
2021-12-21 14:44:46 +01:00
write_pdf false (Cpdfannot.remove_annotations range pdf)
2013-08-20 16:32:57 +02:00
| Some (CopyAnnotations getfrom) ->
begin match args.inputs with
| [(k, _, u, o, _, _) as input] ->
2023-04-21 16:39:09 +02:00
let input_pdf = get_pdf_from_input_kind input args.op k in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty input_pdf (get_pagespec ()) in
2023-04-21 16:39:09 +02:00
Cpdfannot.copy_annotations
range
(pdfread_pdf_of_file (optstring u) (optstring o) getfrom)
input_pdf;
write_pdf false input_pdf
2013-08-20 16:32:57 +02:00
| _ -> error "copy-annotations: No input file specified"
end
2023-01-13 07:30:46 +01:00
| Some (SetAnnotations json) ->
let data = Pdfio.input_of_channel (open_in_bin json) in
let pdf = get_single_pdf args.op false in
Cpdfannot.set_annotations_json pdf data;
write_pdf false pdf
2013-08-20 16:32:57 +02:00
| Some ListAnnotations ->
2023-01-11 07:55:50 +01:00
let pdf = get_single_pdf args.op true in
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
2023-04-22 17:58:05 +02:00
if args.format_json then
flprint (Pdfio.string_of_bytes (Cpdfannot.get_annotations_json pdf range))
else
Cpdfannot.list_annotations range args.encoding pdf
2013-08-20 16:32:57 +02:00
| Some Shift ->
let pdf = get_single_pdf args.op false in
2024-02-07 17:55:10 +01:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
let dxdylist = Cpdfcoord.parse_coordinates pdf args.coord in
write_pdf false (Cpdfpage.shift_pdf ~fast:args.fast dxdylist pdf range)
2024-01-22 17:36:37 +01:00
| Some ShiftBoxes ->
let pdf = get_single_pdf args.op false in
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
let dxdylist = Cpdfcoord.parse_coordinates pdf args.coord in
2024-02-07 17:55:10 +01:00
write_pdf false (Cpdfpage.shift_boxes dxdylist pdf range)
2013-08-20 16:32:57 +02:00
| Some Scale ->
let pdf = get_single_pdf args.op false in
2024-02-07 17:55:10 +01:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
let sxsylist = Cpdfcoord.parse_coordinates pdf args.coord in
write_pdf false (Cpdfpage.scale_pdf ~fast:args.fast sxsylist pdf range)
2013-08-20 16:32:57 +02:00
| Some ScaleToFit ->
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
2023-06-27 17:24:41 +02:00
warn_prerotate range pdf;
let pdf = if args.prerotate then prerotate range pdf else pdf in
let xylist = Cpdfcoord.parse_coordinates pdf args.coord
2013-08-20 16:32:57 +02:00
and scale = args.scale in
2021-12-22 09:58:56 +01:00
write_pdf false (Cpdfpage.scale_to_fit_pdf ~fast:args.fast args.position scale xylist args.op pdf range)
2013-08-20 16:32:57 +02:00
| Some (ScaleContents scale) ->
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
2021-12-22 09:58:56 +01:00
write_pdf false (Cpdfpage.scale_contents ~fast:args.fast args.position scale pdf range)
2013-08-20 16:32:57 +02:00
| Some ListAttachedFiles ->
let pdf = get_single_pdf args.op false in
2021-10-02 13:22:59 +02:00
let attachments = Cpdfattach.list_attached_files pdf in
2015-09-26 21:30:57 +02:00
iter
2021-10-02 13:22:59 +02:00
(fun a -> Printf.printf "%i %s\n" a.Cpdfattach.pagenumber a.Cpdfattach.name)
2015-09-26 21:30:57 +02:00
attachments;
flprint ""
2013-08-20 16:32:57 +02:00
| Some DumpAttachedFiles ->
let pdf = get_single_pdf args.op false in
begin match args.out with
2021-12-19 14:38:27 +01:00
| NoOutputSpecified -> Cpdfattach.dump_attached_files pdf ""
| File n -> Cpdfattach.dump_attached_files pdf n
2013-08-20 16:32:57 +02:00
| Stdout -> error "Can't dump attachments to stdout"
end
| Some RemoveAttachedFiles ->
2021-10-02 13:22:59 +02:00
write_pdf false (Cpdfattach.remove_attached_files (get_single_pdf args.op false))
2013-08-20 16:32:57 +02:00
| Some (AttachFile files) ->
begin match args.inputs with
| [(k, _, _, _, _, _) as input] ->
2013-08-20 16:32:57 +02:00
let pdf = get_pdf_from_input_kind input args.op k in
let topage =
try
match args.topage with
| None -> None
| Some "end" -> Some (Pdfpage.endpage pdf)
| Some s -> Some (int_of_string s)
with _ -> error "Bad -to-page"
in
2021-10-02 13:22:59 +02:00
let pdf = fold_left (Cpdfattach.attach_file args.keepversion topage) pdf (rev files) in
2013-08-20 16:32:57 +02:00
write_pdf false pdf
| _ -> error "attach file: No input file specified"
end
| Some PadBefore ->
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
2017-12-18 20:44:02 +01:00
let padwith =
match args.padwith with
None -> None
| Some filename -> Some (pdfread_pdf_of_file None None filename)
in
2021-12-18 16:47:06 +01:00
write_pdf false (Cpdfpad.padbefore ?padwith range pdf)
2013-08-20 16:32:57 +02:00
| Some PadAfter ->
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
2017-12-18 20:44:02 +01:00
let padwith =
match args.padwith with
None -> None
| Some filename -> Some (pdfread_pdf_of_file None None filename)
in
2021-12-18 16:47:06 +01:00
write_pdf false (Cpdfpad.padafter ?padwith range pdf)
2013-08-20 16:32:57 +02:00
| Some (PadEvery n) ->
let pdf = get_single_pdf args.op false in
let range =
match keep (function m -> m mod n = 0) (ilist 1 (Pdfpage.endpage pdf)) with
| [] -> []
| l -> if last l = Pdfpage.endpage pdf then all_but_last l else l
in
2017-12-21 15:54:01 +01:00
let padwith =
match args.padwith with
None -> None
| Some filename -> Some (pdfread_pdf_of_file None None filename)
in
2021-12-18 16:47:06 +01:00
write_pdf false (Cpdfpad.padafter ?padwith range pdf)
2013-08-20 16:32:57 +02:00
| Some (PadMultiple n) ->
let pdf = get_single_pdf args.op false in
2021-12-18 16:47:06 +01:00
write_pdf false (Cpdfpad.padmultiple n pdf)
2019-07-01 16:35:17 +02:00
| Some (PadMultipleBefore n) ->
let pdf = get_single_pdf args.op false in
2021-12-18 16:47:06 +01:00
write_pdf false (Cpdfpad.padmultiple (-n) pdf)
2013-08-20 16:32:57 +02:00
| Some Draft ->
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
2021-12-21 16:25:59 +01:00
write_pdf false (Cpdfdraft.draft args.removeonly args.boxes range pdf)
2013-08-20 16:32:57 +02:00
| Some (AddText text) ->
2022-10-19 17:32:56 +02:00
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
2022-10-19 17:32:56 +02:00
let cpdffont = embed_font () in
2022-01-03 16:13:03 +01:00
warn_prerotate range pdf;
2013-08-20 16:32:57 +02:00
let pdf =
2022-01-02 16:18:55 +01:00
if args.prerotate then prerotate range pdf else pdf
2013-08-20 16:32:57 +02:00
and filename =
match args.inputs with
| (InFile inname, _, _, _, _, _)::_ -> inname
2013-08-20 16:32:57 +02:00
| _ -> ""
in
2022-10-19 14:48:13 +02:00
write_pdf false
(Cpdfaddtext.addtexts
2022-10-19 17:32:56 +02:00
args.linewidth args.outline args.fast args.fontname
cpdffont args.bates args.batespad args.color args.position
2022-10-19 14:48:13 +02:00
args.linespacing args.fontsize args.underneath text range
args.relative_to_cropbox args.opacity
args.justification args.midline args.topline filename
2022-10-19 17:32:56 +02:00
args.extract_text_font_size args.coord ~raw:(args.encoding = Raw) pdf)
2013-08-20 16:32:57 +02:00
| Some RemoveText ->
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
2022-09-27 20:58:27 +02:00
write_pdf false (Cpdfremovetext.removetext range pdf)
2013-08-20 16:32:57 +02:00
| Some AddRectangle ->
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
2013-08-20 16:32:57 +02:00
write_pdf false
2021-12-21 15:00:58 +01:00
(Cpdfaddtext.addrectangle
args.fast (Cpdfcoord.parse_coordinate pdf args.coord)
2016-11-08 19:15:04 +01:00
args.color args.outline args.linewidth args.opacity args.position
args.relative_to_cropbox args.underneath range pdf)
2013-08-20 16:32:57 +02:00
| Some (AddBookmarks file) ->
write_pdf false
2021-12-21 14:44:46 +01:00
(Cpdfbookmarks.add_bookmarks ~json:args.format_json true (Pdfio.input_of_channel (open_in_bin file))
2013-08-20 16:32:57 +02:00
(get_single_pdf args.op false))
| Some RemoveBookmarks ->
write_pdf false (Pdfmarks.remove_bookmarks (get_single_pdf args.op false))
| Some TwoUp ->
2021-12-22 09:58:56 +01:00
write_pdf false (Cpdfimpose.twoup args.fast (get_single_pdf args.op false))
2013-08-20 16:32:57 +02:00
| Some TwoUpStack ->
2021-12-22 09:58:56 +01:00
write_pdf false (Cpdfimpose.twoup_stack args.fast (get_single_pdf args.op false))
2021-10-18 19:33:52 +02:00
| Some Impose fit ->
2021-10-19 17:18:15 +02:00
let pdf = get_single_pdf args.op false in
let x, y = Cpdfcoord.parse_coordinate pdf args.coord in
2021-10-19 20:26:02 +02:00
if not fit && (x < 0.0 || y < 0.0) then error "Negative imposition parameters not allowed." else
2021-10-19 17:18:15 +02:00
write_pdf false
2021-12-22 09:58:56 +01:00
(Cpdfimpose.impose ~x ~y ~fit ~columns:args.impose_columns ~rtl:args.impose_rtl ~btt:args.impose_btt ~center:args.impose_center
2021-10-27 21:15:05 +02:00
~margin:args.impose_margin ~spacing:args.impose_spacing ~linewidth:args.impose_linewidth ~fast:args.fast pdf)
2013-08-20 16:32:57 +02:00
| Some (StampOn over) ->
let overpdf =
match over with
| "stamp_use_stdin" -> pdf_of_stdin "" ""
| x -> pdfread_pdf_of_file None None x
in
2014-10-14 20:45:10 +02:00
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
2014-10-14 20:45:10 +02:00
let pdf =
2021-12-22 10:31:39 +01:00
Cpdfpage.stamp
args.relative_to_cropbox args.position args.topline args.midline args.fast
2015-02-17 16:48:00 +01:00
args.scale_stamp_to_fit true range overpdf pdf
2014-10-14 20:45:10 +02:00
in
write_pdf false pdf
2013-08-20 16:32:57 +02:00
| Some (StampUnder under) ->
let underpdf =
match under with
| "stamp_use_stdin" -> pdf_of_stdin "" ""
| x -> pdfread_pdf_of_file None None x
in
2014-10-14 20:45:10 +02:00
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
2014-10-14 20:45:10 +02:00
let pdf =
2021-12-22 10:31:39 +01:00
Cpdfpage.stamp
args.relative_to_cropbox args.position args.topline args.midline args.fast
2015-02-17 16:48:00 +01:00
args.scale_stamp_to_fit false range underpdf pdf
2014-10-14 20:45:10 +02:00
in
write_pdf false pdf
2013-08-20 16:32:57 +02:00
| Some (CombinePages over) ->
2022-01-03 16:13:03 +01:00
let underpdf = get_single_pdf args.op false in
let overpdf = pdfread_pdf_of_file None None over in
warn_prerotate (parse_pagespec underpdf "all") underpdf;
warn_prerotate (parse_pagespec overpdf "all") overpdf;
write_pdf false
(Cpdfpage.combine_pages
args.fast
(prerotate (parse_pagespec underpdf "all") underpdf)
(prerotate (parse_pagespec overpdf "all") overpdf)
false false true)
2013-08-20 16:32:57 +02:00
| Some Encrypt ->
let pdf = get_single_pdf args.op false in
2021-12-18 17:26:33 +01:00
let pdf = Cpdfsqueeze.recompress_pdf pdf
2023-10-30 18:55:19 +01:00
and encryption = build_enc () in
2013-08-20 16:32:57 +02:00
Pdf.remove_unreferenced pdf;
if not args.keepversion then
begin
let newversion =
match args.crypt_method with
"40bit" -> 1 | "128bit" -> 4 | "AES" -> 6 | "AES256" | "AES256ISO" -> 7 | _ -> 0
in
let newversion = if args.create_objstm then 5 else newversion in
if pdf.Pdf.major = 1 then pdf.Pdf.minor <- max pdf.Pdf.minor newversion
2013-08-20 16:32:57 +02:00
end;
2023-10-30 18:55:19 +01:00
write_pdf ~encryption false pdf
2013-08-20 16:32:57 +02:00
| Some Decrypt ->
args.recrypt <- false;
2013-08-20 16:32:57 +02:00
write_pdf false (get_single_pdf args.op false)
| Some RemoveMetadata ->
2021-12-19 13:55:06 +01:00
write_pdf false (Cpdfmetadata.remove_metadata (get_single_pdf args.op false))
2013-08-20 16:32:57 +02:00
| Some ExtractImages ->
let output_spec =
begin match args.out with
| File output_spec -> output_spec
| _ -> ""
end
in
let pdf = get_single_pdf args.op true in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
2023-11-10 14:46:52 +01:00
Cpdfimage.extract_images ~raw:(args.encoding = Cpdfmetadata.Raw) ?path_to_p2p:(match args.path_to_p2p with "" -> None | x -> Some x) ?path_to_im:(match args.path_to_im with "" -> None | x -> Some x) args.encoding args.dedup args.dedup_per_page pdf range output_spec
2013-08-20 16:32:57 +02:00
| Some (ImageResolution f) ->
let pdf = get_single_pdf args.op true in
2024-02-07 15:42:38 +01:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
if args.format_json then
flprint (Pdfio.string_of_bytes (Cpdfimage.image_resolution_json pdf range f))
else
let images = Cpdfimage.image_resolution pdf range f in
iter
(function (pagenum, xobject, w, h, wdpi, hdpi, objnum) ->
if wdpi < f || hdpi < f then
Printf.printf "%i, %s, %i, %i, %f, %f, %i\n" pagenum xobject w h wdpi hdpi objnum)
images
2023-11-09 12:25:19 +01:00
| Some ListImages ->
2023-11-13 18:55:59 +01:00
let pdf = get_single_pdf args.op true in
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
let json = Cpdfimage.images pdf range in
if args.format_json then
flprint (Cpdfyojson.Safe.pretty_to_string json)
else
2023-11-14 18:47:44 +01:00
begin match json with
| `List l ->
iter
2023-12-28 16:48:30 +01:00
(function (`Assoc [(_, `Int i); (_, `List pages); (_, `String name); (_, `Int w); (_, `Int h); (_, `Int size); (_, `Int bpc); (_, `String cs); (_, `String filter)]) ->
2023-11-14 18:47:44 +01:00
let pages = combine_with_spaces (map (function `Int i -> string_of_int i | _ -> "") pages) in
2023-12-28 16:48:30 +01:00
flprint (Printf.sprintf "%i, %s, %s, %i, %i, %i, %i, %s, %s\n" i pages name w h size bpc cs filter)
2023-11-14 18:47:44 +01:00
| _ -> ())
l
| _ -> ()
end
2013-08-20 16:32:57 +02:00
| Some MissingFonts ->
let pdf = get_single_pdf args.op true in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
2021-10-18 17:22:38 +02:00
Cpdffont.missing_fonts pdf range
2013-08-20 16:32:57 +02:00
| Some ExtractText ->
let pdf = get_single_pdf args.op true in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
2022-09-27 20:58:27 +02:00
let text = Cpdfextracttext.extract_text args.extract_text_font_size pdf range in
2013-08-20 16:32:57 +02:00
begin match args.out with
| File filename ->
let fh = open_out_bin filename in
output_string fh text;
close_out fh
| NoOutputSpecified | Stdout ->
print_string text;
print_newline ()
end
2013-10-24 16:21:54 +02:00
| Some AddPageLabels ->
let pdf = get_single_pdf args.op false in
let range = parse_pagespec pdf (get_pagespec ()) in
2021-12-20 10:02:39 +01:00
Cpdfpagelabels.add_page_labels
pdf args.labelsprogress args.labelstyle args.labelprefix args.labelstartval range;
2014-09-18 16:40:22 +02:00
write_pdf false pdf
| Some RemovePageLabels ->
let pdf = get_single_pdf args.op false in
Pdfpagelabels.remove pdf;
write_pdf false pdf
| Some PrintPageLabels ->
let pdf = get_single_pdf args.op true in
2023-10-31 18:28:07 +01:00
if args.format_json then
let json_of_pagelabel l =
`Assoc
[("labelstyle", `String (Pdfpagelabels.string_of_labelstyle l.Pdfpagelabels.labelstyle));
("labelprefix", begin match l.Pdfpagelabels.labelprefix with None -> `Null | Some s -> `String s end);
("startpage", `Int l.Pdfpagelabels.startpage);
("startvalue", `Int l.Pdfpagelabels.startvalue)]
in
flprint (Cpdfyojson.Safe.pretty_to_string (`List (map json_of_pagelabel (Pdfpagelabels.read pdf))))
else
iter
print_string
(map Pdfpagelabels.string_of_pagelabel (Pdfpagelabels.read pdf))
2015-01-07 19:29:11 +01:00
| Some (RemoveDictEntry key) ->
let pdf = get_single_pdf args.op true in
2021-12-22 10:31:39 +01:00
Cpdftweak.remove_dict_entry pdf key args.dict_entry_search;
2015-01-07 19:29:11 +01:00
write_pdf false pdf
2021-10-28 18:06:46 +02:00
| Some (ReplaceDictEntry key) ->
let pdf = get_single_pdf args.op true in
2021-12-22 10:31:39 +01:00
Cpdftweak.replace_dict_entry pdf key args.replace_dict_entry_value args.dict_entry_search;
2021-10-28 18:06:46 +02:00
write_pdf false pdf
2021-10-29 16:09:21 +02:00
| Some (PrintDictEntry key) ->
let pdf = get_single_pdf args.op true in
2023-01-17 06:37:54 +01:00
Cpdftweak.print_dict_entry ~utf8:(args.encoding = Cpdfmetadata.UTF8) pdf key
2016-11-03 18:11:08 +01:00
| Some ListSpotColours ->
2016-11-04 13:43:58 +01:00
let pdf = get_single_pdf args.op false in
2021-12-19 14:26:15 +01:00
Cpdfspot.list_spot_colours pdf
2016-11-09 16:42:47 +01:00
| Some RemoveClipping ->
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
2021-12-22 10:31:39 +01:00
write_pdf false (Cpdftweak.remove_clipping pdf range)
2019-07-01 15:40:22 +02:00
| Some CreateMetadata ->
let pdf = get_single_pdf args.op false in
2021-12-19 13:55:06 +01:00
write_pdf false (Cpdfmetadata.create_metadata pdf)
2019-07-02 19:20:05 +02:00
| Some EmbedMissingFonts ->
let fi =
match args.inputs with
[(InFile fi, _, _, _, _, _)] -> fi
| _ -> error "Input method not supported for -embed-missing-fonts"
in
let fo =
match args.out with
File fo -> fo
| _ -> error "Output method not supported for -embed-missing-fonts"
in
2021-10-18 17:22:38 +02:00
Cpdffont.embed_missing_fonts args.path_to_ghostscript args.gs_quiet fi fo
2019-07-06 18:55:26 +02:00
| Some (BookmarksOpenToLevel n) ->
let pdf = get_single_pdf args.op false in
2021-12-21 16:06:40 +01:00
write_pdf false (Cpdfbookmarks.bookmarks_open_to_level n pdf)
| Some CreatePDF ->
2021-12-21 16:25:59 +01:00
let pdf = Cpdfcreate.blank_document_paper args.createpdf_pagesize args.createpdf_pages in
write_pdf false pdf
2019-07-11 18:19:40 +02:00
| Some RemoveAllText ->
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
2022-09-27 20:58:27 +02:00
write_pdf false (Cpdfremovetext.remove_all_text range pdf)
2019-07-15 12:52:14 +02:00
| Some ShowBoxes ->
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
2021-12-22 10:31:39 +01:00
write_pdf false (Cpdfpage.show_boxes pdf range)
2019-07-15 14:42:32 +02:00
| Some TrimMarks ->
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
2021-12-22 10:31:39 +01:00
write_pdf false (Cpdfpage.trim_marks pdf range)
| Some (Postpend s | Prepend s as x) ->
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
let before = match x with Prepend _ -> true | _ -> false in
2021-12-22 10:31:39 +01:00
write_pdf false (Cpdftweak.append_page_content s before args.fast range pdf)
2020-01-30 11:20:33 +01:00
| Some OutputJSON ->
let pdf = get_single_pdf args.op false in
write_json args.out pdf
2020-02-26 17:24:27 +01:00
| Some OCGCoalesce ->
let pdf = get_single_pdf args.op false in
2021-12-18 17:14:31 +01:00
Cpdfocg.ocg_coalesce pdf;
2020-02-27 15:14:51 +01:00
write_pdf false pdf
2020-02-27 14:32:45 +01:00
| Some OCGList ->
let pdf = get_single_pdf args.op true in
2021-12-18 17:14:31 +01:00
Cpdfocg.ocg_list pdf
2020-02-27 14:32:45 +01:00
| Some OCGRename ->
let pdf = get_single_pdf args.op false in
2021-12-18 17:14:31 +01:00
Cpdfocg.ocg_rename args.ocgrenamefrom args.ocgrenameto pdf;
2020-02-27 15:14:51 +01:00
write_pdf false pdf
2020-03-02 13:37:39 +01:00
| Some OCGOrderAll ->
let pdf = get_single_pdf args.op false in
2021-12-18 17:14:31 +01:00
Cpdfocg.ocg_order_all pdf;
2020-03-02 13:37:39 +01:00
write_pdf false pdf
| Some (StampAsXObject stamp) ->
let stamp_pdf =
match stamp with
| "stamp_use_stdin" -> pdf_of_stdin "" ""
| x -> pdfread_pdf_of_file None None x
in
let pdf = get_single_pdf args.op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
let pdf, xobj_name =
2021-12-21 16:33:56 +01:00
Cpdfxobject.stamp_as_xobject pdf range stamp_pdf
in
Printf.printf "%s\n" xobj_name;
flush stdout;
write_pdf false pdf
2021-11-01 16:40:33 +01:00
| Some (PrintFontEncoding fontname) ->
let pdf = get_single_pdf args.op true in
2021-11-12 23:09:49 +01:00
Cpdffont.print_font_table pdf fontname args.copyfontpage
2021-11-19 01:32:35 +01:00
| Some TableOfContents ->
2022-10-19 16:34:19 +02:00
let pdf = get_single_pdf args.op false in
2022-10-19 14:48:13 +02:00
let cpdffont = embed_font () in
let pdf =
Cpdftoc.typeset_table_of_contents
2022-10-19 16:34:19 +02:00
~font:cpdffont ~fontsize:args.fontsize ~title:args.toc_title ~bookmark:args.toc_bookmark pdf
2022-10-19 14:48:13 +02:00
in
2022-10-19 16:34:19 +02:00
write_pdf false pdf
2021-11-19 01:32:35 +01:00
| Some (Typeset filename) ->
let text = Pdfio.bytes_of_input_channel (open_in_bin filename) in
2022-10-19 14:48:13 +02:00
let cpdffont = embed_font () in
let pdf = Cpdftexttopdf.typeset ~font:cpdffont ~papersize:args.createpdf_pagesize ~fontsize:args.fontsize text in
2021-11-20 00:21:37 +01:00
write_pdf false pdf
2023-05-02 16:04:35 +02:00
| Some (TextWidth s) ->
2023-05-02 16:25:16 +02:00
let rawwidth =
match args.font with
| StandardFont f ->
Pdfstandard14.textwidth false WinAnsiEncoding f s
| _ ->
error "-text-width only works for the standard 14 fonts"
in
let w = (float rawwidth *. args.fontsize) /. 1000. in
Printf.printf "%f\n" w
2022-11-28 17:11:07 +01:00
| Some Draw ->
let pdf = get_single_pdf args.op false in
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
let ops = match !Cpdfdrawcontrol.drawops with [("_MAIN", ops)] -> rev ops | _ -> error "not enough -end-xobj or -et" in
2023-05-03 14:53:48 +02:00
write_pdf
false
2023-07-27 14:21:15 +02:00
(Cpdfdraw.draw ~fast:args.fast ~underneath:args.underneath ~filename:args.original_filename ~bates:args.bates ~batespad:args.batespad range pdf ops)
2023-03-03 17:02:16 +01:00
| Some (Composition json) ->
let pdf = get_single_pdf args.op false in
2023-03-03 18:07:12 +01:00
let filesize =
match args.inputs with
| (InFile inname, _, _, _, _, _)::_ -> filesize inname
| _ -> 0
in
2023-04-13 17:51:11 +02:00
Cpdfcomposition.show_composition filesize json pdf
2023-11-15 18:26:43 +01:00
| Some (Chop (x, y)) ->
2023-11-20 11:53:51 +01:00
let pdf = get_single_pdf args.op false in
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
2023-11-20 15:42:00 +01:00
write_pdf false (Cpdfchop.chop ~x ~y ~columns:args.impose_columns ~btt:args.impose_btt ~rtl:args.impose_rtl pdf range)
| Some (ChopHV (is_h, line)) ->
2024-02-05 15:01:16 +01:00
let pdf = get_single_pdf args.op false in
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
write_pdf false (Cpdfchop.chop_hv ~is_h ~line ~columns:args.impose_columns pdf range)
2023-12-06 13:20:27 +01:00
| Some ProcessImages ->
let pdf = get_single_pdf args.op false in
2024-01-04 12:33:17 +01:00
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
Cpdfimage.process
2024-01-12 16:00:28 +01:00
~q:args.jpegquality ~qlossless:args.jpegqualitylossless ~onebppmethod:args.onebppmethod ~jbig2_lossy_threshold:args.jbig2_lossy_threshold
2023-12-31 12:59:48 +01:00
~length_threshold:args.length_threshold ~percentage_threshold:args.percentage_threshold ~pixel_threshold:args.pixel_threshold
~dpi_threshold:args.dpi_threshold ~factor:args.resample_factor ~interpolate:args.resample_interpolate
2024-01-04 12:33:17 +01:00
~path_to_jbig2enc:args.path_to_jbig2enc ~path_to_convert:args.path_to_convert range pdf;
2023-12-06 13:20:27 +01:00
write_pdf false pdf
2024-01-17 19:37:58 +01:00
| Some (ExtractStream i) ->
let pdf = get_single_pdf args.op false in
extract_stream pdf args.extract_stream_decompress i
| Some (PrintObj i) ->
let pdf = get_single_pdf args.op false in
print_obj pdf i
2013-08-20 16:32:57 +02:00
(* Advise the user if a combination of command line flags makes little sense,
   or error out if it makes no sense at all. At present the only combination
   checked is -gs-malformed together with -error-on-malformed. *)
let check_command_line () =
  match args.gs_malformed, !Pdfread.error_on_malformed with
  | true, true ->
      error "Setting both -gs-malformed and -error-on-malformed makes no sense"
  | _ -> ()
(* Parse the argument vector [s] against [specs] using [Arg.parse_argv],
   always starting from index 0, and then sanity-check the resulting flag
   combination with [check_command_line].

   When debugging is enabled, every element of the process's [Sys.argv] is
   logged first. NOTE(review): the trace shows [Sys.argv], not [s] -- confirm
   that this is intended. *)
let parse_argv () s specs anon_fun usage_msg =
  let log_arg a = Pdfe.log (Printf.sprintf "arg: %s\n" a) in
  if args.debug then Array.iter log_arg Sys.argv;
  Arg.parse_argv ~current:(ref 0) s specs anon_fun usage_msg;
  check_command_line ()
2013-08-20 16:32:57 +02:00
(* Align the documentation strings of the option specifications [specs] for
   display, using [Arg.align] with its default limit. The argument is kept
   explicit so that the optional [?limit] parameter of [Arg.align] is erased
   from the type of this function. *)
let align_specs specs =
  Arg.align specs
2013-10-09 15:31:55 +02:00
(* The old -control mechanism clashed with AND, but must be retained for
   backwards compatibility. There is a new mechanism, -args file, which
   performs direct textual substitution of the file, before any expansion of
   ANDs.

   [expand_args_inner prev remaining] walks [remaining], accumulating the
   output in reverse order in [prev]. Each "-args" followed by a filename is
   replaced by the result of [parse_control_file] on that filename; every other
   argument (including a trailing "-args" with no filename) is copied through
   unchanged. *)
let rec expand_args_inner prev remaining =
  match remaining with
  | "-args" :: filename :: rest ->
      let substituted = rev (parse_control_file filename) in
      expand_args_inner (substituted @ prev) rest
  | arg :: rest ->
      expand_args_inner (arg :: prev) rest
  | [] ->
      rev prev
(* Array wrapper around [expand_args_inner]: returns a copy of [argv] in which
   each "-args" / filename pair has been expanded in place. *)
let expand_args argv =
  Array.of_list (expand_args_inner [] (Array.to_list argv))
2019-07-08 15:44:34 +02:00
(* Mend the file [fi] by running it through ghostscript's pdfwrite device,
   writing the result to [fo]. The ghostscript executable is taken from
   [args.path_to_ghostscript]; -dQUIET is passed when [args.gs_quiet] is set.

   This function does not return normally: it exits with code 0 if the
   ghostscript command returns 0, and with code 2 if no ghostscript path has
   been supplied or the command returns anything else. *)
let gs_malformed_force fi fo =
  begin match args.path_to_ghostscript with
  | "" ->
      Pdfe.log "Please supply path to gs with -gs\n";
      exit 2
  | _ -> ()
  end;
  let quiet_flags = if args.gs_quiet then ["-dQUIET"] else [] in
  let gs_arguments =
    quiet_flags @
    ["-dNOPAUSE"; "-sDEVICE=pdfwrite"; "-sOUTPUTFILE=" ^ fo; "-dBATCH"; fi]
  in
  let gscall = Filename.quote_command args.path_to_ghostscript gs_arguments in
  if Sys.command gscall = 0 then exit 0
  else begin
    Pdfe.log "Failed to mend file.\n";
    exit 2
  end
2019-07-08 15:44:34 +02:00
2021-08-10 21:11:20 +02:00
(* Apply settings taken from the environment. Currently only CPDF_DEBUG is
   consulted: the exact values "true" and "false" set [args.debug]
   accordingly; an unset variable or any other value leaves it untouched. *)
let process_env_vars () =
  let requested =
    match Sys.getenv_opt "CPDF_DEBUG" with
    | Some "true" -> Some true
    | Some "false" -> Some false
    | Some _ | None -> None
  in
  match requested with
  | Some flag -> args.debug <- flag
  | None -> ()
2013-08-20 16:32:57 +02:00
(* Main function. Runs cpdf on the argument vector [argv], whose first element
   (the program name) is ignored.

   Two exact command lines are treated specially: the standalone
   -gs-malformed-force invocation, which has exactly one input file and exactly
   one output file with just -gs <gs> -gs-malformed-force between them,
   optionally followed by -gs-quiet. Everything else is expanded for -args,
   split into sets either side of each AND, and each set is parsed and run in
   turn, with the PDFs output by earlier sets passed on as inputs to the next. *)
let go_withargv argv =
  (* Standalone mend: record the ghostscript location, run it, and exit. *)
  let force_mend inputfilename gslocation outputfilename =
    args.path_to_ghostscript <- gslocation;
    ignore (gs_malformed_force inputfilename outputfilename);
    exit 0
  in
  (* Common ending for most error handlers: raise StayOnError if asked to stay
     on error, otherwise re-raise [e] when debugging, otherwise exit code 2. *)
  let bail e =
    if !stay_on_error then raise StayOnError
    else if args.debug then raise e
    else exit 2
  in
  match argv with
  | [|_; inputfilename; "-gs"; gslocation; "-gs-malformed-force"; "-o"; outputfilename|] ->
      force_mend inputfilename gslocation outputfilename
  | [|_; inputfilename; "-gs"; gslocation; "-gs-malformed-force"; "-o"; outputfilename; "-gs-quiet"|] ->
      args.gs_quiet <- true;
      force_mend inputfilename gslocation outputfilename
  | _ ->
      Hashtbl.clear filenames;
      if demo then
        flprint "This demo is for evaluation only. http://www.coherentpdf.com/\n";
      if noncomp then begin
        Pdfe.log "For non-commercial use only\n";
        Pdfe.log "To purchase a license visit http://www.coherentpdf.com/\n\n"
      end;
      begin try
        (* Pre-expand -args *)
        let expanded = expand_args argv in
        (* Split the arguments into sets either side of ANDs, giving each set
           its own leading program name. *)
        let groups =
          map
            (fun l -> "cpdf" :: l)
            (split_around (eq "AND") (tl (Array.to_list expanded)))
        in
        (* Pair every set with a flag which is true only for the last one. *)
        let sets =
          match groups with
          | [] -> []
          | _ ->
              let n = length groups in
              combine (map Array.of_list groups) (map (eq n) (ilist 1 n))
        in
        let run_set (s, _islast) =
          reset_arguments ();
          Cpdfdrawcontrol.drawops := [("_MAIN", [])];
          process_env_vars ();
          parse_argv () s (align_specs specs) anon_fun usage_msg;
          parse_argv ()
            (Array.of_list ("cpdf" :: !control_args))
            (align_specs specs) anon_fun usage_msg;
          (* PDFs produced by the previous set are placed ahead of this set's
             own inputs. *)
          let addrange pdf =
            AlreadyInMemory (pdf, "fromAND"), args.dashrange, "", "", ref false, None
          in
          args.inputs <- rev (map addrange !output_pdfs) @ rev args.inputs;
          output_pdfs := [];
          go ()
        in
        iter run_set sets;
        flush stdout; (* for Windows *)
        exit 0
      with
      | Arg.Bad s ->
          (* Report only the first line of the Arg message. *)
          Pdfe.log
            (implode (takewhile (neq '\n') (explode s)) ^ " Use -help for help.\n\n");
          if !stay_on_error then raise StayOnError else exit 2
      | Arg.Help _ ->
          Arg.usage (align_specs specs) usage_msg;
          flush stderr (* for Windows *)
      | Sys_error s as e ->
          Pdfe.log (s ^ "\n\n");
          bail e
      | Pdf.PDFError s as e ->
          Pdfe.log
            ("cpdf encountered an error. Technical details follow:\n\n" ^ s ^ "\n\n");
          bail e
      | Cpdferror.SoftError s -> soft_error s
      | Cpdferror.HardError s -> error s
      | e ->
          Pdfe.log
            ("cpdf encountered an unexpected error. Technical Details follow:\n" ^
             Printexc.to_string e ^ "\n\n");
          bail e
      end
2013-08-20 16:32:57 +02:00
(* Entry point for the command line program: run cpdf on the arguments of the
   current process. *)
let go () =
  let argv = Sys.argv in
  go_withargv argv