2013-10-02 16:29:53 +02:00
(* cpdf command line tools *)
2021-10-15 18:36:11 +02:00
let demo = false
2024-07-24 15:54:12 +02:00
let agpl = true
2014-11-17 19:48:17 +01:00
let major_version = 2
2024-07-19 15:04:47 +02:00
let minor_version = 7
let minor_minor_version = 1
2024-07-24 17:04:02 +02:00
let version_date = " (24th July 2024) "
2013-08-20 16:32:57 +02:00
open Pdfutil
open Pdfio
2023-11-14 18:47:44 +01:00
let combine_with_spaces strs =
String . trim
( fold_left ( fun x y -> x ^ ( if x < > " " then " " else " " ) ^ y ) " " strs )
2019-06-30 16:00:51 +02:00
let tempfiles = ref []
let exit n =
2020-03-04 19:50:32 +01:00
begin try iter Sys . remove ! tempfiles with _ -> exit n end ;
2019-06-30 16:00:51 +02:00
exit n
2023-10-04 16:07:29 +02:00
let null () = ()
2014-10-14 20:36:57 +02:00
let initial_file_size = ref 0
2016-07-21 18:02:11 +02:00
let empty = Pdf . empty ()
2013-08-20 16:32:57 +02:00
(* Wrap up the file reading functions to exit with code 1 when an encryption
problem occurs . This happens when object streams are in an encrypted document
and so it can't be read without the right password .. . The existing error
handling only dealt with the case where the document couldn't be decrypted once
it had been loaded . * )
2014-12-11 20:36:43 +01:00
let pdfread_pdf_of_input ? revision a b c =
try Pdfread . pdf_of_input ? revision a b c with
2013-08-20 16:32:57 +02:00
Pdf . PDFError s when String . length s > = 10 && String . sub s 0 10 = " Encryption " ->
2021-10-02 13:22:59 +02:00
raise ( Cpdferror . SoftError " Bad owner or user password when reading document " )
2013-08-20 16:32:57 +02:00
2014-12-11 20:36:43 +01:00
let pdfread_pdf_of_channel_lazy ? revision ? source b c d =
try Pdfread . pdf_of_channel_lazy ? revision ? source b c d with
2013-08-20 16:32:57 +02:00
Pdf . PDFError s when String . length s > = 10 && String . sub s 0 10 = " Encryption " ->
2021-10-02 13:22:59 +02:00
raise ( Cpdferror . SoftError " Bad owner or user password when reading document " )
2013-08-20 16:32:57 +02:00
2014-12-11 20:36:43 +01:00
let pdfread_pdf_of_file ? revision a b c =
try Pdfread . pdf_of_file ? revision a b c with
2013-08-20 16:32:57 +02:00
Pdf . PDFError s when String . length s > = 10 && String . sub s 0 10 = " Encryption " ->
2021-10-02 13:22:59 +02:00
raise ( Cpdferror . SoftError " Bad owner or user password when reading document " )
2013-08-20 16:32:57 +02:00
let optstring = function
| " " -> None
| x -> Some x
let _ =
set_binary_mode_in stdin true ;
set_binary_mode_out stdout true
2015-01-11 19:39:35 +01:00
let stay_on_error = ref false
exception StayOnError
2013-08-20 16:32:57 +02:00
(* Fatal error reporting. *)
let error s =
2023-04-25 14:45:56 +02:00
Pdfe . log ( s ^ " \n Use -help for help. \n " ) ;
2015-01-11 19:39:35 +01:00
if not ! stay_on_error then exit 2 else raise StayOnError
2013-08-20 16:32:57 +02:00
let soft_error s =
2023-04-25 14:45:56 +02:00
Pdfe . log ( Printf . sprintf " %s \n " s ) ;
2015-01-11 19:39:35 +01:00
if not ! stay_on_error then exit 1 else raise StayOnError
2013-08-20 16:32:57 +02:00
let parse_pagespec pdf spec =
2021-08-12 21:14:48 +02:00
try Cpdfpagespec . parse_pagespec pdf spec with
2013-08-20 16:32:57 +02:00
Failure x -> error x
2021-05-25 14:49:51 +02:00
(* We allow an operation such as ScaleToFit on a range such as 'portrait' to be silently null to allow, for example:
cpdf - scale - to - fit a4portrait in . pdf portrait AND - scale - to - fit a4landscape landscape - o out . pdf
* )
let parse_pagespec_allow_empty pdf spec =
2021-08-12 21:14:48 +02:00
try Cpdfpagespec . parse_pagespec pdf spec with
2021-05-25 14:49:51 +02:00
Pdf . PDFError ( " Page range specifies no pages " ) -> []
2013-08-20 16:32:57 +02:00
(* Operations. *)
type op =
| CopyFont of string
| CountPages
| Version
| Encrypt
| Decrypt
| StampOn of string
| StampUnder of string
| CombinePages of string
| TwoUp
| TwoUpStack
2021-10-18 19:19:59 +02:00
| Impose of bool
2013-08-20 16:32:57 +02:00
| RemoveBookmarks
| AddBookmarks of string
| AddText of string
| AddRectangle
| RemoveText
| Draft
| PadBefore
| PadAfter
| PadEvery of int
| PadMultiple of int
2019-07-01 16:35:17 +02:00
| PadMultipleBefore of int
2013-08-20 16:32:57 +02:00
| Shift
2024-01-22 17:36:37 +01:00
| ShiftBoxes
2013-08-20 16:32:57 +02:00
| Scale
| ScaleToFit
2024-09-21 18:35:29 +02:00
| Stretch
2013-08-20 16:32:57 +02:00
| ScaleContents of float
| AttachFile of string list
| RemoveAttachedFiles
| ListAttachedFiles
| DumpAttachedFiles
| RemoveAnnotations
| ListAnnotations
| CopyAnnotations of string
2023-01-13 07:30:46 +01:00
| SetAnnotations of string
2013-08-20 16:32:57 +02:00
| Merge
| Split
| SplitOnBookmarks of int
2023-10-25 19:15:19 +02:00
| SplitMax of int
2023-10-30 17:36:41 +01:00
| Spray
2013-08-20 16:32:57 +02:00
| Clean
| Info
| PageInfo
| Metadata
| SetMetadata of string
| RemoveMetadata
| Fonts
| RemoveFonts
| Compress
| Decompress
| Crop
2019-06-26 15:43:24 +02:00
| Trim
| Bleed
| Art
2013-08-20 16:32:57 +02:00
| RemoveCrop
2019-06-26 15:43:24 +02:00
| RemoveArt
| RemoveTrim
| RemoveBleed
2013-08-20 16:32:57 +02:00
| CopyBox
| MediaBox
2017-05-19 20:10:49 +02:00
| HardBox of string
2013-08-20 16:32:57 +02:00
| Rotate of int
| Rotateby of int
| RotateContents of float
| Upright
| VFlip
| HFlip
| ThinLines of float
| SetAuthor of string
| SetTitle of string
| SetSubject of string
| SetKeywords of string
| SetCreate of string
| SetModify of string
| SetCreator of string
| SetProducer of string
| SetTrapped
| SetUntrapped
| SetVersion of int
| ListBookmarks
| SetPageLayout of string
| SetPageMode of string
2023-04-17 20:55:11 +02:00
| SetNonFullScreenPageMode of string
2013-08-20 16:32:57 +02:00
| HideToolbar of bool
| HideMenubar of bool
| HideWindowUI of bool
| FitWindow of bool
| CenterWindow of bool
| DisplayDocTitle of bool
| Presentation
| ChangeId
| RemoveId
| CopyId of string
| BlackText
| BlackLines
| BlackFills
| ExtractImages
2023-11-09 12:25:19 +01:00
| ListImages
2013-08-20 16:32:57 +02:00
| ImageResolution of float
| MissingFonts
2023-11-02 19:49:15 +01:00
| ExtractFontFile of string
2013-08-20 16:32:57 +02:00
| ExtractText
2020-05-04 13:25:42 +02:00
| OpenAtPage of string
| OpenAtPageFit of string
2023-04-18 15:42:17 +02:00
| OpenAtPageCustom of string
2013-10-24 16:21:54 +02:00
| AddPageLabels
2013-10-24 12:21:52 +02:00
| RemovePageLabels
| PrintPageLabels
2015-01-07 19:29:11 +01:00
| RemoveDictEntry of string
2021-10-28 18:06:46 +02:00
| ReplaceDictEntry of string
2021-10-29 16:09:21 +02:00
| PrintDictEntry of string
2016-11-03 18:11:08 +01:00
| ListSpotColours
2016-11-09 16:42:47 +01:00
| RemoveClipping
2019-06-26 18:43:59 +02:00
| SetMetadataDate of string
2019-07-01 15:40:22 +02:00
| CreateMetadata
2019-07-02 19:20:05 +02:00
| EmbedMissingFonts
2019-07-06 18:55:26 +02:00
| BookmarksOpenToLevel of int
2019-07-07 18:07:52 +02:00
| CreatePDF
2019-07-11 18:19:40 +02:00
| RemoveAllText
2019-07-15 12:52:14 +02:00
| ShowBoxes
2019-07-15 14:42:32 +02:00
| TrimMarks
2019-10-01 16:02:12 +02:00
| Prepend of string
| Postpend of string
2020-01-30 11:20:33 +01:00
| OutputJSON
2020-02-26 17:24:27 +01:00
| OCGCoalesce
2020-02-27 14:32:45 +01:00
| OCGList
| OCGRename
2020-03-02 13:37:39 +01:00
| OCGOrderAll
2020-03-18 15:01:27 +01:00
| StampAsXObject of string
2021-11-01 16:40:33 +01:00
| PrintFontEncoding of string
2021-11-19 01:32:35 +01:00
| TableOfContents
| Typeset of string
2023-05-02 16:04:35 +02:00
| TextWidth of string
2022-11-28 17:11:07 +01:00
| Draw
2023-03-03 17:02:16 +01:00
| Composition of bool
2023-11-15 18:26:43 +01:00
| Chop of int * int
2024-02-05 15:01:16 +01:00
| ChopHV of bool * float
2023-12-06 13:20:27 +01:00
| ProcessImages
2024-06-24 16:29:32 +02:00
| ExtractStream of string
| PrintObj of string
2024-09-02 17:30:02 +02:00
| ReplaceObj of string * string
2024-05-30 16:33:24 +02:00
| Verify of string
2024-09-14 15:43:55 +02:00
| MarkAs of Cpdfua . subformat
| RemoveMark of Cpdfua . subformat
2024-06-27 15:21:47 +02:00
| PrintStructTree
2024-06-03 16:49:52 +02:00
| ExtractStructTree
| ReplaceStructTree of string
2024-06-12 18:21:20 +02:00
| SetLanguage of string
2024-09-21 19:44:43 +02:00
| Redact
2013-08-20 16:32:57 +02:00
2014-10-16 16:28:46 +02:00
let string_of_op = function
2021-11-01 16:40:33 +01:00
| PrintFontEncoding _ -> " PrintFontEncoding "
2021-10-29 16:09:21 +02:00
| PrintDictEntry _ -> " PrintDictEntry "
2021-10-18 19:19:59 +02:00
| Impose _ -> " Impose "
2014-10-16 16:28:46 +02:00
| CopyFont _ -> " CopyFont "
| CountPages -> " CountPages "
| Version -> " Version "
| Encrypt -> " Encrypt "
| Decrypt -> " Decrypt "
| StampOn _ -> " StampOn "
| StampUnder _ -> " StampUnder "
| CombinePages _ -> " CombinePages "
| TwoUp -> " TwoUp "
| TwoUpStack -> " TwoUpStack "
| RemoveBookmarks -> " RemoveBookmarks "
| AddBookmarks _ -> " AddBookmarks "
| AddText _ -> " AddText "
| AddRectangle -> " AddRectangle "
| RemoveText -> " RemoveText "
| Draft -> " Draft "
| PadBefore -> " PadBefore "
| PadAfter -> " PadAfter "
| PadEvery _ -> " PadEvery "
| PadMultiple _ -> " PadMultiple "
2019-07-01 16:35:17 +02:00
| PadMultipleBefore _ -> " PadMultipleBefore "
2014-10-16 16:28:46 +02:00
| Shift -> " Shift "
2024-01-22 17:36:37 +01:00
| ShiftBoxes -> " ShiftBoxes "
2014-10-16 16:28:46 +02:00
| Scale -> " Scale "
| ScaleToFit -> " ScaleToFit "
2024-09-21 18:35:29 +02:00
| Stretch -> " Stretch "
2014-10-16 16:28:46 +02:00
| ScaleContents _ -> " ScaleContents "
| AttachFile _ -> " AttachFile "
| RemoveAttachedFiles -> " RemoveAttachedFiles "
| ListAttachedFiles -> " ListAttachedFiles "
| DumpAttachedFiles -> " DumpAttachedFiles "
| RemoveAnnotations -> " RemoveAnnotations "
| ListAnnotations -> " ListAnnotations "
| CopyAnnotations _ -> " CopyAnnotations "
2023-01-13 07:30:46 +01:00
| SetAnnotations _ -> " SetAnnotations "
2014-10-16 16:28:46 +02:00
| Merge -> " Merge "
| Split -> " Split "
| SplitOnBookmarks _ -> " SplitOnBookmarks "
2023-10-25 19:15:19 +02:00
| SplitMax _ -> " SplitMax "
2023-10-30 17:36:41 +01:00
| Spray -> " Spray "
2014-10-16 16:28:46 +02:00
| Clean -> " Clean "
| Info -> " Info "
| PageInfo -> " PageInfo "
| Metadata -> " Metadata "
| SetMetadata _ -> " SetMetadata "
| RemoveMetadata -> " RemoveMetadata "
| Fonts -> " Fonts "
| RemoveFonts -> " RemoveFonts "
| Compress -> " Compress "
| Decompress -> " Decompress "
| Crop -> " Crop "
| RemoveCrop -> " RemoveCrop "
| CopyBox -> " CopyBox "
| MediaBox -> " MediaBox "
2017-05-19 20:10:49 +02:00
| HardBox _ -> " HardBox "
2014-10-16 16:28:46 +02:00
| Rotate _ -> " Rotate "
| Rotateby _ -> " Rotateby "
| RotateContents _ -> " RotateContents "
| Upright -> " Upright "
| VFlip -> " VFlip "
| HFlip -> " HFlip "
| ThinLines _ -> " ThinLines "
| SetAuthor _ -> " SetAuthor "
| SetTitle _ -> " SetTitle "
| SetSubject _ -> " SetSubject "
| SetKeywords _ -> " SetKeywords "
| SetCreate _ -> " SetCreate "
| SetModify _ -> " SetModify "
| SetCreator _ -> " SetCreator "
| SetProducer _ -> " SetProducer "
| SetTrapped -> " SetTrapped "
| SetUntrapped -> " SetUntrapped "
| SetVersion _ -> " SetVersion "
| ListBookmarks -> " ListBookmarks "
| SetPageLayout _ -> " SetPageLayout "
| SetPageMode _ -> " SetPageMode "
2023-04-17 20:55:11 +02:00
| SetNonFullScreenPageMode _ -> " SetNonFullScreenPageMode "
2014-10-16 16:28:46 +02:00
| HideToolbar _ -> " HideToolbar "
| HideMenubar _ -> " HideMenubar "
| HideWindowUI _ -> " HideWindowUI "
| FitWindow _ -> " FitWindow "
| CenterWindow _ -> " CenterWindow "
| DisplayDocTitle _ -> " DisplayDocTitle "
| Presentation -> " Presentation "
| ChangeId -> " ChangeId "
| RemoveId -> " RemoveId "
| CopyId _ -> " CopyId "
| BlackText -> " BlackText "
| BlackLines -> " BlackLines "
| BlackFills -> " BlackFills "
| ExtractImages -> " ExtractImages "
2023-11-09 12:25:19 +01:00
| ListImages -> " ListImages "
2014-10-16 16:28:46 +02:00
| ImageResolution _ -> " ImageResolution "
| MissingFonts -> " MissingFonts "
2023-11-02 19:49:15 +01:00
| ExtractFontFile _ -> " ExtractFontFile "
2014-10-16 16:28:46 +02:00
| ExtractText -> " ExtractText "
| OpenAtPage _ -> " OpenAtPage "
| OpenAtPageFit _ -> " OpenAtPageFit "
2023-04-18 15:42:17 +02:00
| OpenAtPageCustom _ -> " OpenAtPageCustom "
2014-10-16 16:28:46 +02:00
| AddPageLabels -> " AddPageLabels "
| RemovePageLabels -> " RemovePageLabels "
| PrintPageLabels -> " PrintPageLabels "
2015-01-07 19:29:11 +01:00
| RemoveDictEntry _ -> " RemoveDictEntry "
2021-10-28 18:06:46 +02:00
| ReplaceDictEntry _ -> " ReplaceDictEntry "
2016-11-03 18:11:08 +01:00
| ListSpotColours -> " ListSpotColours "
2016-11-09 16:42:47 +01:00
| RemoveClipping -> " RemoveClipping "
2019-06-26 15:43:24 +02:00
| Trim -> " Trim "
| Art -> " Art "
| Bleed -> " Bleed "
| RemoveArt -> " RemoveArt "
| RemoveTrim -> " RemoveTrim "
| RemoveBleed -> " RemoveBleed "
2019-06-26 18:43:59 +02:00
| SetMetadataDate _ -> " SetMetadataDate "
2019-07-01 15:40:22 +02:00
| CreateMetadata -> " CreateMetadata "
2019-07-02 19:20:05 +02:00
| EmbedMissingFonts -> " EmbedMissingFonts "
2019-07-06 18:55:26 +02:00
| BookmarksOpenToLevel _ -> " BookmarksOpenToLevel "
2019-07-07 18:07:52 +02:00
| CreatePDF -> " CreatePDF "
2019-07-11 18:19:40 +02:00
| RemoveAllText -> " RemoveAllText "
2019-07-15 12:52:14 +02:00
| ShowBoxes -> " ShowBoxes "
2019-07-15 14:42:32 +02:00
| TrimMarks -> " TrimMarks "
2019-10-01 16:02:12 +02:00
| Prepend _ -> " Prepend "
| Postpend _ -> " Postpend "
2020-01-30 11:20:33 +01:00
| OutputJSON -> " OutputJSON "
2020-02-26 17:24:27 +01:00
| OCGCoalesce -> " OCGCoalesce "
2020-02-27 14:32:45 +01:00
| OCGList -> " OCGList "
| OCGRename -> " OCGRename "
2020-03-02 13:37:39 +01:00
| OCGOrderAll -> " OCGOrderAll "
2020-03-18 15:01:27 +01:00
| StampAsXObject _ -> " StampAsXObject "
2021-11-19 01:32:35 +01:00
| TableOfContents -> " TableOfContents "
| Typeset _ -> " Typeset "
2023-05-02 16:04:35 +02:00
| TextWidth _ -> " TextWidth "
2022-11-28 17:11:07 +01:00
| Draw -> " Draw "
2023-03-03 17:02:16 +01:00
| Composition _ -> " Composition "
2023-11-15 18:26:43 +01:00
| Chop _ -> " Chop "
2024-02-05 15:01:16 +01:00
| ChopHV _ -> " ChopHV "
2023-12-06 13:20:27 +01:00
| ProcessImages -> " ProcessImages "
2024-01-17 19:37:58 +01:00
| ExtractStream _ -> " ExtractStream "
| PrintObj _ -> " PrintObj "
2024-09-02 17:30:02 +02:00
| ReplaceObj _ -> " ReplaceObj "
2024-05-30 16:33:24 +02:00
| Verify _ -> " Verify "
2024-05-30 18:49:23 +02:00
| MarkAs _ -> " MarkAs "
2024-06-18 16:28:19 +02:00
| RemoveMark _ -> " RemoveMark "
2024-06-27 15:21:47 +02:00
| PrintStructTree -> " PrintStructTree "
2024-06-03 16:49:52 +02:00
| ExtractStructTree -> " ExtractStructTree "
| ReplaceStructTree _ -> " ReplaceStructTree "
2024-06-12 18:21:20 +02:00
| SetLanguage _ -> " SetLanguage "
2024-09-21 19:44:43 +02:00
| Redact -> " Redact "
2014-10-16 16:28:46 +02:00
2013-08-20 16:32:57 +02:00
(* Inputs: filename, pagespec. *)
2023-12-05 13:20:03 +01:00
type input_kind =
| AlreadyInMemory of Pdf . t * string
2013-08-20 16:32:57 +02:00
| InFile of string
| StdIn
let string_of_input_kind = function
2023-12-05 13:20:03 +01:00
| AlreadyInMemory ( _ , s ) -> s
2013-08-20 16:32:57 +02:00
| InFile s -> s
| StdIn -> " Stdin "
type input =
2014-12-11 19:17:02 +01:00
input_kind * string * string * string * bool ref * int option
(* input kind, range, user_pw, owner_pw, was_decrypted_with_owner, revision *)
2013-08-20 16:32:57 +02:00
type output_method =
| NoOutputSpecified
| Stdout
| File of string
2023-10-30 17:36:41 +01:00
(* Outputs are also added here, in case -spray is in use. *)
let spray_outputs = ref []
2013-08-20 16:32:57 +02:00
(* A list of PDFs to be output, if no output method was specified. *)
let output_pdfs : Pdf . t list ref = ref []
2024-09-18 16:31:24 +02:00
let standard_namespace = " http://iso.org/pdf/ssn "
let pdf2_namespace = " http://iso.org/pdf2/ssn "
2013-08-20 16:32:57 +02:00
type font =
| StandardFont of Pdftext . standard_font
2023-07-14 13:37:57 +02:00
| EmbeddedFont of string
2013-08-20 16:32:57 +02:00
| OtherFont of string
type args =
{ mutable op : op option ;
mutable preserve_objstm : bool ;
mutable create_objstm : bool ;
mutable out : output_method ;
mutable inputs : input list ;
mutable chunksize : int ;
mutable linearize : bool ;
2014-11-23 20:20:41 +01:00
mutable keeplinearize : bool ;
2016-07-18 21:00:10 +02:00
mutable rectangle : string ;
mutable coord : string ;
2013-08-20 16:32:57 +02:00
mutable duration : float option ;
mutable transition : string option ;
mutable horizontal : bool ;
mutable inward : bool ;
mutable direction : int ;
mutable effect_duration : float ;
mutable font : font ;
mutable fontname : string ;
2022-09-21 17:10:48 +02:00
mutable fontencoding : Pdftext . encoding ;
2013-08-20 16:32:57 +02:00
mutable fontsize : float ;
2022-09-27 17:59:04 +02:00
mutable embedstd14 : string option ;
2024-03-04 16:16:14 +01:00
mutable color : Cpdfaddtext . colour ;
2013-08-20 16:32:57 +02:00
mutable opacity : float ;
2021-08-12 21:38:55 +02:00
mutable position : Cpdfposition . position ;
2013-08-20 16:32:57 +02:00
mutable underneath : bool ;
mutable linespacing : float ;
mutable midline : bool ;
2015-01-20 16:50:36 +01:00
mutable topline : bool ;
2021-12-21 15:00:58 +01:00
mutable justification : Cpdfaddtext . justification ;
2013-08-20 16:32:57 +02:00
mutable bates : int ;
2015-07-17 17:34:47 +02:00
mutable batespad : int option ;
2013-08-20 16:32:57 +02:00
mutable prerotate : bool ;
mutable relative_to_cropbox : bool ;
mutable keepversion : bool ;
mutable bycolumns : bool ;
mutable pagerotation : int ;
mutable crypt_method : string ;
mutable owner : string ;
mutable user : string ;
mutable no_edit : bool ;
mutable no_print : bool ;
mutable no_copy : bool ;
mutable no_annot : bool ;
mutable no_forms : bool ;
mutable no_extract : bool ;
mutable no_assemble : bool ;
mutable no_hq_print : bool ;
mutable debug : bool ;
2014-10-24 18:24:29 +02:00
mutable debugcrypt : bool ;
2018-03-20 11:19:50 +01:00
mutable debugforce : bool ;
2013-08-20 16:32:57 +02:00
mutable boxes : bool ;
mutable encrypt_metadata : bool ;
mutable retain_numbering : bool ;
2024-01-23 14:35:49 +01:00
mutable process_struct_trees : bool ;
2013-08-20 16:32:57 +02:00
mutable remove_duplicate_fonts : bool ;
mutable remove_duplicate_streams : bool ;
2021-12-19 13:55:06 +01:00
mutable encoding : Cpdfmetadata . encoding ;
2013-08-20 16:32:57 +02:00
mutable scale : float ;
mutable copyfontpage : int ;
mutable copyfontname : string option ;
mutable fast : bool ;
mutable dashrange : string ;
mutable outline : bool ;
mutable linewidth : float ;
mutable path_to_ghostscript : string ;
2020-12-11 15:13:24 +01:00
mutable path_to_im : string ;
mutable path_to_p2p : string ;
2023-12-22 20:33:10 +01:00
mutable path_to_jbig2enc : string ;
2013-08-20 16:32:57 +02:00
mutable frombox : string option ;
mutable tobox : string option ;
mutable mediabox_if_missing : bool ;
mutable topage : string option ;
mutable scale_stamp_to_fit : bool ;
2014-09-18 15:27:07 +02:00
mutable labelstyle : Pdfpagelabels . labelstyle ;
2013-10-24 16:21:54 +02:00
mutable labelprefix : string option ;
2014-09-18 16:40:22 +02:00
mutable labelstartval : int ;
2020-01-25 08:22:45 +01:00
mutable labelsprogress : bool ;
2014-09-30 15:46:05 +02:00
mutable squeeze : bool ;
2020-05-06 18:00:55 +02:00
mutable squeeze_recompress : bool ;
mutable squeeze_pagedata : bool ;
2014-10-02 20:57:06 +02:00
mutable original_filename : string ;
2014-10-16 16:28:46 +02:00
mutable was_encrypted : bool ;
2014-10-15 14:48:39 +02:00
mutable cpdflin : string option ;
2014-10-27 17:48:08 +01:00
mutable recrypt : bool ;
2015-01-22 20:16:56 +01:00
mutable was_decrypted_with_owner : bool ;
mutable creator : string option ;
2015-04-13 15:17:48 +02:00
mutable producer : string option ;
2017-12-18 20:44:02 +01:00
mutable extract_text_font_size : float option ;
2019-06-26 18:43:59 +02:00
mutable padwith : string option ;
mutable alsosetxml : bool ;
2019-06-30 15:05:20 +02:00
mutable justsetxml : bool ;
2019-07-03 15:40:32 +02:00
mutable gs_malformed : bool ;
2019-07-22 15:00:37 +02:00
mutable gs_quiet : bool ;
2019-07-03 15:40:32 +02:00
mutable merge_add_bookmarks : bool ;
2019-07-07 18:07:52 +02:00
mutable merge_add_bookmarks_use_titles : bool ;
mutable createpdf_pages : int ;
2019-07-09 17:31:45 +02:00
mutable createpdf_pagesize : Pdfpaper . t ;
2020-01-30 11:20:33 +01:00
mutable removeonly : string option ;
2020-02-01 11:18:15 +01:00
mutable jsonparsecontentstreams : bool ;
2020-02-27 14:32:45 +01:00
mutable jsonnostreamdata : bool ;
2021-10-04 19:38:36 +02:00
mutable jsondecompressstreams : bool ;
2021-12-30 16:25:24 +01:00
mutable jsoncleanstrings : bool ;
2020-02-27 14:32:45 +01:00
mutable ocgrenamefrom : string ;
2020-12-20 16:41:52 +01:00
mutable ocgrenameto : string ;
mutable dedup : bool ;
2021-10-16 16:47:41 +02:00
mutable dedup_per_page : bool ;
2021-10-18 19:19:59 +02:00
mutable collate : bool ;
mutable impose_columns : bool ;
mutable impose_rtl : bool ;
mutable impose_btt : bool ;
mutable impose_center : bool ;
mutable impose_margin : float ;
mutable impose_spacing : float ;
2021-10-26 18:32:36 +02:00
mutable impose_linewidth : float ;
2021-10-28 18:06:46 +02:00
mutable format_json : bool ;
2021-10-29 16:09:21 +02:00
mutable replace_dict_entry_value : Pdf . pdfobject ;
2021-12-02 00:50:04 +01:00
mutable dict_entry_search : Pdf . pdfobject option ;
2021-12-10 13:58:30 +01:00
mutable toc_title : string ;
2021-12-15 14:01:51 +01:00
mutable toc_bookmark : bool ;
2022-01-02 16:18:55 +01:00
mutable idir_only_pdfs : bool ;
2023-12-06 13:20:27 +01:00
mutable no_warn_rotate : bool ;
2024-02-01 17:38:07 +01:00
mutable jpegquality : float ;
mutable jpegqualitylossless : float ;
2023-12-24 14:54:21 +01:00
mutable onebppmethod : string ;
2023-12-27 20:53:02 +01:00
mutable pixel_threshold : int ;
mutable length_threshold : int ;
2024-02-01 17:38:07 +01:00
mutable percentage_threshold : float ;
mutable dpi_threshold : float ;
mutable resample_factor : float ;
2024-01-12 16:00:28 +01:00
mutable resample_interpolate : bool ;
2024-01-17 19:37:58 +01:00
mutable jbig2_lossy_threshold : float ;
2024-06-24 17:33:43 +02:00
mutable extract_stream_decompress : bool ;
2024-09-06 15:04:17 +02:00
mutable verify_single : string option ;
2024-09-11 14:57:57 +02:00
mutable draw_struct_tree : bool ;
2024-09-20 15:15:10 +02:00
mutable subformat : Cpdfua . subformat option ;
2024-09-27 14:45:18 +02:00
mutable indent : float option ;
mutable title : string option }
2013-08-20 16:32:57 +02:00
let args =
{ op = None ;
preserve_objstm = true ;
create_objstm = false ;
out = NoOutputSpecified ;
inputs = [] ;
chunksize = 1 ;
linearize = false ;
2014-11-23 20:20:41 +01:00
keeplinearize = false ;
2016-07-18 21:00:10 +02:00
rectangle = " 0 0 0 0 " ;
coord = " 0 0 " ;
2013-08-20 16:32:57 +02:00
duration = None ;
transition = None ;
horizontal = true ;
inward = true ;
direction = 0 ;
effect_duration = 1 . ;
font = StandardFont Pdftext . TimesRoman ;
fontname = " Times-Roman " ;
fontsize = 12 . ;
2022-09-21 17:10:48 +02:00
fontencoding = Pdftext . WinAnsiEncoding ;
2021-12-21 15:00:58 +01:00
color = Cpdfaddtext . RGB ( 0 . , 0 . , 0 . ) ;
2013-08-20 16:32:57 +02:00
opacity = 1 . ;
2023-04-07 16:31:21 +02:00
position = Cpdfposition . TopLeft ( 100 . , 100 . ) ;
2013-08-20 16:32:57 +02:00
underneath = false ;
linespacing = 1 . ;
midline = false ;
2015-01-20 16:50:36 +01:00
topline = false ;
2021-12-21 15:00:58 +01:00
justification = Cpdfaddtext . LeftJustify ;
2013-08-20 16:32:57 +02:00
bates = 0 ;
2015-07-17 17:34:47 +02:00
batespad = None ;
2013-08-20 16:32:57 +02:00
prerotate = false ;
relative_to_cropbox = false ;
keepversion = false ;
bycolumns = false ;
pagerotation = 0 ;
crypt_method = " " ;
owner = " " ;
user = " " ;
no_edit = false ;
no_print = false ;
no_copy = false ;
no_annot = false ;
no_forms = false ;
no_extract = false ;
no_assemble = false ;
no_hq_print = false ;
debug = false ;
2014-10-24 18:24:29 +02:00
debugcrypt = false ;
2018-03-20 11:19:50 +01:00
debugforce = false ;
2013-08-20 16:32:57 +02:00
boxes = false ;
encrypt_metadata = true ;
retain_numbering = false ;
2024-07-03 13:53:14 +02:00
process_struct_trees = false ;
2013-08-20 16:32:57 +02:00
remove_duplicate_fonts = false ;
remove_duplicate_streams = false ;
2024-01-24 15:21:11 +01:00
encoding = Cpdfmetadata . Stripped ;
2013-08-20 16:32:57 +02:00
scale = 1 . ;
copyfontpage = 1 ;
copyfontname = None ;
fast = false ;
dashrange = " all " ;
outline = false ;
linewidth = 1 . 0 ;
2024-02-14 20:30:36 +01:00
path_to_ghostscript = " " ;
2024-02-14 15:51:22 +01:00
path_to_im = " " ;
path_to_p2p = " " ;
2024-02-14 20:30:36 +01:00
path_to_jbig2enc = " " ;
2013-08-20 16:32:57 +02:00
frombox = None ;
tobox = None ;
mediabox_if_missing = false ;
topage = None ;
scale_stamp_to_fit = false ;
2014-09-18 15:27:07 +02:00
labelstyle = Pdfpagelabels . DecimalArabic ;
2013-10-24 16:21:54 +02:00
labelprefix = None ;
2014-09-18 16:40:22 +02:00
labelstartval = 1 ;
2020-01-25 08:22:45 +01:00
labelsprogress = false ;
2014-09-30 15:46:05 +02:00
squeeze = false ;
2020-05-06 18:00:55 +02:00
squeeze_recompress = true ;
squeeze_pagedata = true ;
2014-10-02 20:57:06 +02:00
original_filename = " " ;
2014-10-16 16:28:46 +02:00
was_encrypted = false ;
2014-10-15 14:48:39 +02:00
cpdflin = None ;
2014-10-27 17:48:08 +01:00
recrypt = false ;
2015-01-22 20:16:56 +01:00
was_decrypted_with_owner = false ;
producer = None ;
2015-04-13 15:17:48 +02:00
creator = None ;
2022-09-26 21:38:16 +02:00
embedstd14 = None ;
2017-12-18 20:44:02 +01:00
extract_text_font_size = None ;
2019-06-26 18:43:59 +02:00
padwith = None ;
alsosetxml = false ;
2019-06-30 15:05:20 +02:00
justsetxml = false ;
2019-07-03 15:40:32 +02:00
gs_malformed = false ;
2019-07-22 15:00:37 +02:00
gs_quiet = false ;
2019-07-03 15:40:32 +02:00
merge_add_bookmarks = false ;
2019-07-07 18:07:52 +02:00
merge_add_bookmarks_use_titles = false ;
createpdf_pages = 1 ;
2019-07-09 17:31:45 +02:00
createpdf_pagesize = Pdfpaper . a4 ;
2020-01-30 11:20:33 +01:00
removeonly = None ;
2020-02-01 11:18:15 +01:00
jsonparsecontentstreams = false ;
2020-02-27 14:32:45 +01:00
jsonnostreamdata = false ;
2021-10-04 19:38:36 +02:00
jsondecompressstreams = false ;
2021-12-30 16:25:24 +01:00
jsoncleanstrings = false ;
2020-02-27 14:32:45 +01:00
ocgrenamefrom = " " ;
2020-12-20 16:41:52 +01:00
ocgrenameto = " " ;
dedup = false ;
2021-10-16 16:47:41 +02:00
dedup_per_page = false ;
2021-10-18 19:19:59 +02:00
collate = false ;
impose_columns = false ;
impose_rtl = false ;
impose_btt = false ;
impose_center = false ;
impose_margin = 0 . ;
impose_spacing = 0 . ;
2021-10-26 18:32:36 +02:00
impose_linewidth = 0 . ;
2021-10-28 18:06:46 +02:00
format_json = false ;
2021-10-29 16:09:21 +02:00
replace_dict_entry_value = Pdf . Null ;
2021-12-02 00:50:04 +01:00
dict_entry_search = None ;
2021-12-10 13:58:30 +01:00
toc_title = " Table of Contents " ;
2021-12-15 14:01:51 +01:00
toc_bookmark = true ;
2022-01-02 16:18:55 +01:00
idir_only_pdfs = false ;
2023-12-06 13:20:27 +01:00
no_warn_rotate = false ;
2024-02-01 17:38:07 +01:00
jpegquality = 100 . ;
jpegqualitylossless = 101 . ;
2023-12-24 14:54:21 +01:00
onebppmethod = " " ;
2023-12-27 20:53:02 +01:00
pixel_threshold = 25 ;
length_threshold = 100 ;
2024-02-01 17:38:07 +01:00
percentage_threshold = 99 . ;
dpi_threshold = 0 . ;
resample_factor = 101 . ;
2024-01-12 16:00:28 +01:00
resample_interpolate = false ;
2024-01-17 19:37:58 +01:00
jbig2_lossy_threshold = 0 . 85 ;
2024-06-24 17:33:43 +02:00
extract_stream_decompress = false ;
2024-09-06 15:04:17 +02:00
verify_single = None ;
2024-09-11 14:57:57 +02:00
draw_struct_tree = false ;
2024-09-20 15:15:10 +02:00
subformat = None ;
2024-09-27 14:45:18 +02:00
indent = None ;
title = None }
2023-12-31 12:59:48 +01:00
(* Do not reset original_filename or cpdflin or was_encrypted or
was_decrypted_with_owner or recrypt or producer or creator or path_to_ * or
gs_malformed or gs_quiet or no - warn - rotate , since we want these to work
across ANDs . Or squeeze options : a little odd , but we want it to happen on
eventual output . Or - debug - force ( from v2 . 6 ) . * )
2013-08-20 16:32:57 +02:00
let reset_arguments () =
args . op <- None ;
args . preserve_objstm <- true ;
args . create_objstm <- false ;
args . out <- NoOutputSpecified ;
args . inputs <- [] ;
args . chunksize <- 1 ;
args . linearize <- false ;
2014-11-23 20:20:41 +01:00
args . keeplinearize <- false ;
2016-07-18 21:00:10 +02:00
args . rectangle <- " 0 0 0 0 " ;
args . coord <- " 0 0 " ;
2013-08-20 16:32:57 +02:00
args . duration <- None ;
args . transition <- None ;
args . horizontal <- true ;
args . inward <- true ;
args . direction <- 0 ;
args . effect_duration <- 1 . ;
args . font <- StandardFont Pdftext . TimesRoman ;
args . fontname <- " Times-Roman " ;
args . fontsize <- 12 . ;
2022-09-21 17:10:48 +02:00
args . fontencoding <- Pdftext . WinAnsiEncoding ;
2021-12-21 15:00:58 +01:00
args . color <- Cpdfaddtext . RGB ( 0 . , 0 . , 0 . ) ;
2013-08-20 16:32:57 +02:00
args . opacity <- 1 . ;
2023-04-07 16:31:21 +02:00
args . position <- Cpdfposition . TopLeft ( 100 . , 100 . ) ;
2013-08-20 16:32:57 +02:00
args . underneath <- false ;
args . linespacing <- 1 . ;
args . midline <- false ;
2015-01-20 16:50:36 +01:00
args . topline <- false ;
2021-12-21 15:00:58 +01:00
args . justification <- Cpdfaddtext . LeftJustify ;
2013-08-20 16:32:57 +02:00
args . bates <- 0 ;
2015-07-17 17:34:47 +02:00
args . batespad <- None ;
2013-08-20 16:32:57 +02:00
args . prerotate <- false ;
args . relative_to_cropbox <- false ;
args . keepversion <- false ;
args . bycolumns <- false ;
args . pagerotation <- 0 ;
args . crypt_method <- " " ;
args . owner <- " " ;
args . user <- " " ;
args . no_edit <- false ;
args . no_print <- false ;
args . no_copy <- false ;
args . no_annot <- false ;
args . no_forms <- false ;
args . no_extract <- false ;
args . no_assemble <- false ;
args . no_hq_print <- false ;
args . debug <- false ;
2014-10-24 18:24:29 +02:00
args . debugcrypt <- false ;
2013-08-20 16:32:57 +02:00
args . boxes <- false ;
args . encrypt_metadata <- true ;
args . retain_numbering <- false ;
2024-07-03 13:53:14 +02:00
args . process_struct_trees <- false ;
2013-08-20 16:32:57 +02:00
args . remove_duplicate_fonts <- false ;
args . remove_duplicate_streams <- false ;
2024-01-24 15:21:11 +01:00
args . encoding <- Cpdfmetadata . Stripped ;
2013-08-20 16:32:57 +02:00
args . scale <- 1 . ;
args . copyfontpage <- 1 ;
args . copyfontname <- None ;
args . fast <- false ;
args . dashrange <- " all " ;
args . outline <- false ;
args . linewidth <- 1 . 0 ;
args . frombox <- None ;
args . tobox <- None ;
args . mediabox_if_missing <- false ;
args . topage <- None ;
args . scale_stamp_to_fit <- false ;
2014-09-18 15:27:07 +02:00
args . labelstyle <- Pdfpagelabels . DecimalArabic ;
2013-10-24 16:21:54 +02:00
args . labelprefix <- None ;
2014-09-18 16:40:22 +02:00
args . labelstartval <- 1 ;
2020-01-25 08:22:45 +01:00
args . labelsprogress <- false ;
2022-09-26 21:38:16 +02:00
args . embedstd14 <- None ;
2017-12-18 20:44:02 +01:00
args . extract_text_font_size <- None ;
2019-06-26 18:43:59 +02:00
args . padwith <- None ;
args . alsosetxml <- false ;
2019-07-03 15:40:32 +02:00
args . justsetxml <- false ;
args . merge_add_bookmarks <- false ;
2019-07-07 18:07:52 +02:00
args . merge_add_bookmarks_use_titles <- false ;
args . createpdf_pages <- 1 ;
2019-07-09 17:31:45 +02:00
args . createpdf_pagesize <- Pdfpaper . a4 ;
2020-01-30 11:20:33 +01:00
args . removeonly <- None ;
2020-02-01 11:18:15 +01:00
args . jsonparsecontentstreams <- false ;
2020-02-27 14:32:45 +01:00
args . jsonnostreamdata <- false ;
2021-10-04 19:38:36 +02:00
args . jsondecompressstreams <- false ;
2021-12-30 16:25:24 +01:00
args . jsoncleanstrings <- false ;
2020-02-27 14:32:45 +01:00
args . ocgrenamefrom <- " " ;
2020-12-20 16:41:52 +01:00
args . ocgrenameto <- " " ;
args . dedup <- false ;
2021-10-16 16:47:41 +02:00
args . dedup_per_page <- false ;
2021-10-18 19:19:59 +02:00
args . collate <- false ;
args . impose_columns <- false ;
args . impose_rtl <- false ;
args . impose_btt <- false ;
args . impose_center <- false ;
args . impose_margin <- 0 . ;
args . impose_spacing <- 0 . ;
2021-10-26 18:32:36 +02:00
args . impose_linewidth <- 0 . ;
2021-10-28 18:06:46 +02:00
args . format_json <- false ;
2021-10-29 16:09:21 +02:00
args . replace_dict_entry_value <- Pdf . Null ;
2021-12-02 00:50:04 +01:00
args . dict_entry_search <- None ;
2021-12-10 13:58:30 +01:00
args . toc_title <- " Table of Contents " ;
2021-12-15 14:01:51 +01:00
args . toc_bookmark <- true ;
2023-05-04 14:53:49 +02:00
args . idir_only_pdfs <- false ;
2024-02-01 17:38:07 +01:00
args . jpegquality <- 100 . ;
args . jpegqualitylossless <- 101 . ;
2023-12-22 17:45:53 +01:00
args . onebppmethod <- " " ;
2023-12-27 20:53:02 +01:00
args . pixel_threshold <- 25 ;
args . length_threshold <- 100 ;
2024-02-01 17:38:07 +01:00
args . percentage_threshold <- 99 . ;
args . dpi_threshold <- 0 . ;
args . resample_factor <- 101 . ;
2023-12-31 12:59:48 +01:00
args . resample_interpolate <- false ;
2024-01-12 16:00:28 +01:00
args . jbig2_lossy_threshold <- 0 . 85 ;
2024-01-17 19:37:58 +01:00
args . extract_stream_decompress <- false ;
2024-06-24 17:33:43 +02:00
clear Cpdfdrawcontrol . fontpack_initialised ;
2024-09-06 15:04:17 +02:00
args . verify_single <- None ;
2024-09-11 14:57:57 +02:00
args . draw_struct_tree <- false ;
2024-09-20 15:15:10 +02:00
args . subformat <- None ;
2024-09-27 14:45:18 +02:00
args . indent <- None ;
args . title <- None
2014-10-27 17:48:08 +01:00
2021-12-18 17:14:31 +01:00
(* Prefer a ) the one given with -cpdflin b ) a local cpdflin, c ) otherwise assume
installed at a system place * )
let find_cpdflin provided =
match provided with
Some x -> x
| None ->
let dotslash = match Sys . os_type with " Win32 " -> " " | _ -> " ./ " in
if Sys . file_exists " cpdflin " then ( dotslash ^ " cpdflin " ) else
if Sys . file_exists " cpdflin.exe " then ( dotslash ^ " cpdflin.exe " ) else
match Sys . os_type with
" Win32 " -> " cpdflin.exe "
| _ -> " cpdflin "
(* Call cpdflin, given the ( temp ) input name, the output name, and the location
of the cpdflin binary . Returns the exit code . * )
let call_cpdflin cpdflin temp output best_password =
let command =
2023-02-21 15:50:07 +01:00
Filename . quote_command cpdflin
[ " --linearize " ; ( " --password= " ^ best_password ) ; temp ; output ]
2021-12-18 17:14:31 +01:00
in
match Sys . os_type with
" Win32 " ->
(* On windows, don't use LD_LIBRARY_PATH - it will happen automatically *)
2023-04-25 14:45:56 +02:00
if args . debug then Pdfe . log ( command ^ " \n " ) ;
2021-12-18 17:14:31 +01:00
Sys . command command
| _ ->
(* On other platforms, if -cpdflin was provided, or cpdflin was in the
current folder , set up LD_LIBRARY_PATH : * )
match cpdflin with
" cpdflin " ->
2023-04-25 14:45:56 +02:00
if args . debug then Pdfe . log ( command ^ " \n " ) ;
2021-12-18 17:14:31 +01:00
Sys . command command
| _ ->
let command =
2023-02-21 15:50:07 +01:00
" DYLD_FALLBACK_LIBRARY_PATH= " ^ Filename . quote ( Filename . dirname cpdflin ) ^ " " ^
" LD_LIBRARY_PATH= " ^ Filename . quote ( Filename . dirname cpdflin ) ^ " " ^
2021-12-18 17:14:31 +01:00
command
in
2023-04-25 14:45:56 +02:00
if args . debug then Pdfe . log ( command ^ " \n " ) ;
2021-12-18 17:14:31 +01:00
Sys . command command
2015-07-26 13:31:43 +02:00
let get_pagespec () =
match args . inputs with
| ( _ , ps , _ , _ , _ , _ ) :: _ -> ps
| _ -> error " No range specified for input, or specified too late. "
2014-10-27 17:48:08 +01:00
let string_of_permission = function
| Pdfcrypt . NoEdit -> " No edit "
| Pdfcrypt . NoPrint -> " No print "
| Pdfcrypt . NoCopy -> " No copy "
| Pdfcrypt . NoAnnot -> " No annotate "
| Pdfcrypt . NoForms -> " No edit forms "
| Pdfcrypt . NoExtract -> " No extract "
| Pdfcrypt . NoAssemble -> " No assemble "
| Pdfcrypt . NoHqPrint -> " No high-quality print "
let getpermissions pdf =
fold_left
( fun x y -> if x = " " then x ^ y else x ^ " , " ^ y )
" "
( map string_of_permission ( Pdfread . permissions pdf ) )
2013-08-20 16:32:57 +02:00
let banlist_of_args () =
let l = ref [] in
if args . no_edit then l = | Pdfcrypt . NoEdit ;
if args . no_print then l = | Pdfcrypt . NoPrint ;
if args . no_copy then l = | Pdfcrypt . NoCopy ;
if args . no_annot then l = | Pdfcrypt . NoAnnot ;
if args . no_forms then l = | Pdfcrypt . NoForms ;
if args . no_extract then l = | Pdfcrypt . NoExtract ;
if args . no_assemble then l = | Pdfcrypt . NoAssemble ;
if args . no_hq_print then l = | Pdfcrypt . NoHqPrint ;
! l
(* If a file is encrypted, decrypt it using the owner password or, if not
present , the user password . If the user password is used , the operation to be
performed is checked to see if it's allowable under the permissions regime . * )
(* The bans. Each function has a list of bans. If any of these is present in the
bans list in the input file , the operation cannot proceed . Other operations
cannot proceed at all without owner password . * )
let banned banlist = function
2023-02-15 22:28:44 +01:00
| Fonts | Info | Metadata | PageInfo | CountPages
2021-10-27 19:55:52 +02:00
| ListAttachedFiles | ListAnnotations
2023-11-09 12:25:19 +01:00
| ListBookmarks | ImageResolution _ | ListImages | MissingFonts
2014-10-27 17:48:08 +01:00
| PrintPageLabels | Clean | Compress | Decompress
2023-04-12 20:37:30 +02:00
| ChangeId | CopyId _ | ListSpotColours | Version
2019-07-15 12:52:14 +02:00
| DumpAttachedFiles | RemoveMetadata | EmbedMissingFonts | BookmarksOpenToLevel _ | CreatePDF
2023-04-17 20:55:11 +02:00
| SetPageMode _ | SetNonFullScreenPageMode _ | HideToolbar _ | HideMenubar _ | HideWindowUI _
2019-08-01 15:34:45 +02:00
| FitWindow _ | CenterWindow _ | DisplayDocTitle _
2023-04-18 15:42:17 +02:00
| RemoveId | OpenAtPageFit _ | OpenAtPage _ | OpenAtPageCustom _ | SetPageLayout _
2019-08-18 12:25:55 +02:00
| ShowBoxes | TrimMarks | CreateMetadata | SetMetadataDate _ | SetVersion _
| SetAuthor _ | SetTitle _ | SetSubject _ | SetKeywords _ | SetCreate _
2021-10-29 16:09:21 +02:00
| SetModify _ | SetCreator _ | SetProducer _ | RemoveDictEntry _ | ReplaceDictEntry _ | PrintDictEntry _ | SetMetadata _
2023-11-02 19:49:15 +01:00
| ExtractText | ExtractImages | ExtractFontFile _
2020-02-26 17:24:27 +01:00
| AddPageLabels | RemovePageLabels | OutputJSON | OCGCoalesce
2023-03-03 17:02:16 +01:00
| OCGRename | OCGList | OCGOrderAll | PrintFontEncoding _ | TableOfContents | Typeset _ | Composition _
2024-09-02 17:30:02 +02:00
| TextWidth _ | SetAnnotations _ | CopyAnnotations _ | ExtractStream _ | PrintObj _ | ReplaceObj _
2024-06-27 15:21:47 +02:00
| Verify _ | MarkAs _ | RemoveMark _ | ExtractStructTree | ReplaceStructTree _ | SetLanguage _
| PrintStructTree
2019-08-18 12:25:55 +02:00
-> false (* Always allowed *)
2014-11-16 17:01:12 +01:00
(* Combine pages is not allowed because we would not know where to get the
- recrypt from - - the first or second file ? * )
| Decrypt | Encrypt | CombinePages _ -> true (* Never allowed *)
2019-07-01 16:35:17 +02:00
| AddBookmarks _ | PadBefore | PadAfter | PadEvery _ | PadMultiple _ | PadMultipleBefore _
2023-10-30 17:36:41 +01:00
| Merge | Split | SplitOnBookmarks _ | SplitMax _ | Spray | RotateContents _ | Rotate _
2024-09-21 19:44:43 +02:00
| Rotateby _ | Upright | VFlip | HFlip | Impose _ | Chop _ | ChopHV _ | Redact ->
2019-08-01 15:34:45 +02:00
mem Pdfcrypt . NoAssemble banlist
2024-09-21 18:35:29 +02:00
| TwoUp | TwoUpStack | RemoveBookmarks | AddRectangle | RemoveText |
Draft | Shift | ShiftBoxes | Scale | ScaleToFit | Stretch | RemoveAttachedFiles |
2019-06-26 15:43:24 +02:00
RemoveAnnotations | RemoveFonts | Crop | RemoveCrop | Trim | RemoveTrim | Bleed | RemoveBleed | Art | RemoveArt |
2023-05-29 14:05:37 +02:00
CopyBox | MediaBox | HardBox _ | SetTrapped | SetUntrapped | Presentation |
2021-10-02 14:21:06 +02:00
BlackText | BlackLines | BlackFills | CopyFont _ | StampOn _ | StampUnder _ | StampAsXObject _ |
2023-05-29 14:05:37 +02:00
AddText _ | ScaleContents _ | AttachFile _ | ThinLines _ | RemoveClipping | RemoveAllText
2023-12-06 13:20:27 +01:00
| Prepend _ | Postpend _ | Draw | ProcessImages ->
2016-11-03 18:11:08 +01:00
mem Pdfcrypt . NoEdit banlist
2013-08-20 16:32:57 +02:00
2014-10-27 17:48:08 +01:00
let operation_allowed pdf banlist op =
2018-03-20 11:19:50 +01:00
args . debugforce | |
2014-10-15 18:51:15 +02:00
match op with
2014-10-27 17:48:08 +01:00
| None ->
2014-10-28 19:40:56 +01:00
if args . debugcrypt then Printf . printf " operation is None, so allowed! \n " ;
2014-10-27 17:48:08 +01:00
true (* Merge *) (* changed to allow it *)
| Some op ->
if args . debugcrypt then Printf . printf " operation_allowed: op = %s \n " ( string_of_op op ) ;
if args . debugcrypt then Printf . printf " Permissions: %s \n " ( getpermissions pdf ) ;
not ( banned banlist op )
2013-08-20 16:32:57 +02:00
2024-02-27 16:57:31 +01:00
let decrypt_if_necessary ( _ , _ , user_pw , owner_pw , was_dec_with_owner , _ ) op pdf =
2014-10-24 18:24:29 +02:00
if args . debugcrypt then
begin match op with
None -> flprint " decrypt_if_necessary: op = None \n "
| Some x -> Printf . printf " decrypt_if_necessary: op = %s \n " ( string_of_op x )
end ;
2013-08-20 16:32:57 +02:00
if not ( Pdfcrypt . is_encrypted pdf ) then pdf else
2014-11-17 15:48:56 +01:00
match op with Some ( CombinePages _ ) ->
(* This is a hack because we don't have support for recryption on combine
2023-03-03 17:02:16 +01:00
* pages . This is prevented by permissions above , but in the case that the
2014-11-17 15:48:56 +01:00
* owner password is blank ( e . g christmas_tree_lights . pdf ) , we would end
* up here . * )
2021-10-12 19:58:37 +02:00
soft_error " Combine pages: both files must be unencrypted for this operation, or add -decrypt-force "
2014-11-17 15:48:56 +01:00
| _ ->
match Pdfcrypt . decrypt_pdf_owner owner_pw pdf with
| Some pdf ->
args . was_decrypted_with_owner <- true ;
was_dec_with_owner := true ;
if args . debugcrypt then Printf . printf " Managed to decrypt with owner password \n " ;
pdf
2013-08-20 16:32:57 +02:00
| _ ->
2014-11-17 15:48:56 +01:00
if args . debugcrypt then Printf . printf " Couldn't decrypt with owner password %s \n " owner_pw ;
2019-10-26 15:10:03 +02:00
match
if args . debugcrypt then Printf . printf " call decrypt_pdf user \n " ;
let r = Pdfcrypt . decrypt_pdf user_pw pdf in
if args . debugcrypt then Printf . printf " returned from decrypt_pdf \n " ;
r
with
2014-11-17 15:48:56 +01:00
| Some pdf , permissions ->
if args . debugcrypt then Printf . printf " Managed to decrypt with user password \n " ;
if operation_allowed pdf permissions op
then pdf
2021-10-12 19:58:37 +02:00
else soft_error " User password cannot give permission for this operation. Supply owner or add -decrypt-force. "
2014-11-17 15:48:56 +01:00
| _ ->
2019-10-26 15:10:03 +02:00
if args . debugcrypt then Printf . printf " Failed to decrypt with user password: raising soft_error " ;
2014-11-17 15:48:56 +01:00
soft_error " Failed to decrypt file: wrong password? "
2013-08-20 16:32:57 +02:00
(* Output Page Count *)
let output_page_count pdf =
2024-01-13 18:30:06 +01:00
Printf . printf " %i \n " ( ( if args . fast then Pdfpage . endpage_fast else Pdfpage . endpage ) pdf )
2013-08-20 16:32:57 +02:00
2019-07-08 13:44:27 +02:00
let detect_duplicate_op op =
2019-09-26 14:30:22 +02:00
match args . op with
None | Some Shift -> ()
| _ ->
2023-04-25 14:45:56 +02:00
Pdfe . log ( Printf . sprintf " Operation %s already specified, so cannot specify operation %s. \n Use AND from Chapter 1 of the manual to chain commands together. \n "
( string_of_op ( unopt args . op ) ) ( string_of_op op ) ) ;
2019-07-08 13:44:27 +02:00
exit 1
2013-08-20 16:32:57 +02:00
let setop op () =
2019-07-08 13:44:27 +02:00
detect_duplicate_op op ;
2013-08-20 16:32:57 +02:00
args . op <- Some op
let setout name =
2023-10-30 17:36:41 +01:00
args . out <- File name ;
spray_outputs := name :: ! spray_outputs
2013-08-20 16:32:57 +02:00
let setchunk c =
if c > 0
then args . chunksize <- c
else error " invalid chunk size "
let fixdashes s =
let bufferdashes chars =
let buf = ref [] in
iter
( function '-' -> buf = @ [ ' ' ; '-' ; ' ' ] | x -> buf = | x )
chars ;
rev ! buf
in
let chars = explode s in
implode ( bufferdashes chars )
2024-01-16 15:54:45 +01:00
let set_input_image f s =
try
let fh = open_in_bin s in
2024-09-30 19:26:39 +02:00
let pdf = Cpdfimage . image_of_input ? subformat : args . subformat ? title : args . title ~ process_struct_tree : args . process_struct_trees f ( Pdfio . input_of_channel fh ) in
2024-01-16 15:54:45 +01:00
begin try close_in fh with _ -> () end ;
args . original_filename <- s ;
args . create_objstm <- true ;
args . inputs <- ( AlreadyInMemory ( pdf , s ) , " all " , " " , " " , ref false , None ) :: args . inputs
with
Sys_error _ -> error " Image file not found "
let jbig2_global = ref None
let set_input_png s = set_input_image ( fun () -> Cpdfimage . obj_of_png_data ) s
let set_input_jpeg s = set_input_image ( fun () -> Cpdfimage . obj_of_jpeg_data ) s
2024-03-22 14:57:04 +01:00
let set_input_jpeg2000 s = set_input_image ( fun () -> Cpdfimage . obj_of_jpeg2000_data ) s
2024-01-16 15:54:45 +01:00
let set_input_jbig2 s =
set_input_image
( fun () -> Cpdfimage . obj_of_jbig2_data ? global : ! jbig2_global ) s ;
args . remove_duplicate_streams <- true
2024-01-23 19:37:35 +01:00
let encrypt_to_collect = ref 0
let setmethod s =
detect_duplicate_op Encrypt ;
if args . op = None then args . op <- Some Encrypt ; (* Could be additional to -split *)
match s with
| " 40bit " | " 128bit " | " AES " | " AES256 " | " AES256ISO " -> args . crypt_method <- s
| _ -> error ( " Unsupported encryption method " ^ s )
2024-01-16 15:54:45 +01:00
2013-08-20 16:32:57 +02:00
let anon_fun s =
try
match ! encrypt_to_collect with
| 3 -> setmethod s ; decr encrypt_to_collect
2024-01-23 19:37:35 +01:00
| 2 -> args . owner <- s ; decr encrypt_to_collect
| 1 -> args . user <- s ; decr encrypt_to_collect
2013-08-20 16:32:57 +02:00
| 0 ->
let before , after = cleavewhile ( neq '=' ) ( explode s ) in
begin match implode before with
| " user " ->
begin match args . inputs with
| [] -> ()
2014-12-11 19:17:02 +01:00
| ( a , b , _ , e , f , g ) :: more ->
args . inputs <- ( a , b , implode ( tl after ) , e , f , g ) :: more
2013-08-20 16:32:57 +02:00
end
| " owner " ->
begin match args . inputs with
| [] -> ()
2014-12-11 19:17:02 +01:00
| ( a , b , d , _ , f , g ) :: more ->
args . inputs <- ( a , b , d , implode ( tl after ) , f , g ) :: more
2013-08-20 16:32:57 +02:00
end
| _ -> raise Not_found
end
| _ -> assert false
with
Not_found ->
try
ignore ( String . index s '.' ) ;
2024-01-16 15:54:45 +01:00
begin match rev ( explode s ) with
| a :: b :: c :: d :: e :: '.' :: r when implode ( map Char . uppercase_ascii [ e ; d ; c ; b ; a ] ) = " JBIG2 " -> set_input_jbig2 s
| a :: b :: c :: d :: '.' :: r when implode ( map Char . uppercase_ascii [ d ; c ; b ; a ] ) = " JPEG " -> set_input_jpeg s
| a :: b :: c :: '.' :: r when implode ( map Char . uppercase_ascii [ c ; b ; a ] ) = " JPG " -> set_input_jpeg s
2024-03-22 14:57:04 +01:00
| a :: b :: c :: '.' :: r when implode ( map Char . uppercase_ascii [ c ; b ; a ] ) = " JP2 " -> set_input_jpeg2000 s
| a :: b :: c :: '.' :: r when implode ( map Char . uppercase_ascii [ c ; b ; a ] ) = " JPX " -> set_input_jpeg2000 s
2024-03-22 16:24:48 +01:00
| a :: b :: c :: '.' :: r when implode ( map Char . uppercase_ascii [ c ; b ; a ] ) = " JPF " -> set_input_jpeg2000 s
2024-01-16 15:54:45 +01:00
| a :: b :: c :: '.' :: r when implode ( map Char . uppercase_ascii [ c ; b ; a ] ) = " PNG " -> set_input_png s
| _ -> args . inputs <- ( InFile s , " all " , " " , " " , ref false , None ) :: args . inputs
end ;
2015-01-11 17:10:18 +01:00
args . original_filename <- s
2013-08-20 16:32:57 +02:00
with
Not_found ->
match args . inputs with
2021-12-15 14:51:26 +01:00
| [] ->
2023-05-12 16:33:28 +02:00
Pdfe . log ( Printf . sprintf " Warning: '%s' ignored \n " s )
2014-12-11 19:17:02 +01:00
| ( a , _ , d , e , f , g ) :: t ->
args . inputs <- ( a , fixdashes s , d , e , f , g ) :: t
2013-08-20 16:32:57 +02:00
2021-01-06 14:41:14 +01:00
(* If a password begins with a dash, we allow -pw=<password> too *)
let setdashpassword = anon_fun
2013-08-20 16:32:57 +02:00
(* Setting operations *)
let setcrop s =
setop Crop () ;
2016-07-18 21:00:10 +02:00
args . rectangle <- s
2013-08-20 16:32:57 +02:00
2019-06-26 15:43:24 +02:00
let settrim s =
setop Trim () ;
args . rectangle <- s
let setbleed s =
setop Bleed () ;
args . rectangle <- s
let setart s =
setop Art () ;
args . rectangle <- s
2013-08-20 16:32:57 +02:00
let setmediabox s =
setop MediaBox () ;
2016-07-18 21:00:10 +02:00
args . rectangle <- s
2013-08-20 16:32:57 +02:00
let setrectangle s =
setop AddRectangle () ;
2016-07-18 21:00:10 +02:00
args . coord <- s
2013-08-20 16:32:57 +02:00
let setrotate i =
if i = 0 | | i = 90 | | i = 180 | | i = 270
then setop ( Rotate i ) ()
else error " bad rotation "
let setrotateby i =
if i = 0 | | i = 90 | | i = 180 | | i = 270
then setop ( Rotateby i ) ()
else error " bad rotation "
let hidetoolbar b =
try setop ( HideToolbar ( bool_of_string b ) ) () with
_ -> failwith " HideToolBar: must use true or false "
let hidemenubar b =
try setop ( HideMenubar ( bool_of_string b ) ) () with
_ -> failwith " HideMenuBar: must use true or false "
let hidewindowui b =
try setop ( HideWindowUI ( bool_of_string b ) ) () with
_ -> failwith " HideWindowUI: must use true or false "
let fitwindow b =
try setop ( FitWindow ( bool_of_string b ) ) () with
_ -> failwith " FitWindow: must use true or false "
let centerwindow b =
try setop ( CenterWindow ( bool_of_string b ) ) () with
_ -> failwith " CenterWindow: must use true or false "
let displaydoctitle b =
try setop ( DisplayDocTitle ( bool_of_string b ) ) () with
_ -> failwith " DisplayDocTitle: must use true or false "
2023-10-25 19:44:29 +02:00
let read_file_size s =
2024-02-14 22:27:04 +01:00
let read_int s' =
try int_of_string ( implode ( rev s' ) ) with
_ -> error ( Printf . sprintf " Could not read file size specification %s " s )
in
2023-10-26 14:46:51 +02:00
match rev ( explode ( String . uppercase_ascii s ) ) with
2023-10-26 18:33:10 +02:00
| 'B' :: 'I' :: 'G' :: s -> 1024 * 1024 * 1024 * read_int s
| 'B' :: 'G' :: s -> 1000 * 1000 * 1000 * read_int s
| 'B' :: 'I' :: 'M' :: s -> 1024 * 1024 * read_int s
| 'B' :: 'M' :: s -> 1000 * 1000 * read_int s
| 'B' :: 'I' :: 'K' :: s -> 1024 * read_int s
| 'B' :: 'K' :: s -> 1000 * read_int s
2023-10-25 19:44:29 +02:00
| s -> read_int s
let setsplitmax i = setop ( SplitMax ( read_file_size i ) ) ()
2013-08-20 16:32:57 +02:00
let setstdout () = args . out <- Stdout
2014-12-11 19:17:02 +01:00
let setstdin () = args . inputs <- [ StdIn , " all " , " " , " " , ref false , None ]
2013-08-20 16:32:57 +02:00
let settrans s = args . transition <- Some s
let setduration f = args . duration <- Some f
let setvertical () = args . horizontal <- false
let setoutward () = args . inward <- false
let setdirection i =
args . direction <-
match i with
| 0 | 90 | 180 | 270 | 315 -> i
| _ -> error " Bad direction "
let seteffectduration f = args . effect_duration <- f
let setcopyid s = setop ( CopyId s ) ()
2019-09-26 12:44:54 +02:00
let setthinlines s = setop ( ThinLines ( Cpdfcoord . parse_single_number empty s ) ) ()
2013-08-20 16:32:57 +02:00
let setcopyannotations s = setop ( CopyAnnotations s ) ()
2023-01-13 07:30:46 +01:00
let setsetannotations s = setop ( SetAnnotations s ) ()
2013-08-20 16:32:57 +02:00
let setshift s =
setop Shift () ;
2016-07-18 21:00:10 +02:00
args . coord <- s
2013-08-20 16:32:57 +02:00
2024-01-22 17:36:37 +01:00
let setshiftboxes s =
setop ShiftBoxes () ;
args . coord <- s
2013-08-20 16:32:57 +02:00
let setscale s =
setop Scale () ;
2016-07-18 21:00:10 +02:00
args . coord <- s
2013-08-20 16:32:57 +02:00
let setscaletofit s =
setop ScaleToFit () ;
2016-07-18 21:00:10 +02:00
args . coord <- s
2013-08-20 16:32:57 +02:00
2024-09-21 18:35:29 +02:00
let setstretch s =
setop Stretch () ;
args . coord <- s
2013-08-20 16:32:57 +02:00
let setattachfile s =
match args . op with
| Some ( AttachFile t ) ->
args . op <- Some ( AttachFile ( s :: t ) )
2019-07-08 13:44:27 +02:00
| None ->
2013-08-20 16:32:57 +02:00
setop ( AttachFile [ s ] ) ()
2019-07-08 13:44:27 +02:00
| Some _ -> detect_duplicate_op ( AttachFile [ s ] )
2013-08-20 16:32:57 +02:00
2016-11-13 15:02:09 +01:00
let setextracttextfontsize f =
args . extract_text_font_size <- Some f
2024-10-03 18:59:10 +02:00
let setfontsize s =
let f = Cpdfcoord . parse_single_number ( Pdf . empty () ) s in
if f > 0 . then args . fontsize <- f else error " Negative font size specified "
let setlinewidth s =
let f = Cpdfcoord . parse_single_number ( Pdf . empty () ) s in
if f > 0 . then args . linewidth <- f else error " Negative line width specified "
let setimposemargin s =
let f = Cpdfcoord . parse_single_number ( Pdf . empty () ) s in
args . impose_margin <- f
let setimposelinewidth s =
let f = Cpdfcoord . parse_single_number ( Pdf . empty () ) s in
if f > 0 . then args . impose_linewidth <- f else error " Negative impose line width specified "
let setimposespacing s =
let f = Cpdfcoord . parse_single_number ( Pdf . empty () ) s in
args . impose_spacing <- f
let setleading s =
let f = Cpdfcoord . parse_single_number ( Pdf . empty () ) s in
Cpdfdrawcontrol . addop ( Cpdfdraw . Leading f )
let setcharspace s =
let f = Cpdfcoord . parse_single_number ( Pdf . empty () ) s in
Cpdfdrawcontrol . addop ( Cpdfdraw . CharSpace f )
let setwordspace s =
let f = Cpdfcoord . parse_single_number ( Pdf . empty () ) s in
Cpdfdrawcontrol . addop ( Cpdfdraw . WordSpace f )
let setrise s =
let f = Cpdfcoord . parse_single_number ( Pdf . empty () ) s in
Cpdfdrawcontrol . addop ( Cpdfdraw . Rise f )
2013-08-20 16:32:57 +02:00
let setaddtext s =
setop ( AddText s ) ()
let setcolor s =
2024-03-04 16:16:14 +01:00
args . color <- Cpdfdrawcontrol . parse_colour s
2013-08-20 16:32:57 +02:00
let setopacity o =
args . opacity <- o
let setaddbookmarks s =
setop ( AddBookmarks s ) ()
2021-10-26 18:32:36 +02:00
let setaddbookmarksjson s =
setop ( AddBookmarks s ) () ;
2021-10-27 19:55:52 +02:00
args . format_json <- true
2021-10-26 18:32:36 +02:00
2023-10-31 16:50:23 +01:00
let setlistfontsjson () =
setop Fonts () ;
args . format_json <- true
let setinfojson () =
setop Info () ;
args . format_json <- true
let setpageinfojson () =
setop PageInfo () ;
args . format_json <- true
let setprintpagelabelsjson () =
setop PrintPageLabels () ;
args . format_json <- true
2021-10-27 19:55:52 +02:00
let setlistbookmarksjson () =
2021-10-26 18:32:36 +02:00
setop ListBookmarks () ;
2021-10-27 19:55:52 +02:00
args . format_json <- true
let setlistannotationsjson () =
2021-10-28 16:34:03 +02:00
setop ListAnnotations () ;
2021-10-27 19:55:52 +02:00
args . format_json <- true
2021-10-26 18:32:36 +02:00
2013-08-20 16:32:57 +02:00
let setstampon f =
2014-10-06 15:32:53 +02:00
setop ( StampOn f ) () ;
(* Due to an earlier bad decision ( default position ) , we have this nasty hack *)
2023-04-07 16:31:21 +02:00
if args . position = Cpdfposition . TopLeft ( 100 . , 100 . ) then args . position <- Cpdfposition . BottomLeft ( 0 . , 0 . )
2013-08-20 16:32:57 +02:00
let setstampunder f =
2014-10-06 15:32:53 +02:00
setop ( StampUnder f ) () ;
2023-04-07 16:31:21 +02:00
if args . position = Cpdfposition . TopLeft ( 100 . , 100 . ) then args . position <- Cpdfposition . BottomLeft ( 0 . , 0 . )
2013-08-20 16:32:57 +02:00
2020-03-18 15:01:27 +01:00
let setstampasxobject f =
setop ( StampAsXObject f ) ()
2013-08-20 16:32:57 +02:00
let setcombinepages f =
setop ( CombinePages f ) ()
let setposcenter s =
2019-09-26 12:44:54 +02:00
let x , y = Cpdfcoord . parse_coordinate empty s in
2021-08-12 21:38:55 +02:00
args . position <- Cpdfposition . PosCentre ( x , y )
2013-08-20 16:32:57 +02:00
let setposleft s =
2019-09-26 12:44:54 +02:00
let x , y = Cpdfcoord . parse_coordinate empty s in
2021-08-12 21:38:55 +02:00
args . position <- Cpdfposition . PosLeft ( x , y )
2013-08-20 16:32:57 +02:00
let setposright s =
2019-09-26 12:44:54 +02:00
let x , y = Cpdfcoord . parse_coordinate empty s in
2021-08-12 21:38:55 +02:00
args . position <- Cpdfposition . PosRight ( x , y )
2013-08-20 16:32:57 +02:00
let settop n =
2021-08-12 21:38:55 +02:00
args . position <- Cpdfposition . Top ( Cpdfcoord . parse_single_number empty n ) ;
2021-12-21 15:00:58 +01:00
args . justification <- Cpdfaddtext . CentreJustify
2013-08-20 16:32:57 +02:00
let settopleft n =
2023-04-07 16:31:21 +02:00
let coord =
match Cpdfcoord . parse_coordinate empty n with
| ( a , b ) -> Cpdfposition . TopLeft ( a , b )
2023-06-07 22:52:01 +02:00
| exception _ ->
let x = Cpdfcoord . parse_single_number empty n in
Cpdfposition . TopLeft ( x , x )
2023-04-07 16:31:21 +02:00
in
args . position <- coord ;
args . justification <- Cpdfaddtext . LeftJustify
2013-08-20 16:32:57 +02:00
let settopright n =
2023-04-07 16:31:21 +02:00
let coord =
match Cpdfcoord . parse_coordinate empty n with
| ( a , b ) -> Cpdfposition . TopRight ( a , b )
2023-06-07 22:52:01 +02:00
| exception _ ->
let x = Cpdfcoord . parse_single_number empty n in
Cpdfposition . TopRight ( x , x )
2023-04-07 16:31:21 +02:00
in
args . position <- coord ;
args . justification <- Cpdfaddtext . RightJustify
2013-08-20 16:32:57 +02:00
let setleft n =
2021-08-12 21:38:55 +02:00
args . position <- Cpdfposition . Left ( Cpdfcoord . parse_single_number empty n ) ;
2021-12-21 15:00:58 +01:00
args . justification <- Cpdfaddtext . LeftJustify
2013-08-20 16:32:57 +02:00
let setbottomleft n =
2023-04-07 16:31:21 +02:00
let coord =
match Cpdfcoord . parse_coordinate empty n with
| ( a , b ) -> Cpdfposition . BottomLeft ( a , b )
2023-06-07 22:52:01 +02:00
| exception _ ->
let x = Cpdfcoord . parse_single_number empty n in
Cpdfposition . BottomLeft ( x , x )
2023-04-07 16:31:21 +02:00
in
args . position <- coord ;
args . justification <- Cpdfaddtext . LeftJustify
2013-08-20 16:32:57 +02:00
let setbottom n =
2021-08-12 21:38:55 +02:00
args . position <- Cpdfposition . Bottom ( Cpdfcoord . parse_single_number empty n ) ;
2021-12-21 15:00:58 +01:00
args . justification <- Cpdfaddtext . CentreJustify
2013-08-20 16:32:57 +02:00
let setbottomright n =
2023-04-07 16:31:21 +02:00
let coord =
match Cpdfcoord . parse_coordinate empty n with
| ( a , b ) -> Cpdfposition . BottomRight ( a , b )
2023-06-07 22:52:01 +02:00
| exception _ ->
let x = Cpdfcoord . parse_single_number empty n in
Cpdfposition . BottomRight ( x , x )
2023-04-07 16:31:21 +02:00
in
args . position <- coord ;
args . justification <- Cpdfaddtext . RightJustify
2013-08-20 16:32:57 +02:00
let setright n =
2021-08-12 21:38:55 +02:00
args . position <- Cpdfposition . Right ( Cpdfcoord . parse_single_number empty n ) ;
2021-12-21 15:00:58 +01:00
args . justification <- Cpdfaddtext . RightJustify
2013-08-20 16:32:57 +02:00
let setdiagonal n =
2021-08-12 21:38:55 +02:00
args . position <- Cpdfposition . Diagonal ;
2021-12-21 15:00:58 +01:00
args . justification <- Cpdfaddtext . CentreJustify
2013-08-20 16:32:57 +02:00
let setreversediagonal n =
2021-08-12 21:38:55 +02:00
args . position <- Cpdfposition . ReverseDiagonal ;
2021-12-21 15:00:58 +01:00
args . justification <- Cpdfaddtext . CentreJustify
2013-08-20 16:32:57 +02:00
let setcenter n =
2021-08-12 21:38:55 +02:00
args . position <- Cpdfposition . Centre ;
2021-12-21 15:00:58 +01:00
args . justification <- Cpdfaddtext . CentreJustify
2014-10-03 16:55:03 +02:00
2015-07-26 13:31:43 +02:00
(* Calculate -bates automatically so that n is applied to the first page in the range *)
let setbatesrange n =
let first_page =
2021-08-12 21:14:48 +02:00
let range = Cpdfpagespec . parse_pagespec_without_pdf ( get_pagespec () ) in
2015-07-26 13:31:43 +02:00
fold_left min max_int range
in
args . bates <- n + 1 - first_page
2013-08-20 16:32:57 +02:00
let set_input s =
2014-09-30 15:46:05 +02:00
args . original_filename <- s ;
2015-01-11 17:10:18 +01:00
args . inputs <- ( InFile s , " all " , " " , " " , ref false , None ) :: args . inputs
2013-08-20 16:32:57 +02:00
2021-10-01 13:16:55 +02:00
let set_json_input s =
args . original_filename <- s ;
2022-01-08 17:20:26 +01:00
args . create_objstm <- true ;
2021-10-01 13:16:55 +02:00
let fh = open_in_bin s in
2021-10-01 23:05:43 +02:00
let pdf = Cpdfjson . of_input ( Pdfio . input_of_channel fh ) in
2021-10-01 13:16:55 +02:00
close_in fh ;
2023-12-05 13:20:03 +01:00
args . inputs <- ( AlreadyInMemory ( pdf , s ) , " all " , " " , " " , ref false , None ) :: args . inputs
2021-10-01 13:16:55 +02:00
2013-08-20 16:32:57 +02:00
let set_input_dir s =
let names = sort compare ( leafnames_of_dir s ) in
2021-12-15 14:01:51 +01:00
let names =
if args . idir_only_pdfs then
option_map
( fun x ->
if String . length x > 4 && String . lowercase_ascii ( String . sub x ( String . length x - 4 ) 4 ) = " .pdf "
then Some x else None )
names
else
names
in
2013-08-20 16:32:57 +02:00
args . inputs <-
2014-11-05 14:41:47 +01:00
( rev
( map
2014-12-11 19:17:02 +01:00
( fun n -> ( InFile ( s ^ Filename . dir_sep ^ n ) , " all " , " " , " " , ref false , None ) ) names ) )
2014-11-05 14:41:47 +01:00
@ args . inputs
2013-08-20 16:32:57 +02:00
let setdebug () =
set Pdfread . read_debug ;
set Pdfwrite . write_debug ;
set Pdfcrypt . crypt_debug ;
2017-07-04 15:37:28 +02:00
set Pdfops . debug ;
2013-08-20 16:32:57 +02:00
args . debug <- true
let setboxes () =
args . boxes <- true
let set_no_encrypt_metadata () =
args . encrypt_metadata <- false
let set_retain_numbering () =
args . retain_numbering <- true
let set_remove_duplicate_fonts () =
args . remove_duplicate_fonts <- true
let setencoding enc () =
args . encoding <- enc
let setscaletofitscale f =
args . scale <- f
let setscalecontents f =
2019-07-08 13:44:27 +02:00
detect_duplicate_op ( ScaleContents f ) ;
2013-08-20 16:32:57 +02:00
args . op <- Some ( ScaleContents f ) ;
2021-08-12 21:38:55 +02:00
args . position <- Cpdfposition . Diagonal (* Will be center *)
2013-08-20 16:32:57 +02:00
2014-09-11 15:05:13 +02:00
let setsqueeze () =
2014-09-20 21:16:08 +02:00
args . squeeze <- true ;
args . create_objstm <- true
2014-09-11 15:05:13 +02:00
2020-09-16 17:16:15 +02:00
let setcreatoraswego s =
2015-01-22 20:16:56 +01:00
args . creator <- Some s
2020-09-16 17:16:15 +02:00
let setproduceraswego s =
2015-01-22 20:16:56 +01:00
args . producer <- Some s
2019-10-01 16:02:12 +02:00
let setprepend s =
args . op <- Some ( Prepend s )
let setpostpend s =
args . op <- Some ( Postpend s )
2013-08-20 16:32:57 +02:00
(* Parsing the control file *)
let rec getuntilendquote prev = function
| [] -> implode ( rev prev ) , []
| '"' :: t -> implode ( rev prev ) , t
| '\\' :: '"' :: t -> getuntilendquote ( '"' :: prev ) t
| h :: t -> getuntilendquote ( h :: prev ) t
let rec getarg prev = function
| [] -> implode ( rev prev ) , []
| h :: t ->
if Pdf . is_whitespace h
then implode ( rev prev ) , t
else getarg ( h :: prev ) t
let rec parse_chars args = function
| [] -> rev args
| h :: more when Pdf . is_whitespace h ->
parse_chars args more
| '"' :: more ->
let this , rest = getuntilendquote [] more in
parse_chars ( this :: args ) rest
| h :: t ->
let this , rest = getarg [] ( h :: t ) in
parse_chars ( this :: args ) rest
let parse_control_file name =
( parse_chars []
( charlist_of_bytes ( Pdfio . bytes_of_input_channel ( open_in_bin name ) ) ) )
2024-09-02 15:25:15 +02:00
let parse_control_file_json name =
try
match Cpdfyojson . Safe . from_file name with
| ` List ls -> map ( function ` String s -> s | _ -> raise Exit ) ls
| _ -> raise Exit
with
Exit -> error " Syntax error in JSON control file. "
2013-08-20 16:32:57 +02:00
let setencryptcollect () =
encrypt_to_collect := 3
let setcopyfont s =
2019-07-08 13:44:27 +02:00
detect_duplicate_op ( CopyFont s ) ;
2013-08-20 16:32:57 +02:00
args . op <- Some ( CopyFont s )
let setfontpage i =
args . copyfontpage <- i
let setcopyfontname s =
args . copyfontname <- Some s
let setpadevery i =
2019-07-08 13:44:27 +02:00
detect_duplicate_op ( PadEvery i ) ;
2014-11-18 11:51:14 +01:00
if i > 0 then
args . op <- Some ( PadEvery i )
else
error " PadEvery: must be > 0 "
2013-08-20 16:32:57 +02:00
2017-12-18 20:44:02 +01:00
let setpadwith filename =
args . padwith <- Some filename
2013-08-20 16:32:57 +02:00
let setpadmultiple i =
2019-07-08 13:44:27 +02:00
detect_duplicate_op ( PadMultiple i ) ;
2013-08-20 16:32:57 +02:00
args . op <- Some ( PadMultiple i )
2019-07-01 16:35:17 +02:00
let setpadmultiplebefore i =
2019-07-08 13:44:27 +02:00
detect_duplicate_op ( PadMultipleBefore i ) ;
2019-07-01 16:35:17 +02:00
args . op <- Some ( PadMultipleBefore i )
2013-08-20 16:32:57 +02:00
let setfast () =
args . fast <- true
(* Explicitly add a range. Parse it and replace the top input file with the range. *)
let setrange spec =
2014-10-03 15:18:28 +02:00
args . dashrange <- spec ;
match args . inputs with
2015-01-21 13:18:29 +01:00
( x , _ , c , d , e , f ) :: more ->
args . inputs <- ( x , spec , c , d , e , f ) :: more
2014-10-03 15:18:28 +02:00
| x -> ()
2013-08-20 16:32:57 +02:00
2014-12-11 19:19:57 +01:00
let setrevision n =
match args . inputs with
( a , b , c , d , e , _ ) :: more ->
args . inputs <- ( a , b , c , d , e , Some n ) :: more
2014-12-22 14:20:20 +01:00
| [] ->
2023-04-25 14:45:56 +02:00
Pdfe . log " Warning. -revision ignored. Put it after the filename. \n "
2014-12-11 19:19:57 +01:00
2013-08-20 16:32:57 +02:00
let setimageresolution f =
2019-07-08 13:44:27 +02:00
detect_duplicate_op ( ImageResolution f ) ;
2013-08-20 16:32:57 +02:00
args . op <- Some ( ImageResolution f )
2020-12-11 15:13:24 +01:00
let setimpath p =
args . path_to_im <- p
2023-12-22 20:33:10 +01:00
let setjbig2encpath p =
args . path_to_jbig2enc <- p
2020-12-11 15:13:24 +01:00
let setp2ppath p =
args . path_to_p2p <- p
2013-08-20 16:32:57 +02:00
let setfrombox s =
2019-07-08 13:44:27 +02:00
detect_duplicate_op CopyBox ;
2013-08-20 16:32:57 +02:00
args . op <- Some CopyBox ;
args . frombox <- Some s
let settobox s =
args . tobox <- Some s
let setmediaboxifmissing () =
args . mediabox_if_missing <- true
let settopage s =
args . topage <- Some s
let setstdinuser u =
match args . inputs with
2014-12-11 19:17:02 +01:00
| ( StdIn , x , _ , o , f , g ) :: t -> args . inputs <- ( StdIn , x , u , o , f , g ) :: t
2013-08-20 16:32:57 +02:00
| _ -> error " -stdin-user: must follow -stdin "
let setstdinowner o =
match args . inputs with
2014-12-11 19:17:02 +01:00
| ( StdIn , x , u , _ , f , g ) :: t -> args . inputs <- ( StdIn , x , u , o , f , g ) :: t
2014-11-17 19:48:17 +01:00
| _ -> error " -stdin-owner: must follow -stdin "
2013-08-20 16:32:57 +02:00
2013-10-02 16:29:53 +02:00
let setopenatpage n =
2019-07-08 13:44:27 +02:00
detect_duplicate_op ( OpenAtPage n ) ;
2013-10-02 16:29:53 +02:00
args . op <- Some ( OpenAtPage n )
let setopenatpagefit n =
2019-07-08 13:44:27 +02:00
detect_duplicate_op ( OpenAtPageFit n ) ;
2013-10-02 16:29:53 +02:00
args . op <- Some ( OpenAtPageFit n )
2023-04-18 15:42:17 +02:00
let setopenatpagecustom n =
detect_duplicate_op ( OpenAtPageCustom n ) ;
args . op <- Some ( OpenAtPageCustom n )
2013-10-24 16:21:54 +02:00
let setlabelstyle s =
let style =
match s with
| " DecimalArabic " -> Pdfpagelabels . DecimalArabic
| " UppercaseRoman " -> Pdfpagelabels . UppercaseRoman
| " LowercaseRoman " -> Pdfpagelabels . LowercaseRoman
| " UppercaseLetters " -> Pdfpagelabels . UppercaseLetters
| " LowercaseLetters " -> Pdfpagelabels . LowercaseLetters
2014-09-18 15:27:07 +02:00
| " NoLabelPrefixOnly " -> Pdfpagelabels . NoLabelPrefixOnly
2013-10-24 16:21:54 +02:00
| _ -> error " Unknown label style "
in
2014-09-18 15:27:07 +02:00
args . labelstyle <- style
2013-10-24 16:21:54 +02:00
let setlabelprefix s =
args . labelprefix <- Some s
2014-09-18 16:40:22 +02:00
let setlabelstartval i =
args . labelstartval <- i
2013-10-24 12:21:52 +02:00
2020-01-25 08:22:45 +01:00
let setlabelsprogress () =
args . labelsprogress <- true
2014-10-02 20:57:06 +02:00
let setcpdflin s =
args . cpdflin <- Some s
2014-10-15 18:51:15 +02:00
let setrecrypt () =
args . recrypt <- true
2014-10-15 14:48:39 +02:00
2015-01-07 19:29:11 +01:00
let setremovedictentry s =
2019-07-08 13:44:27 +02:00
detect_duplicate_op ( RemoveDictEntry s ) ;
2015-01-07 19:29:11 +01:00
args . op <- Some ( RemoveDictEntry s )
2015-01-07 21:29:39 +01:00
let logto = ref None
let setsqueezelogto s =
logto := Some s
2015-01-11 19:39:35 +01:00
let setstayonerror () =
set stay_on_error
2022-09-26 21:38:16 +02:00
let setembedstd14 s =
args . embedstd14 <- Some s
2015-04-13 15:17:48 +02:00
2023-10-05 17:45:11 +02:00
let _ =
Cpdfdrawcontrol . setembedstd14 := ( fun b dir -> if b then args . embedstd14 <- Some dir else args . embedstd14 <- None )
2017-05-19 20:10:49 +02:00
let sethardbox box =
2019-07-08 13:44:27 +02:00
detect_duplicate_op ( HardBox box ) ;
2017-05-19 20:10:49 +02:00
args . op <- Some ( HardBox box )
2019-06-26 18:43:59 +02:00
let setalsosetxml () =
args . alsosetxml <- true
let setjustsetxml () =
args . justsetxml <- true
2019-06-29 16:03:22 +02:00
let setsetmetadatadate d =
2019-07-08 13:44:27 +02:00
detect_duplicate_op ( SetMetadataDate d ) ;
2019-06-29 16:03:22 +02:00
args . op <- Some ( SetMetadataDate d )
2019-06-30 15:05:20 +02:00
let setgsmalformed () =
args . gs_malformed <- true
2019-07-03 15:40:32 +02:00
let setmergeaddbookmarks () =
args . merge_add_bookmarks <- true
let setmergeaddbookmarksusetitles () =
args . merge_add_bookmarks_use_titles <- true
2019-07-06 18:55:26 +02:00
let setbookmarksopentolevel l =
2019-07-08 13:44:27 +02:00
detect_duplicate_op ( BookmarksOpenToLevel l ) ;
2019-07-06 18:55:26 +02:00
args . op <- Some ( BookmarksOpenToLevel l )
2019-07-07 18:07:52 +02:00
let setcreatepdfpages i =
args . createpdf_pages <- i
let setcreatepdfpapersize s =
args . createpdf_pagesize <-
2019-09-26 12:44:54 +02:00
let w , h = Cpdfcoord . parse_coordinate ( Pdf . empty () ) s in
2019-07-07 18:07:52 +02:00
Pdfpaper . make Pdfunits . PdfPoint w h
2021-10-18 19:19:59 +02:00
let setimpose s =
setop ( Impose true ) () ;
args . coord <- s
let setimposexy s =
setop ( Impose false ) () ;
args . coord <- s
2023-11-15 18:34:14 +01:00
let setchop s =
let x , y = Cpdfcoord . parse_coordinate empty s in
setop ( Chop ( int_of_float x , int_of_float y ) ) ()
2024-02-05 15:01:16 +01:00
let setchopv x =
2024-08-31 20:06:48 +02:00
setop ( ChopHV ( false , Cpdfcoord . parse_single_number ( Pdf . empty () ) x ) ) ()
2024-02-05 15:01:16 +01:00
let setchoph y =
2024-08-31 20:06:48 +02:00
setop ( ChopHV ( true , Cpdfcoord . parse_single_number ( Pdf . empty () ) y ) ) ()
2024-02-05 15:01:16 +01:00
2021-10-28 18:06:46 +02:00
let setreplacedictentry s =
setop ( ReplaceDictEntry s ) ()
2021-10-29 16:09:21 +02:00
let setprintdictentry s =
setop ( PrintDictEntry s ) ()
2021-10-28 18:06:46 +02:00
let setreplacedictentryvalue s =
2021-10-29 16:09:21 +02:00
try
2023-02-17 16:30:39 +01:00
let pdfobj = Cpdfjson . object_of_json ( Cpdfyojson . Safe . from_string s ) in
2021-10-29 16:09:21 +02:00
args . replace_dict_entry_value <- pdfobj
with
e -> error ( Printf . sprintf " Failed to parse replacement value: %s \n " ( Printexc . to_string e ) )
2021-10-28 18:06:46 +02:00
let setdictentrysearch s =
2021-10-29 16:09:21 +02:00
try
2023-02-17 16:30:39 +01:00
let pdfobj = Cpdfjson . object_of_json ( Cpdfyojson . Safe . from_string s ) in
2021-10-29 16:09:21 +02:00
args . dict_entry_search <- Some pdfobj
with
e -> error ( Printf . sprintf " Failed to parse search term: %s \n " ( Printexc . to_string e ) )
2021-10-28 18:06:46 +02:00
2021-11-01 16:40:33 +01:00
let setprintfontencoding s =
setop ( PrintFontEncoding s ) ()
2021-11-19 01:32:35 +01:00
let settypeset s =
setop ( Typeset s ) ()
2024-09-27 14:20:11 +02:00
let setsubformat s =
2024-09-14 15:40:53 +02:00
args . subformat <- Some ( Cpdfua . subformat_of_string s )
2024-09-14 15:34:27 +02:00
2021-12-02 00:50:04 +01:00
let settableofcontentstitle s =
args . toc_title <- s
2021-12-10 13:58:30 +01:00
let settocnobookmark () =
args . toc_bookmark <- false
2021-12-15 14:01:51 +01:00
let setidironlypdfs () =
args . idir_only_pdfs <- true
2022-01-02 16:18:55 +01:00
let setnowarnrotate () =
args . no_warn_rotate <- true
2019-08-18 11:55:40 +02:00
let whingemalformed () =
2023-04-25 14:45:56 +02:00
Pdfe . log " Command line must be of exactly the form \n cpdf <infile> -gs <path> -gs-malformed-force -o <outfile> \n " ;
2019-08-18 11:55:40 +02:00
exit 1
2022-12-16 17:49:59 +01:00
let addop o =
2023-09-26 18:51:56 +02:00
begin match o with Cpdfdraw . FontPack _ -> set Cpdfdrawcontrol . fontpack_initialised | _ -> () end ;
2023-09-01 20:09:23 +02:00
begin match args . op with Some Draw -> () | _ -> error " Need to be in drawing mode for this. " end ;
Cpdfdrawcontrol . addop o
2022-12-23 15:29:47 +01:00
2023-07-17 14:38:35 +02:00
let embed_font_inner font =
match font with
2023-07-07 15:34:51 +02:00
| StandardFont f ->
2023-07-17 15:13:24 +02:00
(* Printf.printf "embed_font: StandardFont\n"; *)
2023-07-07 15:34:51 +02:00
begin match args . embedstd14 with
| Some dirname ->
2024-04-01 20:03:31 +02:00
begin try
let fontfile , fontname = Cpdfembed . load_substitute dirname f in
Cpdfembed . EmbedInfo { fontfile ; fontname ; encoding = args . fontencoding }
with
e -> error ( Printf . sprintf " Can't load font for embedding: %s \n " ( Printexc . to_string e ) )
end
2023-07-07 15:34:51 +02:00
| None ->
PreMadeFontPack ( Cpdfembed . fontpack_of_standardfont ( Pdftext . StandardFont ( f , args . fontencoding ) ) )
end
| OtherFont f ->
ExistingNamedFont
2023-07-14 13:37:57 +02:00
| EmbeddedFont name ->
2023-07-17 15:13:24 +02:00
(* Printf.printf "embed_font: TTF\n"; *)
2023-07-14 13:37:57 +02:00
try
2023-09-26 18:51:56 +02:00
let fontname , font = Hashtbl . find Cpdfdrawcontrol . ttfs name in
2023-07-14 13:37:57 +02:00
args . fontname <- fontname ;
font
with
Not_found -> error ( Printf . sprintf " Font %s not found " name )
2023-07-07 15:34:51 +02:00
2023-07-17 14:38:35 +02:00
let embed_font () = embed_font_inner args . font
2023-09-26 18:51:56 +02:00
let _ = Cpdfdrawcontrol . embed_font := embed_font
2023-10-04 16:07:29 +02:00
let _ = Cpdfdrawcontrol . setdrawing := ( fun () -> args . op <- Some Draw )
2023-07-14 15:40:59 +02:00
let setfont f =
2023-10-04 16:07:29 +02:00
(* Printf.printf "Cpdfcommand.setfont: |%s|\n%!" f; *)
2023-07-17 14:38:35 +02:00
try
2023-09-26 18:51:56 +02:00
let fontname , _ = Hashtbl . find Cpdfdrawcontrol . ttfs f in
2023-07-17 14:38:35 +02:00
args . font <- EmbeddedFont f ;
args . fontname <- fontname
with
Not_found ->
let convert f = (* convert from written PDF representation to internal PDF string e.g # sequences *)
match Pdfread . lex_name ( Pdfio . input_of_string f ) with Pdfgenlex . LexName s -> s | _ -> assert false
in
args . font <-
begin match Pdftext . standard_font_of_name ( " / " ^ f ) with
| Some x -> StandardFont x
| None ->
2023-10-04 16:27:59 +02:00
if f < > " " && hd ( explode f ) < > '/' then error " Font not found " ;
2023-07-17 14:38:35 +02:00
OtherFont ( convert f )
end ;
args . fontname <-
begin match Pdftext . standard_font_of_name ( " / " ^ f ) with
| Some x -> f
| None -> convert f
end ;
(* If drawing, add the font pack as an op. *)
begin match args . op with Some Draw -> addop ( Cpdfdraw . FontPack ( f , embed_font () , null_hash () ) ) | _ -> () end
2023-07-14 15:40:59 +02:00
2023-07-14 14:57:00 +02:00
let loadttf n =
2023-07-17 17:39:41 +02:00
(* Printf.printf "loadttf: %s\n" n; *)
2023-07-14 14:57:00 +02:00
let name , filename =
match String . split_on_char '=' n with
| [ name ; filename ] -> name , filename
2023-07-14 17:36:13 +02:00
| _ -> error " loadttf: bad file specification. Should be <name>=<filename> "
2023-07-14 14:57:00 +02:00
in
try
let fontfile = Pdfio . bytes_of_string ( contents_of_file filename ) in
let fontname = Filename . remove_extension ( Filename . basename filename ) in
Hashtbl . replace
2023-09-26 18:51:56 +02:00
Cpdfdrawcontrol . ttfs
2023-07-14 14:57:00 +02:00
name
( fontname , Cpdfembed . EmbedInfo { fontfile ; fontname ; encoding = args . fontencoding } ) ;
2023-07-14 15:40:59 +02:00
(* If drawing, add the font pack as an op. *)
2023-07-17 14:38:35 +02:00
begin match args . op with
Some Draw -> addop ( Cpdfdraw . FontPack ( fontname , embed_font_inner ( EmbeddedFont name ) , null_hash () ) ) | _ -> () end
2023-07-14 14:57:00 +02:00
with
2023-08-14 17:15:11 +02:00
_ -> error " addtff: could not load TTF "
2023-07-17 14:53:09 +02:00
2023-10-04 16:59:52 +02:00
let () = Cpdfdrawcontrol . loadttf := loadttf
2023-04-25 14:45:56 +02:00
let setstderrtostdout () =
Pdfe . logger := ( fun s -> print_string s ; flush stdout )
2023-05-02 16:04:35 +02:00
let settextwidth s =
args . op <- Some ( TextWidth s )
2023-07-14 17:27:53 +02:00
let setdraw () =
2023-07-17 14:53:09 +02:00
args . op <- Some Draw
2023-07-14 17:27:53 +02:00
2024-09-06 15:04:17 +02:00
let setdrawstructtree () =
args . draw_struct_tree <- true
2023-11-02 19:49:15 +01:00
let setextractfontfile s =
args . op <- Some ( ExtractFontFile s )
2023-09-26 18:51:56 +02:00
let () = Cpdfdrawcontrol . getfontname := fun () -> args . fontname
let () = Cpdfdrawcontrol . getfontsize := fun () -> args . fontsize
2023-09-27 18:02:22 +02:00
let () = Cpdfdrawcontrol . setfontname := setfont
let () = Cpdfdrawcontrol . setfontsize := fun s -> args . fontsize <- s
2024-09-20 15:15:10 +02:00
let () = Cpdfdrawcontrol . getindent := fun () -> args . indent
2023-09-26 18:51:56 +02:00
2023-11-09 12:25:19 +01:00
let setlistimagesjson () =
setop ListImages () ;
args . format_json <- true
2023-12-04 17:32:12 +01:00
let set_jbig2_global f =
jbig2_global := Some ( Pdfio . bytes_of_string ( contents_of_file f ) )
let clear_jbig2_global () =
jbig2_global := None
2023-12-06 13:20:27 +01:00
let setjpegquality q =
args . jpegquality <- q
let setjpegqualitylossless q =
args . jpegqualitylossless <- q
2023-12-22 17:45:53 +01:00
let set1bppmethod m =
args . onebppmethod <- m
2023-12-24 14:54:21 +01:00
let setpixelthreshold i =
args . pixel_threshold <- i
2023-12-27 20:53:02 +01:00
let setlengththreshold i =
args . length_threshold <- i
let setpercentagethreshold i =
args . percentage_threshold <- i
2024-01-04 12:43:27 +01:00
let setdpithreshold i =
args . dpi_threshold <- i
2023-12-31 12:59:48 +01:00
let setlosslessresample i =
args . resample_factor <- i
2024-02-01 14:29:20 +01:00
let setlosslessresampledpi i =
2024-02-01 17:38:07 +01:00
args . resample_factor <- -. i
2024-02-01 14:29:20 +01:00
2024-01-01 20:09:40 +01:00
let setresampleinterpolate () =
args . resample_interpolate <- true
2023-12-31 12:59:48 +01:00
2024-01-12 16:00:28 +01:00
let setjbig2_lossy_threshold f =
args . jbig2_lossy_threshold <- f
2023-12-28 17:18:25 +01:00
let setprocessimagesinfo () =
set Cpdfimage . debug_image_processing
2024-06-24 16:29:32 +02:00
let setextractstream s =
args . op <- Some ( ExtractStream s )
2024-01-17 19:37:58 +01:00
2024-06-24 16:29:32 +02:00
let setextractstreamdecomp s =
args . op <- Some ( ExtractStream s ) ;
2024-01-17 19:37:58 +01:00
args . extract_stream_decompress <- true
2024-06-24 16:29:32 +02:00
let setprintobj s =
args . op <- Some ( PrintObj s )
2024-01-17 19:37:58 +01:00
2024-09-02 17:30:02 +02:00
let setreplaceobj s =
match String . split_on_char '=' s with
| [ a ; b ] -> args . op <- Some ( ReplaceObj ( a , b ) )
| _ -> error " replace_obj: bad specification "
2024-09-18 16:31:24 +02:00
let expand_namespace = function
| " PDF " -> standard_namespace
| " PDF2 " -> pdf2_namespace
| x -> x
2024-09-20 18:01:41 +02:00
let setreadableops () =
Pdfops . whitespace := " \n " ;
2024-09-20 18:22:12 +02:00
Pdfops . always_add_whitespace := true ;
Pdfops . write_comments := true
2024-09-20 18:01:41 +02:00
2024-09-26 16:22:22 +02:00
let addeltinfo s =
match String . split_on_char '=' s with
| h :: t ->
let pdfobj = Pdfread . parse_single_object ( String . concat " " t ) in
Cpdfdrawcontrol . eltinfo h pdfobj
| [] -> error " addeltinfo: bad format "
2024-09-02 15:00:11 +02:00
let specs =
2013-08-20 16:32:57 +02:00
[ ( " -version " ,
Arg . Unit ( setop Version ) ,
" Print the cpdf version number " ) ;
( " -o " ,
Arg . String setout ,
" Set the output file, if appropriate " ) ;
( " -i " ,
Arg . String set_input ,
" Add an input file " ) ;
2023-01-11 05:12:51 +01:00
( " -png " ,
Arg . String set_input_png ,
" Load from a PNG file, converting to PDF " ) ;
( " -jpeg " ,
Arg . String set_input_jpeg ,
" Load from a JPEG file, converting to PDF " ) ;
2024-03-22 14:57:04 +01:00
( " -jpeg2000 " ,
Arg . String set_input_jpeg2000 ,
" Load from a JPEG2000 file, converting to PDF " ) ;
2023-12-04 14:39:56 +01:00
( " -jbig2 " ,
Arg . String set_input_jbig2 ,
" Load from a JBIG2 fragment, converting to PDF " ) ;
2023-12-04 17:32:12 +01:00
( " -jbig2-global " ,
Arg . String set_jbig2_global ,
" Load a JBIG2 global stream " ) ;
( " -jbig2-global-clear " ,
Arg . Unit clear_jbig2_global ,
" Forget any JBIG2 global stream " ) ;
2013-08-20 16:32:57 +02:00
( " -idir " ,
Arg . String set_input_dir ,
" Add a directory of files " ) ;
2021-12-15 14:01:51 +01:00
( " -idir-only-pdfs " ,
Arg . Unit setidironlypdfs ,
" Have -idir ignore files not ending in .pdf " ) ;
2021-05-20 17:53:35 +02:00
( " -pw " ,
Arg . String setdashpassword ,
" Supply a password explicitly -pw=<password> " ) ;
2013-08-20 16:32:57 +02:00
( " -stdin " ,
Arg . Unit setstdin ,
" Read input from standard input " ) ;
( " -stdin-owner " ,
Arg . String setstdinowner ,
" Owner password for -stdin " ) ;
( " -stdin-user " ,
Arg . String setstdinuser ,
" User password for -stdin " ) ;
( " -stdout " ,
Arg . Unit setstdout ,
" Send result to standard output " ) ;
2018-04-05 13:56:32 +02:00
( " -error-on-malformed " ,
Arg . Set Pdfread . error_on_malformed ,
" Do not try to read malformed files " ) ;
2013-08-20 16:32:57 +02:00
( " -range " ,
Arg . String setrange ,
" Explicitly add a range " ) ;
2021-10-16 16:47:41 +02:00
( " -collate " ,
2024-01-23 19:14:00 +01:00
Arg . Unit ( fun () -> args . collate <- true ) ,
2021-10-16 16:47:41 +02:00
" Collate ranges when merging " ) ;
2014-12-11 19:19:57 +01:00
( " -revision " ,
Arg . Int setrevision ,
2016-11-04 17:46:08 +01:00
" " ) ;
2013-08-20 16:32:57 +02:00
( " -change-id " ,
Arg . Unit ( setop ChangeId ) ,
" Change the file's /ID tag " ) ;
( " -no-preserve-objstm " ,
2024-01-23 19:22:09 +01:00
Arg . Unit ( fun () -> args . preserve_objstm <- false ) ,
2013-08-20 16:32:57 +02:00
" Don't preserve object streams " ) ;
( " -create-objstm " ,
2024-01-23 19:22:09 +01:00
Arg . Unit ( fun () -> args . create_objstm <- true ) ,
2013-08-20 16:32:57 +02:00
" Create object streams anew " ) ;
( " -keep-version " ,
2024-01-23 19:37:35 +01:00
Arg . Unit ( fun () -> args . keepversion <- true ) ,
2013-08-20 16:32:57 +02:00
" Don't change the version number " ) ;
( " -l " ,
2024-01-23 15:32:31 +01:00
Arg . Unit ( fun () -> args . linearize <- true ) ,
2014-12-01 19:12:02 +01:00
" Linearize output file " ) ;
( " -keep-l " ,
2024-01-23 15:32:31 +01:00
Arg . Unit ( fun () -> args . keeplinearize <- true ) ,
2014-12-01 19:12:02 +01:00
" Linearize if the input file was linearized " ) ;
2014-10-02 20:57:06 +02:00
( " -cpdflin " ,
Arg . String setcpdflin ,
" Set location of 'cpdflin' " ) ;
2014-10-15 14:48:39 +02:00
( " -recrypt " ,
2014-10-15 18:51:15 +02:00
Arg . Unit setrecrypt ,
2014-10-15 14:48:39 +02:00
" Keep this file's encryption when writing " ) ;
2013-08-20 16:32:57 +02:00
( " -raw " ,
2021-12-19 13:55:06 +01:00
Arg . Unit ( setencoding Cpdfmetadata . Raw ) ,
2013-08-20 16:32:57 +02:00
" Do not process text " ) ;
( " -stripped " ,
2021-12-19 13:55:06 +01:00
Arg . Unit ( setencoding Cpdfmetadata . Stripped ) ,
2013-08-20 16:32:57 +02:00
" Process text by simple stripping to ASCII " ) ;
( " -utf8 " ,
2021-12-19 13:55:06 +01:00
Arg . Unit ( setencoding Cpdfmetadata . UTF8 ) ,
2013-08-20 16:32:57 +02:00
" Process text by conversion to UTF8 Unicode " ) ;
( " -fast " ,
Arg . Unit setfast ,
" Speed over correctness with malformed documents " ) ;
2019-10-21 13:06:04 +02:00
( " -args " ,
2021-10-12 19:40:47 +02:00
Arg . Unit ( fun () -> () ) ,
2019-10-21 13:06:04 +02:00
" Get arguments from a file. " ) ;
2024-09-02 15:25:15 +02:00
( " -args-json " ,
Arg . Unit ( fun () -> () ) ,
" Get arguments from a JSON file. " ) ;
2013-08-20 16:32:57 +02:00
( " -merge " ,
Arg . Unit ( setop Merge ) ,
" Merge a number of files into one " ) ;
( " -retain-numbering " ,
Arg . Unit set_retain_numbering ,
" Don't renumber pages when merging " ) ;
2019-07-03 15:40:32 +02:00
( " -merge-add-bookmarks " ,
Arg . Unit setmergeaddbookmarks ,
" Add bookmarks for each file to merged file " ) ;
( " -merge-add-bookmarks-use-titles " ,
Arg . Unit setmergeaddbookmarksusetitles ,
" Use title of document rather than filename " ) ;
2024-07-03 13:53:14 +02:00
( " -process-struct-trees " ,
Arg . Unit ( fun () -> args . process_struct_trees <- true ) ,
" Process structure trees " ) ;
2013-08-20 16:32:57 +02:00
( " -remove-duplicate-fonts " ,
Arg . Unit set_remove_duplicate_fonts ,
" Remove duplicate fonts when merging " ) ;
( " -split " ,
Arg . Unit ( setop Split ) ,
" Split a file into individual pages " ) ;
( " -chunk " ,
Arg . Int setchunk ,
" Set chunk size for -split (default 1) " ) ;
( " -split-bookmarks " ,
2024-01-23 15:32:31 +01:00
Arg . Int ( fun i -> setop ( SplitOnBookmarks i ) () ) ,
2013-08-20 16:32:57 +02:00
" Split a file at bookmarks at a given level " ) ;
2023-10-25 19:44:29 +02:00
( " -split-max " ,
Arg . String setsplitmax ,
" Split a file to files of a given size " ) ;
2023-10-30 19:30:49 +01:00
( " -spray " ,
Arg . Unit ( setop Spray ) ,
" Split a file by alternating pages " ) ;
2013-08-20 16:32:57 +02:00
( " -scale-page " ,
Arg . String setscale ,
" -scale-page \" sx sy \" scales by (sx, sy) " ) ;
( " -scale-to-fit " ,
Arg . String setscaletofit ,
" -scale-to-fit \" x y \" scales to page size (x, y) " ) ;
2024-09-21 18:35:29 +02:00
( " -stretch " ,
Arg . String setstretch ,
" -stretch \" x y \" scales without preserving aspect ratio " ) ;
2013-08-20 16:32:57 +02:00
( " -scale-contents " ,
Arg . Float setscalecontents ,
2022-09-01 17:46:24 +02:00
" Scale contents by the given factor " ) ;
2013-08-20 16:32:57 +02:00
( " -scale-to-fit-scale " ,
Arg . Float setscaletofitscale ,
" -scale-to-fit-scale (1.0 = 100%) " ) ;
( " -shift " ,
Arg . String setshift ,
" -shift \" dx dy \" shifts the chosen pages " ) ;
2024-01-22 17:36:37 +01:00
( " -shift-boxes " ,
Arg . String setshiftboxes ,
2024-01-25 18:51:48 +01:00
" -shift-boxes \" dx dy \" shifts boxes on the chosen pages " ) ;
2013-08-20 16:32:57 +02:00
( " -rotate " ,
Arg . Int setrotate ,
" Set rotation of pages to 0, 90, 180, 270 " ) ;
( " -rotateby " ,
Arg . Int setrotateby ,
" Rotate pages by 90, 180 or 270 degrees " ) ;
( " -rotate-contents " ,
2024-01-23 15:32:31 +01:00
Arg . Float ( fun f -> setop ( RotateContents f ) () ) ,
2013-08-20 16:32:57 +02:00
" Rotate contents of pages " ) ;
( " -upright " ,
Arg . Unit ( setop Upright ) ,
" Make pages upright " ) ;
2022-01-02 16:18:55 +01:00
( " -prerotate " ,
2024-01-23 19:22:09 +01:00
Arg . Unit ( fun () -> args . prerotate <- true ) ,
2022-01-02 16:18:55 +01:00
" Calls -upright on pages before modifying them, if required " ) ;
( " -no-warn-rotate " ,
Arg . Unit setnowarnrotate ,
" Do not warn on pages of PDFs which are not upright " ) ;
2013-08-20 16:32:57 +02:00
( " -hflip " ,
Arg . Unit ( setop HFlip ) ,
" Flip pages horizontally " ) ;
( " -vflip " ,
Arg . Unit ( setop VFlip ) ,
" Flip pages vertically " ) ;
( " -crop " ,
Arg . String setcrop ,
2023-08-03 14:21:24 +02:00
" Crop specified pages (synonym for -cropbox) " ) ;
2019-06-26 15:43:24 +02:00
( " -cropbox " ,
Arg . String setcrop ,
2023-08-03 14:21:24 +02:00
" Crop specified pages " ) ;
2019-06-26 15:43:24 +02:00
( " -artbox " ,
Arg . String setart ,
" Set art box for specified pages " ) ;
( " -bleedbox " ,
Arg . String setbleed ,
" Set bleed box for specified pages " ) ;
( " -trimbox " ,
Arg . String settrim ,
" Set trim box for specified pages " ) ;
2017-05-19 20:10:49 +02:00
( " -hard-box " ,
Arg . String sethardbox ,
" Hard crop specified pages to the given box " ) ;
2019-07-15 12:52:14 +02:00
( " -show-boxes " ,
Arg . Unit ( setop ShowBoxes ) ,
" Show boxes by adding rectangles to pages " ) ;
2019-07-15 14:42:32 +02:00
( " -trim-marks " ,
Arg . Unit ( setop TrimMarks ) ,
" Add trim marks " ) ;
2013-08-20 16:32:57 +02:00
( " -remove-crop " ,
Arg . Unit ( setop RemoveCrop ) ,
" Remove cropping on specified pages " ) ;
2019-06-26 15:43:24 +02:00
( " -remove-cropbox " ,
Arg . Unit ( setop RemoveCrop ) ,
" Synonym for -remove-crop " ) ;
( " -remove-trimbox " ,
Arg . Unit ( setop RemoveTrim ) ,
" Remove trim box on specified pages " ) ;
( " -remove-bleedbox " ,
Arg . Unit ( setop RemoveBleed ) ,
" Remove bleed box on specified pages " ) ;
( " -remove-artbox " ,
Arg . Unit ( setop RemoveArt ) ,
" Remove art box on specified pages " ) ;
2013-08-20 16:32:57 +02:00
( " -frombox " , Arg . String setfrombox , " Set box to copy from " ) ;
( " -tobox " , Arg . String settobox , " Set box to copy to " ) ;
2015-01-21 13:20:49 +01:00
( " -mediabox-if-missing " ,
Arg . Unit setmediaboxifmissing ,
" If copy from box missing, substitute media box " ) ;
2013-08-20 16:32:57 +02:00
( " -mediabox " ,
Arg . String setmediabox ,
" Set media box on specified pages " ) ;
( " -encrypt " ,
Arg . Unit setencryptcollect ,
" Encrypt a document " ) ;
( " -decrypt " ,
Arg . Unit ( setop Decrypt ) ,
" Decrypt a file " ) ;
2021-10-12 19:40:47 +02:00
( " -decrypt-force " ,
2024-01-23 19:46:09 +01:00
Arg . Unit ( fun () -> args . debugforce <- true ) ,
2021-10-12 19:40:47 +02:00
" Decrypt a file even without password " ) ;
2024-01-23 19:37:35 +01:00
( " -no-edit " , Arg . Unit ( fun () -> args . no_edit <- true ) , " No edits " ) ;
( " -no-print " , Arg . Unit ( fun () -> args . no_print <- true ) , " No printing " ) ;
( " -no-copy " , Arg . Unit ( fun () -> args . no_copy <- true ) , " No copying " ) ;
( " -no-annot " , Arg . Unit ( fun () -> args . no_annot <- true ) , " No annotations " ) ;
( " -no-forms " , Arg . Unit ( fun () -> args . no_forms <- true ) , " No forms " ) ;
( " -no-extract " , Arg . Unit ( fun () -> args . no_extract <- true ) , " No extracting " ) ;
( " -no-assemble " , Arg . Unit ( fun () -> args . no_assemble <- true ) , " No assembling " ) ;
( " -no-hq-print " , Arg . Unit ( fun () -> args . no_hq_print <- true ) , " No high quality printing " ) ;
2013-08-20 16:32:57 +02:00
( " -no-encrypt-metadata " ,
Arg . Unit set_no_encrypt_metadata ,
" Don't encrypt metadata (AES only) " ) ;
( " -decompress " ,
Arg . Unit ( setop Decompress ) ,
" Decompress " ) ;
( " -compress " ,
Arg . Unit ( setop Compress ) ,
" Compress streams, leaving metadata alone " ) ;
( " -remove-duplicate-streams " ,
2024-01-23 19:22:09 +01:00
Arg . Unit ( fun () -> args . remove_duplicate_streams <- true ) ,
2013-08-20 16:32:57 +02:00
" " ) ;
( " -list-bookmarks " ,
Arg . Unit ( setop ListBookmarks ) ,
" List Bookmarks " ) ;
2021-10-26 18:32:36 +02:00
( " -list-bookmarks-json " ,
Arg . Unit setlistbookmarksjson ,
" List Bookmarks in JSON format " ) ;
2013-08-20 16:32:57 +02:00
( " -remove-bookmarks " ,
Arg . Unit ( setop RemoveBookmarks ) ,
" Remove bookmarks from a file " ) ;
( " -add-bookmarks " ,
Arg . String setaddbookmarks ,
" Add bookmarks from the given file " ) ;
2021-10-26 18:32:36 +02:00
( " -add-bookmarks-json " ,
Arg . String setaddbookmarksjson ,
" Add bookmarks from the given file in JSON format " ) ;
2019-07-06 18:55:26 +02:00
( " -bookmarks-open-to-level " ,
Arg . Int setbookmarksopentolevel ,
" Open bookmarks to this level (0 = all closed) " ) ;
2013-08-20 16:32:57 +02:00
( " -presentation " ,
Arg . Unit ( setop Presentation ) ,
" Make a presentation " ) ;
( " -trans " ,
Arg . String settrans ,
" Set the transition method for -presentation " ) ;
( " -duration " ,
Arg . Float setduration ,
" Set the display duration for -presentation " ) ;
( " -vertical " ,
Arg . Unit setvertical ,
" Set dimension for Split and Blinds styles " ) ;
( " -outward " ,
Arg . Unit setoutward ,
" Set direction for Split and Box styles " ) ;
( " -direction " ,
Arg . Int setdirection ,
" Set direction for Wipe and Glitter styles " ) ;
( " -effect-duration " ,
Arg . Float seteffectduration ,
" Set the effect duration in seconds " ) ;
( " -stamp-on " ,
Arg . String setstampon ,
" Stamp a file on some pages of another " ) ;
( " -stamp-under " ,
Arg . String setstampunder ,
" Stamp a file under some pages of another " ) ;
2014-10-08 15:52:55 +02:00
( " -scale-stamp-to-fit " ,
2024-01-23 19:22:09 +01:00
Arg . Unit ( fun () -> args . scale_stamp_to_fit <- true ) ,
2014-10-08 15:52:55 +02:00
" Scale the stamp to fit the page " ) ;
2013-08-20 16:32:57 +02:00
( " -combine-pages " ,
Arg . String setcombinepages ,
" Combine two files by merging individual pages " ) ;
( " -add-text " ,
Arg . String setaddtext ,
" Superimpose text on the given range of pages " ) ;
( " -remove-text " ,
Arg . Unit ( setop RemoveText ) ,
" Remove text previously added by cpdf " ) ;
( " -add-rectangle " ,
Arg . String setrectangle ,
2016-11-08 19:15:04 +01:00
" Add a rectangle to the page " ) ;
2013-08-20 16:32:57 +02:00
( " -bates " ,
2024-01-23 19:37:35 +01:00
Arg . Int ( fun n -> args . bates <- n ) ,
2013-08-20 16:32:57 +02:00
" Set the base bates number " ) ;
2015-07-26 13:31:43 +02:00
( " -bates-at-range " ,
Arg . Int setbatesrange ,
" Set the base bates number at first page in range " ) ;
2015-07-17 17:34:47 +02:00
( " -bates-pad-to " ,
2024-01-23 19:37:35 +01:00
Arg . Int ( fun n -> args . batespad <- Some n ) ,
2015-07-17 17:34:47 +02:00
" Pad the bates number with leading zeroes to width " ) ;
2013-08-20 16:32:57 +02:00
( " -font " ,
Arg . String setfont ,
" Set the font " ) ;
2022-09-21 17:10:48 +02:00
( " -font-size " ,
2024-10-03 18:59:10 +02:00
Arg . String setfontsize ,
2022-09-21 17:10:48 +02:00
" Set the font size " ) ;
2023-07-14 13:37:57 +02:00
( " -load-ttf " ,
Arg . String loadttf ,
" Use a TrueType font " ) ;
2022-09-26 21:38:16 +02:00
( " -embed-std14 " ,
Arg . String setembedstd14 ,
" Embed standard 14 fonts " ) ;
2013-08-20 16:32:57 +02:00
( " -color " ,
Arg . String setcolor ,
" Set the color " ) ;
( " -opacity " ,
Arg . Float setopacity ,
" Set the text opacity " ) ;
( " -outline " ,
2024-01-23 19:22:09 +01:00
Arg . Unit ( fun () -> args . outline <- true ) ,
2013-08-20 16:32:57 +02:00
" Use outline mode for text " ) ;
( " -linewidth " ,
2024-10-03 18:59:10 +02:00
Arg . String setlinewidth ,
2013-08-20 16:32:57 +02:00
" Set line width for outline text " ) ;
( " -pos-center " ,
Arg . String setposcenter ,
" Set position relative to center of baseline " ) ;
( " -pos-left " ,
Arg . String setposleft ,
" Set position relative to left of baseline " ) ;
( " -pos-right " ,
Arg . String setposright ,
" Set position relative to right of baseline " ) ;
( " -top " ,
Arg . String settop ,
" Set position relative to center top of page " ) ;
( " -topleft " ,
Arg . String settopleft ,
" Set position relative to top left of page " ) ;
( " -topright " ,
Arg . String settopright ,
" Set position relative to top right of page " ) ;
( " -left " ,
Arg . String setleft ,
" Set position relative to center left of page " ) ;
( " -bottomleft " ,
Arg . String setbottomleft ,
" Set position relative to bottom left of page " ) ;
( " -bottom " ,
Arg . String setbottom ,
" Set position relative to center bottom of page " ) ;
( " -bottomright " ,
Arg . String setbottomright ,
" Set position relative to bottom right of page " ) ;
( " -right " ,
Arg . String setright ,
" Set position relative to center right of page " ) ;
( " -diagonal " ,
Arg . Unit setdiagonal ,
" Place text diagonally across page " ) ;
( " -reverse-diagonal " ,
Arg . Unit setreversediagonal ,
" Place text diagonally across page from top left " ) ;
2014-10-03 16:55:03 +02:00
( " -center " ,
Arg . Unit setcenter ,
" Place text in the center of the page " ) ;
2013-08-20 16:32:57 +02:00
( " -justify-left " ,
2023-05-11 22:54:23 +02:00
Arg . Unit ( fun () -> args . justification <- Cpdfaddtext . LeftJustify ) ,
2013-08-20 16:32:57 +02:00
" Justify multiline text left " ) ;
( " -justify-right " ,
2023-05-11 22:54:23 +02:00
Arg . Unit ( fun () -> args . justification <- Cpdfaddtext . RightJustify ) ,
2013-08-20 16:32:57 +02:00
" Justify multiline text right " ) ;
( " -justify-center " ,
2023-05-11 22:54:23 +02:00
Arg . Unit ( fun () -> args . justification <- Cpdfaddtext . CentreJustify ) ,
2019-10-21 12:54:34 +02:00
" Justify multiline text center " ) ;
2013-08-20 16:32:57 +02:00
( " -underneath " ,
2023-05-11 22:54:23 +02:00
Arg . Unit ( fun () -> args . underneath <- true ) ,
2013-08-20 16:32:57 +02:00
" Text stamp is underneath content " ) ;
( " -line-spacing " ,
2023-05-11 22:54:23 +02:00
Arg . Float ( fun f -> args . linespacing <- f ) ,
2013-08-20 16:32:57 +02:00
" Line spacing (1 is normal) " ) ;
( " -midline " ,
2023-05-11 22:54:23 +02:00
Arg . Unit ( fun () -> args . midline <- true ) ,
2013-08-20 16:32:57 +02:00
" Adjust text to midline rather than baseline " ) ;
2015-01-20 16:50:36 +01:00
( " -topline " ,
2023-05-11 22:54:23 +02:00
Arg . Unit ( fun () -> args . topline <- true ) ,
2015-01-20 16:50:36 +01:00
" Adjust text to topline rather than baseline " ) ;
2013-08-20 16:32:57 +02:00
( " -relative-to-cropbox " ,
2023-05-11 22:54:23 +02:00
Arg . Unit ( fun () -> args . relative_to_cropbox <- true ) ,
2013-08-20 16:32:57 +02:00
" Add text relative to Crop Box not Media Box " ) ;
2019-07-02 19:20:05 +02:00
( " -embed-missing-fonts " ,
Arg . Unit ( setop EmbedMissingFonts ) ,
" Embed missing fonts by calling gs " ) ;
2013-08-20 16:32:57 +02:00
( " -twoup " ,
Arg . Unit ( setop TwoUp ) ,
" Put 2 pages onto one " ) ;
( " -twoup-stack " ,
Arg . Unit ( setop TwoUpStack ) ,
" Stack 2 pages onto one twice the size " ) ;
2021-10-18 19:19:59 +02:00
( " -impose " ,
Arg . String setimpose ,
" Impose onto given page size " ) ;
( " -impose-xy " ,
Arg . String setimposexy ,
" Impose x by y (zero means unlimited) " ) ;
( " -impose-columns " ,
2024-01-23 19:14:00 +01:00
Arg . Unit ( fun () -> args . impose_columns <- true ) ,
2021-10-18 19:19:59 +02:00
" Impose in columns rather than rows " ) ;
( " -impose-rtl " ,
2024-01-23 19:14:00 +01:00
Arg . Unit ( fun () -> args . impose_rtl <- true ) ,
2023-11-29 11:33:07 +01:00
" Impose right-to-left " ) ;
2021-10-18 19:19:59 +02:00
( " -impose-btt " ,
2024-01-23 19:14:00 +01:00
Arg . Unit ( fun () -> args . impose_btt <- true ) ,
2023-11-29 11:33:07 +01:00
" Impose bottom-to-top " ) ;
2021-10-18 19:19:59 +02:00
( " -impose-margin " ,
2024-10-03 18:59:10 +02:00
Arg . String setimposemargin ,
2021-10-18 19:19:59 +02:00
" Add margin around whole imposed page " ) ;
( " -impose-spacing " ,
2024-10-03 18:59:10 +02:00
Arg . String setimposespacing ,
2021-10-18 19:19:59 +02:00
" Add spacing around each imposed page " ) ;
( " -impose-linewidth " ,
2024-10-03 18:59:10 +02:00
Arg . String setimposelinewidth ,
2021-10-18 19:19:59 +02:00
" Imposition divider line width (0=none) " ) ;
2023-11-15 18:34:14 +01:00
( " -chop " ,
Arg . String setchop ,
" Chop x by y " ) ;
2024-02-05 15:01:16 +01:00
( " -chop-h " ,
2024-08-31 20:06:48 +02:00
Arg . String setchoph ,
2024-02-05 15:01:16 +01:00
" Chop horizontally " ) ;
( " -chop-v " ,
2024-08-31 20:06:48 +02:00
Arg . String setchopv ,
2024-02-05 15:01:16 +01:00
" Chop horizontally " ) ;
2023-11-29 11:33:07 +01:00
( " -chop-columns " ,
2024-01-23 19:14:00 +01:00
Arg . Unit ( fun () -> args . impose_columns <- true ) ,
2023-11-29 11:33:07 +01:00
" Chop in columns rather than rows " ) ;
( " -chop-rtl " ,
2024-01-23 19:14:00 +01:00
Arg . Unit ( fun () -> args . impose_rtl <- true ) ,
2023-11-29 11:33:07 +01:00
" Chop right-to-left " ) ;
( " -chop-btt " ,
2024-01-23 19:14:00 +01:00
Arg . Unit ( fun () -> args . impose_btt <- true ) ,
2023-11-29 11:33:07 +01:00
" Chop bottom-to-top " ) ;
2013-08-20 16:32:57 +02:00
( " -pad-before " ,
Arg . Unit ( setop PadBefore ) ,
" Add a blank page before the given pages " ) ;
( " -pad-after " ,
Arg . Unit ( setop PadAfter ) ,
" Add a blank page after the given pages " ) ;
( " -pad-every " ,
Arg . Int setpadevery ,
" Add a blank page after every n pages " ) ;
2017-12-18 20:44:02 +01:00
( " -pad-with " ,
Arg . String setpadwith ,
" Use a given PDF instead of a blank page " ) ;
2013-08-20 16:32:57 +02:00
( " -pad-multiple " ,
Arg . Int setpadmultiple ,
" Pad the document to a multiple of n pages " ) ;
2019-07-01 16:35:17 +02:00
( " -pad-multiple-before " ,
Arg . Int setpadmultiplebefore ,
" Pad the document at beginning to a multiple of n pages " ) ;
2013-08-20 16:32:57 +02:00
( " -list-annotations " ,
Arg . Unit ( setop ListAnnotations ) ,
" List annotations " ) ;
2021-10-27 19:55:52 +02:00
( " -list-annotations-json " ,
Arg . Unit setlistannotationsjson ,
" List annotations in JSON format " ) ;
2013-08-20 16:32:57 +02:00
( " -copy-annotations " ,
Arg . String setcopyannotations ,
" Copy annotations from given file " ) ;
( " -remove-annotations " ,
Arg . Unit ( setop RemoveAnnotations ) ,
" Remove annotations " ) ;
2023-01-13 07:30:46 +01:00
( " -set-annotations " ,
Arg . String setsetannotations ,
" Set annotations from JSON file " ) ;
2013-08-20 16:32:57 +02:00
( " -list-fonts " ,
Arg . Unit ( setop Fonts ) ,
" Output font list " ) ;
2023-10-31 16:50:23 +01:00
( " -list-fonts-json " ,
Arg . Unit setlistfontsjson ,
" Output font list in JSON format " ) ;
2013-08-20 16:32:57 +02:00
( " -info " ,
Arg . Unit ( setop Info ) ,
" Output file information " ) ;
2023-10-31 16:50:23 +01:00
( " -info-json " ,
Arg . Unit setinfojson ,
" Output file information in JSON format " ) ;
2013-08-20 16:32:57 +02:00
( " -page-info " ,
Arg . Unit ( setop PageInfo ) ,
2019-10-21 12:54:34 +02:00
" Output page information " ) ;
2023-10-31 16:50:23 +01:00
( " -page-info-json " ,
Arg . Unit setpageinfojson ,
" Output page information in JSON format " ) ;
2013-08-20 16:32:57 +02:00
( " -set-author " ,
2024-01-23 15:32:31 +01:00
Arg . String ( fun s -> setop ( SetAuthor s ) () ) ,
2013-08-20 16:32:57 +02:00
" Set Author " ) ;
( " -set-title " ,
2024-01-23 15:32:31 +01:00
Arg . String ( fun s -> setop ( SetTitle s ) () ) ,
2013-08-20 16:32:57 +02:00
" Set Title " ) ;
( " -set-subject " ,
2024-01-23 15:32:31 +01:00
Arg . String ( fun s -> setop ( SetSubject s ) () ) ,
2013-08-20 16:32:57 +02:00
" Set Subject " ) ;
( " -set-keywords " ,
2024-01-23 15:32:31 +01:00
Arg . String ( fun s -> setop ( SetKeywords s ) () ) ,
2013-08-20 16:32:57 +02:00
" Set Keywords " ) ;
( " -set-create " ,
2024-01-23 15:32:31 +01:00
Arg . String ( fun s -> setop ( SetCreate s ) () ) ,
2013-08-20 16:32:57 +02:00
" Set Creation date " ) ;
( " -set-modify " ,
2024-01-23 15:32:31 +01:00
Arg . String ( fun s -> setop ( SetModify s ) () ) ,
2013-08-20 16:32:57 +02:00
" Set Modification date " ) ;
( " -set-creator " ,
2024-01-23 15:32:31 +01:00
Arg . String ( fun s -> setop ( SetCreator s ) () ) ,
2013-08-20 16:32:57 +02:00
" Set Creator " ) ;
( " -set-producer " ,
2024-01-23 15:32:31 +01:00
Arg . String ( fun s -> setop ( SetProducer s ) () ) ,
2013-08-20 16:32:57 +02:00
" Set Producer " ) ;
( " -set-trapped " ,
Arg . Unit ( setop SetTrapped ) ,
" Mark as trapped " ) ;
( " -set-untrapped " ,
Arg . Unit ( setop SetUntrapped ) ,
" Mark as not trapped " ) ;
2019-07-28 13:35:51 +02:00
( " -also-set-xmp " ,
2019-06-26 18:43:59 +02:00
Arg . Unit setalsosetxml ,
2019-07-28 13:35:51 +02:00
" Also set XMP metadata " ) ;
( " -just-set-xmp " ,
2019-06-26 18:43:59 +02:00
Arg . Unit setjustsetxml ,
2019-07-28 13:35:51 +02:00
" Just set XMP metadata, not old-fashioned metadata " ) ;
2019-07-01 15:40:22 +02:00
( " -create-metadata " ,
Arg . Unit ( setop CreateMetadata ) ,
2019-10-21 12:54:34 +02:00
" Create XMP metadata from scratch. " ) ;
2013-08-20 16:32:57 +02:00
( " -set-page-layout " ,
2024-01-23 15:32:31 +01:00
Arg . String ( fun s -> setop ( SetPageLayout s ) () ) ,
2013-08-20 16:32:57 +02:00
" Set page layout upon document opening " ) ;
( " -set-page-mode " ,
2024-01-23 15:32:31 +01:00
Arg . String ( fun s -> setop ( SetPageMode s ) () ) ,
2013-08-20 16:32:57 +02:00
" Set page mode upon document opening " ) ;
2023-06-02 14:05:42 +02:00
( " -set-non-full-screen-page-mode " ,
2024-01-23 15:32:31 +01:00
Arg . String ( fun s -> setop ( SetNonFullScreenPageMode s ) () ) ,
2023-04-17 20:55:11 +02:00
" Set non full screen page mode if page mode is FullScreen " ) ;
2013-10-02 16:29:53 +02:00
( " -open-at-page " ,
2020-05-04 13:25:42 +02:00
Arg . String setopenatpage ,
2013-10-02 16:29:53 +02:00
" Set initial page " ) ;
( " -open-at-page-fit " ,
2020-05-04 13:25:42 +02:00
Arg . String setopenatpagefit ,
2023-06-02 14:05:42 +02:00
" Set initial page, scaling to fit " ) ;
2023-04-18 15:42:17 +02:00
( " -open-at-page-custom " ,
Arg . String setopenatpagecustom ,
2023-06-02 14:05:42 +02:00
" Set initial page, with custom scaling " ) ;
2013-08-20 16:32:57 +02:00
( " -set-metadata " ,
2024-01-23 15:32:31 +01:00
Arg . String ( fun s -> setop ( SetMetadata s ) () ) ,
2013-08-20 16:32:57 +02:00
" Set metadata to the contents of a file " ) ;
( " -print-metadata " ,
Arg . Unit ( setop Metadata ) ,
" Output metadata information " ) ;
( " -remove-metadata " ,
Arg . Unit ( setop RemoveMetadata ) ,
" Remove document metadata " ) ;
2019-06-29 16:03:22 +02:00
( " -set-metadata-date " ,
Arg . String setsetmetadatadate ,
" Set the XMP metadata date property " ) ;
2013-08-20 16:32:57 +02:00
( " -hide-toolbar " ,
Arg . String hidetoolbar ,
" Hide the viewer's toolbar " ) ;
( " -hide-menubar " ,
Arg . String hidemenubar ,
" Hide the viewer's menubar " ) ;
( " -hide-window-ui " ,
Arg . String hidewindowui ,
" Hide the viewer's scroll bars etc. " ) ;
( " -fit-window " ,
Arg . String fitwindow ,
" Resize document's window to fit size of page " ) ;
( " -center-window " ,
Arg . String centerwindow ,
" Position window in the center of screen " ) ;
( " -display-doc-title " ,
Arg . String displaydoctitle ,
" Display document's title in the title bar " ) ;
2024-06-12 18:21:20 +02:00
( " -set-language " ,
Arg . String ( fun s -> setop ( SetLanguage s ) () ) ,
" Set the document's language " ) ;
2013-08-20 16:32:57 +02:00
( " -pages " ,
Arg . Unit ( setop CountPages ) ,
" Count pages " ) ;
( " -list-attached-files " ,
Arg . Unit ( setop ListAttachedFiles ) ,
" List attached files " ) ;
( " -dump-attachments " ,
Arg . Unit ( setop DumpAttachedFiles ) ,
2019-07-14 14:50:48 +02:00
" Dump attachments to disk " ) ;
2013-08-20 16:32:57 +02:00
( " -attach-file " ,
Arg . String setattachfile ,
" Attach a file " ) ;
( " -to-page " ,
Arg . String settopage ,
" Attach file to given page instead of document " ) ;
( " -remove-files " ,
Arg . Unit ( setop RemoveAttachedFiles ) ,
" Remove embedded attached document-level files " ) ;
2023-11-09 12:25:19 +01:00
( " -list-images " ,
Arg . Unit ( setop ListImages ) ,
" List images " ) ;
( " -list-images-json " ,
Arg . Unit setlistimagesjson ,
" List images in JSON format " ) ;
2023-11-09 19:36:41 +01:00
( " -list-images-used " ,
2023-11-09 12:25:19 +01:00
Arg . Unit ( fun () -> setop ( ImageResolution max_float ) () ) ,
" List images at point of use " ) ;
2023-11-09 19:36:41 +01:00
( " -list-images-used-json " ,
2023-11-09 12:25:19 +01:00
Arg . Unit ( fun () -> args . format_json <- true ; setop ( ImageResolution max_float ) () ) ,
" List images at point of use in JSON format " ) ;
2013-08-20 16:32:57 +02:00
( " -image-resolution " ,
Arg . Float setimageresolution ,
2023-11-09 12:25:19 +01:00
" List images at point of use under a given dpi " ) ;
( " -image-resolution-json " ,
Arg . Float ( fun f -> setimageresolution f ; args . format_json <- true ) ,
" List images at point of use under a given dpi " ) ;
2013-08-20 16:32:57 +02:00
( " -copy-font " ,
Arg . String setcopyfont ,
" Copy a named font " ) ;
( " -copy-font-page " ,
Arg . Int setfontpage ,
" Set the page a copied font is drawn from " ) ;
( " -copy-font-name " ,
Arg . String setcopyfontname ,
" Set the name of the font to copy " ) ;
2019-07-31 18:32:18 +02:00
( " -remove-fonts " ,
Arg . Unit ( setop RemoveFonts ) ,
" Remove embedded fonts " ) ;
2013-08-20 16:32:57 +02:00
( " -missing-fonts " ,
Arg . Unit ( setop MissingFonts ) ,
" List missing fonts " ) ;
( " -remove-id " ,
Arg . Unit ( setop RemoveId ) ,
" Remove the file's /ID tag " ) ;
( " -draft " ,
Arg . Unit ( setop Draft ) ,
" Remove images from the file " ) ;
2019-07-09 17:31:45 +02:00
( " -draft-remove-only " ,
2024-01-23 19:09:15 +01:00
Arg . String ( fun s -> args . removeonly <- Some s ) ,
2019-07-09 17:31:45 +02:00
" Only remove named image " ) ;
2013-08-20 16:32:57 +02:00
( " -boxes " ,
Arg . Unit setboxes ,
" Add crossed boxes to -draft option " ) ;
2019-07-11 18:19:40 +02:00
( " -remove-all-text " ,
Arg . Unit ( setop RemoveAllText ) ,
" Remove all text " ) ;
2013-08-20 16:32:57 +02:00
( " -blacktext " ,
Arg . Unit ( setop BlackText ) ,
" Blacken document text " ) ;
( " -blacklines " ,
Arg . Unit ( setop BlackLines ) ,
" Blacken lines in document " ) ;
( " -blackfills " ,
Arg . Unit ( setop BlackFills ) ,
" Blacken fills in document " ) ;
( " -thinlines " ,
Arg . String setthinlines ,
" Set minimum line thickness to the given width " ) ;
2016-11-09 16:42:47 +01:00
( " -remove-clipping " ,
Arg . Unit ( setop RemoveClipping ) ,
" Remove clipping paths " ) ;
2013-08-20 16:32:57 +02:00
( " -clean " ,
Arg . Unit ( setop Clean ) ,
" Garbage-collect a file " ) ;
( " -set-version " ,
2024-01-23 15:32:31 +01:00
Arg . Int ( fun i -> setop ( SetVersion i ) () ) ,
2013-08-20 16:32:57 +02:00
" Set PDF version number " ) ;
( " -copy-id-from " ,
Arg . String setcopyid ,
" Copy one file's ID tag to another " ) ;
2013-10-24 12:21:52 +02:00
( " -print-page-labels " ,
Arg . Unit ( setop PrintPageLabels ) ,
" Print page labels " ) ;
2023-10-31 16:50:23 +01:00
( " -print-page-labels-json " ,
Arg . Unit setprintpagelabelsjson ,
" Print page labels in JSON format " ) ;
2013-10-24 12:21:52 +02:00
( " -remove-page-labels " ,
Arg . Unit ( setop RemovePageLabels ) ,
" Remove page labels " ) ;
( " -add-page-labels " ,
2013-10-24 16:21:54 +02:00
Arg . Unit ( setop AddPageLabels ) ,
2013-10-24 12:21:52 +02:00
" Add or replace page labels " ) ;
2013-10-24 16:21:54 +02:00
( " -label-style " ,
Arg . String setlabelstyle ,
" Set label style (default DecimalArabic) " ) ;
( " -label-prefix " ,
Arg . String setlabelprefix ,
" Set label prefix (default none) " ) ;
2014-09-18 16:40:22 +02:00
( " -label-startval " ,
Arg . Int setlabelstartval ,
" Set label start value (default 1) " ) ;
2020-01-25 08:22:45 +01:00
( " -labels-progress " ,
Arg . Unit setlabelsprogress ,
" Label start value progresses with multiple ranges " ) ;
2015-01-07 19:29:11 +01:00
( " -remove-dict-entry " ,
Arg . String setremovedictentry ,
" Remove an entry from all dictionaries " ) ;
2021-10-28 18:06:46 +02:00
( " -replace-dict-entry " ,
Arg . String setreplacedictentry ,
" Remove an entry from all dictionaries " ) ;
( " -replace-dict-entry-value " ,
Arg . String setreplacedictentryvalue ,
" Replacement value for -replace-dict-entry " ) ;
( " -dict-entry-search " ,
Arg . String setdictentrysearch ,
" Search string for -remove-dict-entry and -replace-dict-entry " ) ;
2021-10-29 16:09:21 +02:00
( " -print-dict-entry " ,
Arg . String setprintdictentry ,
" Print dictionary values of a given key " ) ;
2015-01-22 20:16:56 +01:00
( " -producer " ,
2020-09-16 17:16:15 +02:00
Arg . String setproduceraswego ,
2015-01-22 20:16:56 +01:00
" Change the /Producer entry in the /Info dictionary " ) ;
( " -creator " ,
2020-09-16 17:16:15 +02:00
Arg . String setcreatoraswego ,
2015-01-22 20:16:56 +01:00
" Change the /Creator entry in the /Info dictionary " ) ;
2016-11-08 19:15:04 +01:00
( " -list-spot-colors " ,
2016-11-03 18:11:08 +01:00
Arg . Unit ( setop ListSpotColours ) ,
2016-11-08 19:15:04 +01:00
" List spot colors " ) ;
2019-07-07 18:07:52 +02:00
( " -create-pdf " ,
Arg . Unit ( setop CreatePDF ) ,
" Create a new PDF " ) ;
2024-09-05 17:38:03 +02:00
( " -create-pdf-ua-1 " ,
2024-09-30 14:01:31 +02:00
Arg . String ( fun s -> args . subformat <- Some Cpdfua . PDFUA1 ; args . title <- Some s ; setop CreatePDF () ) ,
2024-09-11 18:16:49 +02:00
" Create a new PDF/UA-1 with the given title " ) ;
( " -create-pdf-ua-2 " ,
2024-09-30 14:45:10 +02:00
Arg . String ( fun s -> args . subformat <- Some Cpdfua . PDFUA2 ; args . title <- Some s ; setop CreatePDF () ) ,
2024-09-11 18:16:49 +02:00
" Create a new PDF/UA-2 with the given title " ) ;
2019-07-07 18:07:52 +02:00
( " -create-pdf-pages " ,
Arg . Int setcreatepdfpages ,
" Number of pages for new PDF " ) ;
( " -create-pdf-papersize " ,
Arg . String setcreatepdfpapersize ,
2019-10-01 16:02:12 +02:00
" Paper size for new PDF " ) ;
( " -prepend-content " ,
Arg . String setprepend ,
" Prepend content to page " ) ;
( " -postpend-content " ,
Arg . String setpostpend ,
" Postpend content to page " ) ;
2020-12-11 20:01:02 +01:00
( " -gs " ,
2024-01-23 19:22:09 +01:00
Arg . String ( fun s -> args . path_to_ghostscript <- s ) ,
2020-12-11 20:01:02 +01:00
" Path to gs executable " ) ;
( " -gs-malformed " ,
Arg . Unit setgsmalformed ,
" Also try to reconstruct malformed files with gs " ) ;
( " -gs-quiet " ,
2024-01-23 19:09:15 +01:00
Arg . Unit ( fun () -> args . gs_quiet <- true ) ,
2020-12-11 20:01:02 +01:00
" Make gs go into quiet mode " ) ;
2021-05-20 17:53:35 +02:00
( " -gs-malformed-force " ,
Arg . Unit whingemalformed ,
" See manual for usage. " ) ;
2020-12-11 20:01:02 +01:00
( " -im " ,
Arg . String setimpath ,
2024-01-10 19:39:00 +01:00
" Path to magick executable " ) ;
2020-12-11 20:01:02 +01:00
( " -p2p " ,
Arg . String setp2ppath ,
" Path to pnmtopng executable " ) ;
( " -extract-images " ,
Arg . Unit ( setop ExtractImages ) ,
" Extract images to file " ) ;
2020-12-20 16:41:52 +01:00
( " -dedup " ,
2024-01-23 19:14:00 +01:00
Arg . Unit ( fun () -> args . dedup <- true ) ,
2020-12-20 16:41:52 +01:00
" Deduplicate extracted images fully " ) ;
( " -dedup-perpage " ,
2024-01-23 19:14:00 +01:00
Arg . Unit ( fun () -> args . dedup_per_page <- true ) ,
2020-12-20 16:41:52 +01:00
" Deduplicate extracted images per page only " ) ;
2023-12-06 13:20:27 +01:00
( " -process-images " ,
Arg . Unit ( setop ProcessImages ) ,
" Process images within PDF " ) ;
2023-12-28 17:18:25 +01:00
( " -process-images-info " ,
Arg . Unit setprocessimagesinfo ,
" Show info when processing images " ) ;
2023-12-22 20:33:10 +01:00
( " -jbig2enc " ,
Arg . String setjbig2encpath ,
" Path to jbig2enc executable " ) ;
2023-12-06 13:20:27 +01:00
( " -jpeg-to-jpeg " ,
2024-02-01 17:38:07 +01:00
Arg . Float setjpegquality ,
2023-12-06 13:20:27 +01:00
" Set JPEG quality for existing JPEGs " ) ;
( " -lossless-to-jpeg " ,
2024-02-01 17:38:07 +01:00
Arg . Float setjpegqualitylossless ,
2023-12-06 13:20:27 +01:00
" Set JPEG quality for existing lossless images " ) ;
2023-12-22 17:45:53 +01:00
( " -1bpp-method " ,
Arg . String set1bppmethod ,
" Set 1bpp compression method for existing images " ) ;
2024-01-12 16:00:28 +01:00
( " -jbig2-lossy-threshold " ,
Arg . Float setjbig2_lossy_threshold ,
" Set jbig2enc lossy threshold " ) ;
2023-12-24 14:54:21 +01:00
( " -pixel-threshold " ,
Arg . Int setpixelthreshold ,
" Only process images with more pixels than this " ) ;
2023-12-27 20:53:02 +01:00
( " -length-threshold " ,
Arg . Int setlengththreshold ,
" Only process images with data longer than this " ) ;
( " -percentage-threshold " ,
2024-02-01 17:38:07 +01:00
Arg . Float setpercentagethreshold ,
2023-12-27 20:53:02 +01:00
" Only substitute lossy image when smaller than this " ) ;
2024-01-04 12:43:27 +01:00
( " -dpi-threshold " ,
2024-02-01 17:38:07 +01:00
Arg . Float setdpithreshold ,
2024-01-04 12:43:27 +01:00
" Only process image when always higher than this dpi " ) ;
2023-12-31 12:59:48 +01:00
( " -lossless-resample " ,
2024-02-01 17:38:07 +01:00
Arg . Float setlosslessresample ,
2023-12-31 12:59:48 +01:00
" Resample lossless images to given part of original " ) ;
2024-02-01 16:22:19 +01:00
( " -lossless-resample-dpi " ,
2024-02-01 17:38:07 +01:00
Arg . Float setlosslessresampledpi ,
2024-02-01 14:29:20 +01:00
" Resample lossless images to given DPI " ) ;
2023-12-31 12:59:48 +01:00
( " -resample-interpolate " ,
2024-01-01 20:09:40 +01:00
Arg . Unit setresampleinterpolate ,
2023-12-31 12:59:48 +01:00
" Interpolate when resampling " ) ;
2021-05-20 17:53:35 +02:00
( " -squeeze " ,
Arg . Unit setsqueeze ,
" Squeeze " ) ;
( " -squeeze-log-to " ,
Arg . String setsqueezelogto ,
" Squeeze log location " ) ;
( " -squeeze-no-pagedata " ,
2024-01-23 19:09:15 +01:00
Arg . Unit ( fun () -> args . squeeze_pagedata <- false ) ,
2021-05-20 17:53:35 +02:00
" Don't recompress pages " ) ;
( " -squeeze-no-recompress " ,
2024-01-23 19:09:15 +01:00
Arg . Unit ( fun () -> args . squeeze_recompress <- false ) ,
2021-05-20 17:53:35 +02:00
" Don't recompress streams " ) ;
( " -output-json " ,
Arg . Unit ( setop OutputJSON ) ,
" Export PDF file as JSON data " ) ;
( " -output-json-parse-content-streams " ,
2024-01-23 19:09:15 +01:00
Arg . Unit ( fun () -> args . jsonparsecontentstreams <- true ) ,
2021-05-20 17:53:35 +02:00
" Parse content streams " ) ;
( " -output-json-no-stream-data " ,
2024-01-23 19:09:15 +01:00
Arg . Unit ( fun () -> args . jsonnostreamdata <- true ) ,
2021-05-20 17:53:35 +02:00
" Skip stream data for brevity " ) ;
2021-10-04 19:38:36 +02:00
( " -output-json-decompress-streams " ,
2024-01-23 19:09:15 +01:00
Arg . Unit ( fun () -> args . jsondecompressstreams <- true ) ,
2021-10-04 19:38:36 +02:00
" Skip stream data for brevity " ) ;
2021-12-30 16:25:24 +01:00
( " -output-json-clean-strings " ,
2024-01-23 19:09:15 +01:00
Arg . Unit ( fun () -> args . jsoncleanstrings <- true ) ,
2021-12-30 16:25:24 +01:00
" Convert UTF16BE strings to PDFDocEncoding when possible " ) ;
2021-10-01 13:16:55 +02:00
( " -j " ,
Arg . String set_json_input ,
2021-10-21 19:10:47 +02:00
" Load a PDF JSON file " ) ;
2021-05-20 17:53:35 +02:00
( " -ocg-list " ,
Arg . Unit ( setop OCGList ) ,
" List optional content groups " ) ;
( " -ocg-rename " ,
Arg . Unit ( setop OCGRename ) ,
" Rename optional content group " ) ;
( " -ocg-rename-from " ,
2024-01-23 19:09:15 +01:00
Arg . String ( fun s -> args . ocgrenamefrom <- s ) ,
2021-05-20 17:53:35 +02:00
" Rename from (with -ocg-rename) " ) ;
( " -ocg-rename-to " ,
2024-01-23 19:09:15 +01:00
Arg . String ( fun s -> args . ocgrenameto <- s ) ,
2021-05-20 17:53:35 +02:00
" Rename to (with -ocg-rename) " ) ;
( " -ocg-order-all " ,
Arg . Unit ( setop OCGOrderAll ) ,
" Repair /Order so all OCGs listed " ) ;
( " -ocg-coalesce-on-name " ,
Arg . Unit ( setop OCGCoalesce ) ,
" Coalesce OCGs with like name " ) ;
( " -stamp-as-xobject " ,
Arg . String setstampasxobject ,
2021-06-21 16:56:02 +02:00
" Stamp a file as a form xobject in another " ) ;
2021-11-03 17:05:53 +01:00
( " -print-font-table " ,
2021-11-01 16:40:33 +01:00
Arg . String setprintfontencoding ,
2021-11-03 17:05:53 +01:00
" Print the /ToUnicode table for a given font, if present. " ) ;
( " -print-font-table-page " ,
2021-11-01 16:40:33 +01:00
Arg . Int setfontpage ,
2021-11-03 17:05:53 +01:00
" Set page for -print-font-table " ) ;
2023-11-02 19:49:15 +01:00
( " -extract-font " ,
Arg . String setextractfontfile ,
" Extract a font " ) ;
2021-11-19 01:32:35 +01:00
( " -table-of-contents " ,
Arg . Unit ( setop TableOfContents ) ,
" Typeset a table of contents from bookmarks " ) ;
2021-12-10 13:58:30 +01:00
( " -toc-title " ,
2021-12-02 00:50:04 +01:00
Arg . String settableofcontentstitle ,
" Set (or clear if empty) the TOC title " ) ;
2021-12-10 13:58:30 +01:00
( " -toc-no-bookmark " ,
Arg . Unit settocnobookmark ,
" Don't add the table of contents to the bookmarks " ) ;
2021-11-19 01:32:35 +01:00
( " -typeset " ,
Arg . String settypeset ,
" Typeset a text file as a PDF " ) ;
2024-09-27 14:20:11 +02:00
( " -subformat " ,
Arg . String setsubformat ,
" Set subformat " ) ;
2024-09-27 14:45:18 +02:00
( " -title " ,
Arg . String ( fun s -> args . title <- Some s ) ,
" Set PDF/UA title " ) ;
2023-03-03 17:02:16 +01:00
( " -composition " ,
Arg . Unit ( setop ( Composition false ) ) ,
" Show composition of PDF " ) ;
( " -composition-json " ,
Arg . Unit ( setop ( Composition true ) ) ,
" Show composition of PDF in JSON format " ) ;
2023-05-02 16:04:35 +02:00
( " -text-width " ,
Arg . String settextwidth ,
" Find width of a line of text " ) ;
2023-07-14 17:27:53 +02:00
( " -draw " , Arg . Unit setdraw , " Begin drawing " ) ;
2024-09-06 15:04:17 +02:00
( " -draw-struct-tree " , Arg . Unit setdrawstructtree , " Build structure trees when drawing. " ) ;
2024-09-15 19:53:59 +02:00
( " -tag " , Arg . String Cpdfdrawcontrol . addtag , " Begin structure item " ) ;
( " -stag " , Arg . String Cpdfdrawcontrol . addstag , " Begin struture branch " ) ;
( " -end-tag " , Arg . Unit Cpdfdrawcontrol . endtag , " End structure item " ) ;
( " -end-stag " , Arg . Unit Cpdfdrawcontrol . endstag , " End structure branch " ) ;
( " -auto-tags " , Arg . Unit ( fun _ -> Cpdfdrawcontrol . autotags true ) , " Auto-tag paragraphs and figures " ) ;
( " -no-auto-tags " , Arg . Unit ( fun _ -> Cpdfdrawcontrol . autotags false ) , " Don't auto-tag paragraphs and figures " ) ;
2024-09-16 17:37:12 +02:00
( " -artifact " , Arg . Unit ( fun _ -> Cpdfdrawcontrol . artifact () ) , " Begin an artifact " ) ;
( " -end-artifact " , Arg . Unit ( fun _ -> Cpdfdrawcontrol . endartifact () ) , " End an artifact " ) ;
( " -no-auto-artifacts " , Arg . Unit ( fun _ -> Cpdfdrawcontrol . autoartifacts false ) , " Don't mark untagged content as artifacts " ) ;
2024-09-26 16:22:22 +02:00
( " -eltinfo " , Arg . String addeltinfo , " Add element information " ) ;
2024-09-25 16:12:58 +02:00
( " -end-eltinfo " , Arg . String ( fun s -> Cpdfdrawcontrol . endeltinfo s ) , " Erase element information " ) ;
2024-09-18 16:31:24 +02:00
( " -namespace " , Arg . String ( fun s -> Cpdfdrawcontrol . addnamespace ( expand_namespace s ) ) , " Set the structure tree namespace " ) ;
2024-09-25 16:12:58 +02:00
( " -rolemap " , Arg . String ( fun s -> Cpdfdrawcontrol . setrolemap s ) , " Set a role map " ) ;
2023-09-01 20:09:23 +02:00
( " -rect " , Arg . String Cpdfdrawcontrol . addrect , " Draw rectangle " ) ;
( " -to " , Arg . String Cpdfdrawcontrol . addto , " Move to " ) ;
( " -line " , Arg . String Cpdfdrawcontrol . addline , " Add line to " ) ;
( " -bez " , Arg . String Cpdfdrawcontrol . addbezier , " Add Bezier curve to path " ) ;
( " -bez23 " , Arg . String Cpdfdrawcontrol . addbezier23 , " Add Bezier v-op to path " ) ;
( " -bez13 " , Arg . String Cpdfdrawcontrol . addbezier13 , " Add Bezier y-op to path " ) ;
( " -circle " , Arg . String Cpdfdrawcontrol . addcircle , " Add circle to path " ) ;
( " -strokecol " , Arg . String Cpdfdrawcontrol . setstroke , " Set stroke colour " ) ;
( " -fillcol " , Arg . String Cpdfdrawcontrol . setfill , " Set fill colour " ) ;
( " -stroke " , Arg . Unit Cpdfdrawcontrol . stroke , " Stroke path " ) ;
( " -fill " , Arg . Unit Cpdfdrawcontrol . fill , " Fill path " ) ;
( " -filleo " , Arg . Unit Cpdfdrawcontrol . fillevenodd , " Fill path, even odd " ) ;
( " -strokefill " , Arg . Unit Cpdfdrawcontrol . strokefill , " Stroke and fill path " ) ;
( " -strokefilleo " , Arg . Unit Cpdfdrawcontrol . strokefillevenodd , " Stroke and fill path, even odd " ) ;
( " -clip " , Arg . Unit Cpdfdrawcontrol . clip , " Clip " ) ;
( " -clipeo " , Arg . Unit Cpdfdrawcontrol . clipevenodd , " Clip, even odd " ) ;
( " -close " , Arg . Unit Cpdfdrawcontrol . closepath , " Close path " ) ;
( " -thick " , Arg . String Cpdfdrawcontrol . setthickness , " Set stroke thickness " ) ;
( " -cap " , Arg . String Cpdfdrawcontrol . setcap , " Set cap " ) ;
( " -join " , Arg . String Cpdfdrawcontrol . setjoin , " Set join " ) ;
( " -miter " , Arg . String Cpdfdrawcontrol . setmiter , " Set miter limit " ) ;
( " -dash " , Arg . String Cpdfdrawcontrol . setdash , " Set dash pattern " ) ;
( " -push " , Arg . Unit Cpdfdrawcontrol . push , " Push graphics stack " ) ;
( " -pop " , Arg . Unit Cpdfdrawcontrol . pop , " Pop graphics stack " ) ;
( " -matrix " , Arg . String Cpdfdrawcontrol . setmatrix , " Append to graphics matrix " ) ;
( " -mtrans " , Arg . String Cpdfdrawcontrol . setmtranslate , " Translate the graphics matrix " ) ;
( " -mrot " , Arg . String Cpdfdrawcontrol . setmrotate , " Rotate the graphics matrix " ) ;
( " -mscale " , Arg . String Cpdfdrawcontrol . setmscale , " Scale the graphics matrix " ) ;
( " -mshearx " , Arg . String Cpdfdrawcontrol . setmshearx , " Shear the graphics matrix in X " ) ;
( " -msheary " , Arg . String Cpdfdrawcontrol . setmsheary , " Shear the graphics matrix in Y " ) ;
( " -xobj-bbox " , Arg . String Cpdfdrawcontrol . xobjbbox , " Specify the bounding box for xobjects " ) ;
( " -xobj " , Arg . String Cpdfdrawcontrol . startxobj , " Begin saving a sequence of graphics operators " ) ;
( " -end-xobj " , Arg . Unit Cpdfdrawcontrol . endxobj , " End saving a sequence of graphics operators " ) ;
( " -use " , Arg . String Cpdfdrawcontrol . usexobj , " Use a saved sequence of graphics operators " ) ;
( " -draw-jpeg " , Arg . String Cpdfdrawcontrol . addjpeg , " Load a JPEG from file and name it " ) ;
( " -draw-png " , Arg . String Cpdfdrawcontrol . addpng , " Load a PNG from file and name it " ) ;
2024-09-25 16:41:08 +02:00
( " -image " , Arg . String ( fun s -> Cpdfdrawcontrol . addimage s ) , " Draw an image which has already been loaded " ) ;
2023-09-01 20:09:23 +02:00
( " -fill-opacity " , Arg . Float Cpdfdrawcontrol . addopacity , " Set opacity " ) ;
( " -stroke-opacity " , Arg . Float Cpdfdrawcontrol . addsopacity , " Set stroke opacity " ) ;
( " -bt " , Arg . Unit Cpdfdrawcontrol . addbt , " Begin text " ) ;
( " -et " , Arg . Unit Cpdfdrawcontrol . addet , " End text " ) ;
2023-09-26 18:51:56 +02:00
( " -text " , Arg . String Cpdfdrawcontrol . addtext , " Draw text " ) ;
( " -stext " , Arg . String Cpdfdrawcontrol . addspecialtext , " Draw text with %specials " ) ;
2024-09-12 17:08:05 +02:00
( " -para " , Arg . String Cpdfdrawcontrol . addpara , " Add a paragraph of text " ) ;
2024-09-19 15:41:51 +02:00
( " -paras " , Arg . String Cpdfdrawcontrol . addparas , " Add paragraphs of text, splitting on newlines " ) ;
2024-09-20 15:15:10 +02:00
( " -indent " , Arg . Float ( fun f -> args . indent <- Some f ) , " Set indent for paragraphs " ) ;
2024-10-03 18:59:10 +02:00
( " -leading " , Arg . String setleading , " Set leading " ) ;
( " -charspace " , Arg . String setcharspace , " Set character spacing " ) ;
( " -wordspace " , Arg . String setwordspace , " Set word space " ) ;
2023-09-01 20:09:23 +02:00
( " -textscale " , Arg . Float ( fun f -> Cpdfdrawcontrol . addop ( Cpdfdraw . TextScale f ) ) , " Set text scale " ) ;
( " -rendermode " , Arg . Int ( fun i -> Cpdfdrawcontrol . addop ( Cpdfdraw . RenderMode i ) ) , " Set text rendering mode " ) ;
2024-10-03 18:59:10 +02:00
( " -rise " , Arg . String setrise , " Set text rise " ) ;
2023-09-01 20:09:23 +02:00
( " -nl " , Arg . Unit ( fun () -> Cpdfdrawcontrol . addop Cpdfdraw . Newline ) , " New line " ) ;
( " -newpage " , Arg . Unit Cpdfdrawcontrol . addnewpage , " Move to a fresh page " ) ;
2024-06-24 16:29:32 +02:00
( " -extract-stream " , Arg . String setextractstream , " Extract a stream " ) ;
2024-07-02 19:23:10 +02:00
( " -extract-stream-decompress " , Arg . String setextractstreamdecomp , " Extract a stream, decompressing " ) ;
( " -obj " , Arg . String setprintobj , " Print object " ) ;
2024-09-02 17:30:02 +02:00
( " -replace-obj " , Arg . String setreplaceobj , " Replace object " ) ;
2024-07-02 19:23:10 +02:00
( " -json " , Arg . Unit ( fun () -> args . format_json <- true ) , " Format output as JSON " ) ;
( " -verify " , Arg . String ( fun s -> setop ( Verify s ) () ) , " Verify conformance to a standard " ) ;
( " -verify-single " , Arg . String ( fun s -> args . verify_single <- Some s ) , " Verify a single test " ) ;
2024-09-14 15:43:55 +02:00
( " -mark-as " , Arg . String ( fun s -> setop ( MarkAs ( Cpdfua . subformat_of_string s ) ) () ) , " Mark as conforming to a standard " ) ;
( " -remove-mark " , Arg . String ( fun s -> setop ( RemoveMark ( Cpdfua . subformat_of_string s ) ) () ) , " Remove conformance mark " ) ;
2024-07-02 19:23:10 +02:00
( " -print-struct-tree " , Arg . Unit ( fun () -> setop PrintStructTree () ) , " Print structure tree " ) ;
( " -extract-struct-tree " , Arg . Unit ( fun () -> setop ExtractStructTree () ) , " Extract structure tree in JSON format " ) ;
( " -replace-struct-tree " , Arg . String ( fun s -> setop ( ReplaceStructTree s ) () ) , " Replace structure tree from JSON " ) ;
2024-09-21 19:44:43 +02:00
( " -redact " , Arg . Unit ( fun () -> setop Redact () ) , " Redact entire pages " ) ;
2020-02-26 17:24:27 +01:00
(* These items are undocumented *)
2013-08-20 16:32:57 +02:00
( " -debug " , Arg . Unit setdebug , " " ) ;
2024-01-23 19:46:09 +01:00
( " -debug-crypt " , Arg . Unit ( fun () -> args . debugcrypt <- true ) , " " ) ;
( " -debug-force " , Arg . Unit ( fun () -> args . debugforce <- true ) , " " ) ;
2018-04-05 13:56:32 +02:00
( " -debug-malformed " , Arg . Set Pdfread . debug_always_treat_malformed , " " ) ;
2023-04-25 14:45:56 +02:00
( " -debug-stderr-to-stdout " , Arg . Unit setstderrtostdout , " " ) ;
2024-09-20 18:01:41 +02:00
( " -debug-readable-ops " , Arg . Unit setreadableops , " " ) ;
2023-04-12 20:37:30 +02:00
( " -stay-on-error " , Arg . Unit setstayonerror , " " ) ;
(* These items are unfinished *)
2016-11-09 19:15:23 +01:00
( " -extract-text " , Arg . Unit ( setop ExtractText ) , " " ) ;
2016-11-13 15:02:09 +01:00
( " -extract-text-font-size " , Arg . Float setextracttextfontsize , " " ) ;
2016-11-10 16:03:34 +01:00
]
2013-08-20 16:32:57 +02:00
and usage_msg =
2021-05-27 18:31:12 +02:00
" Syntax: cpdf [<operation>] <input files> [-o <output file>] \n \n \
2024-07-23 15:47:13 +02:00
Copyright Coherent Graphics Ltd . \ n \ n \
Version " ^ (if agpl then " AGPLv3 - licensed " else " " ) ^ string_of_int major_version ^ " . " ^ string_of_int minor_version ^ " . " ^ string_of_int minor_minor_version ^ " " ^ version_date ^ " \ n \ n \
https : // www . coherentpdf . com / \ n \ n \
2013-08-20 16:32:57 +02:00
Input names are distinguished by containing a '.' and may be \ n \
followed by a page range specification , for instance \ " 1,2,3 \" \n \
or \ " 1-6,9-end \" or \" even \" or \" odd \" or \" reverse \" . \n \n Operations (See \
manual for full details ) : \ n "
(* Reading and writing *)
2014-10-14 20:36:57 +02:00
let filesize name =
try
let x = open_in_bin name in
let r = in_channel_length x in
close_in x ;
r
with
_ -> 0
2013-08-20 16:32:57 +02:00
2019-06-30 15:05:20 +02:00
(* Mend PDF file with Ghostscript. We use this if a file is malformed and CPDF
* cannot mend it . It is copied to a temporary file , fixed , then we return None or Some ( pdf ) . * )
let mend_pdf_file_with_ghostscript filename =
2023-04-25 14:45:56 +02:00
match args . path_to_ghostscript with
| " " ->
Pdfe . log " Please supply path to gs with -gs \n " ;
exit 2
| _ ->
Pdfe . log " CPDF could not mend. Attempting to mend file with gs \n " ;
let tmpout = Filename . temp_file " cpdf " " .pdf " in
tempfiles := tmpout :: ! tempfiles ;
let gscall =
Filename . quote_command args . path_to_ghostscript
( ( if args . gs_quiet then [ " -dQUIET " ] else [] ) @
[ " -dNOPAUSE " ; " -sDEVICE=pdfwrite " ; " -sOUTPUTFILE= " ^ tmpout ; " -dBATCH " ; filename ] )
in
match Sys . command gscall with
| 0 -> Pdfe . log " Succeeded! \n " ; tmpout
| _ -> Pdfe . log " Could not fix malformed PDF file, even with gs \n " ; exit 2
2019-06-30 15:05:20 +02:00
2019-07-02 15:10:42 +02:00
exception StdInBytes of bytes
2014-12-11 20:36:43 +01:00
let pdf_of_stdin ? revision user_pw owner_pw =
2019-07-02 15:10:42 +02:00
let rbytes = ref ( mkbytes 0 ) in
try
let user_pw = Some user_pw
and owner_pw = if owner_pw = " " then None else Some owner_pw in
let o , bytes = Pdfio . input_output_of_bytes 16384 in
try
while true do o . Pdfio . output_char ( input_char stdin ) done ;
Pdf . empty ()
with
End_of_file ->
let thebytes = Pdfio . extract_bytes_from_input_output o bytes in
rbytes := thebytes ;
let i = Pdfio . input_of_bytes thebytes in
pdfread_pdf_of_input ? revision user_pw owner_pw i
with
_ -> raise ( StdInBytes ! rbytes )
2014-10-15 18:51:15 +02:00
2019-07-02 16:50:36 +02:00
let rec get_single_pdf ? ( decrypt = true ) ? ( fail = false ) op read_lazy =
2019-07-02 15:10:42 +02:00
let failout () =
if fail then begin
(* Reconstructed with ghostscript, but then we couldn't read it even then. Do not loop. *)
2023-04-25 14:45:56 +02:00
Pdfe . log " Failed to read gs-reconstructed PDF even though gs succeeded \n " ;
2019-07-02 15:10:42 +02:00
exit 2
end
in
let warn_gs () =
2019-07-12 15:53:55 +02:00
begin match args . inputs with
( InFile inname , _ , _ , _ , _ , _ ) :: _ ->
2022-11-02 22:13:27 +01:00
begin try ignore ( close_in ( open_in_bin inname ) ) with _ ->
2023-04-25 14:45:56 +02:00
Pdfe . log ( Printf . sprintf " File %s does not exist \n " inname ) ;
2019-07-12 15:53:55 +02:00
exit 2
end
| _ -> ()
end ;
2023-04-25 14:45:56 +02:00
Pdfe . log " get_single_pdf: failed to read malformed PDF file. Consider using -gs-malformed \n " ;
2019-07-02 15:10:42 +02:00
exit 2
in
2014-10-15 18:51:15 +02:00
match args . inputs with
2019-06-30 15:05:20 +02:00
| ( InFile inname , x , u , o , y , revision ) as input :: more ->
2014-10-15 18:51:15 +02:00
if args . squeeze then
Printf . printf " Initial file size is %i bytes \n " ( filesize inname ) ;
let pdf =
2019-06-30 15:05:20 +02:00
try
if read_lazy then
pdfread_pdf_of_channel_lazy ? revision ( optstring u ) ( optstring o ) ( open_in_bin inname )
else
pdfread_pdf_of_file ? revision ( optstring u ) ( optstring o ) inname
with
2021-10-02 13:22:59 +02:00
| Cpdferror . SoftError _ as e -> raise e (* Bad owner or user password *)
2019-08-18 11:55:40 +02:00
| _ ->
2019-06-30 15:05:20 +02:00
if args . gs_malformed then
begin
2019-07-02 15:10:42 +02:00
failout () ;
2019-06-30 15:05:20 +02:00
let newname = mend_pdf_file_with_ghostscript inname in
args . inputs <- ( InFile newname , x , u , o , y , revision ) :: more ;
get_single_pdf ~ fail : true op read_lazy
end
else
2019-07-02 15:10:42 +02:00
warn_gs ()
in
args . was_encrypted <- Pdfcrypt . is_encrypted pdf ;
2019-07-02 16:50:36 +02:00
if decrypt then decrypt_if_necessary input op pdf else pdf
2019-07-02 15:10:42 +02:00
| ( StdIn , x , u , o , y , revision ) as input :: more ->
let pdf =
try pdf_of_stdin ? revision u o with
StdInBytes b ->
if args . gs_malformed then
2019-06-30 15:05:20 +02:00
begin
2019-07-02 15:10:42 +02:00
failout () ;
let inname = Filename . temp_file " cpdf " " .pdf " in
tempfiles := inname :: ! tempfiles ;
let fh = open_out_bin inname in
Pdfio . bytes_to_output_channel fh b ;
close_out fh ;
let newname = mend_pdf_file_with_ghostscript inname in
args . inputs <- ( InFile newname , x , u , o , y , revision ) :: more ;
get_single_pdf ~ fail : true op read_lazy
2019-06-30 15:05:20 +02:00
end
2019-07-02 15:10:42 +02:00
else
warn_gs ()
2014-10-15 18:51:15 +02:00
in
2014-10-16 16:28:46 +02:00
args . was_encrypted <- Pdfcrypt . is_encrypted pdf ;
2019-07-02 16:50:36 +02:00
if decrypt then decrypt_if_necessary input op pdf else pdf
2023-12-05 13:20:03 +01:00
| ( AlreadyInMemory ( pdf , s ) , _ , _ , _ , _ , _ ) :: _ -> pdf
2014-10-15 18:51:15 +02:00
| _ ->
raise ( Arg . Bad " cpdf: No input specified. \n " )
2019-07-02 16:50:36 +02:00
let filenames = null_hash ()
2019-08-16 16:16:21 +02:00
let squeeze_logto filename x =
let fh = open_out_gen [ Open_wronly ; Open_creat ] 0o666 filename in
seek_out fh ( out_channel_length fh ) ;
output_string fh x ;
close_out fh
2019-07-02 16:50:36 +02:00
(* This now memoizes on the name of the file to make sure we only load each
file once * )
2019-07-02 17:50:39 +02:00
let rec get_pdf_from_input_kind ? ( read_lazy = false ) ? ( decrypt = true ) ? ( fail = false ) ( ( _ , x , u , o , y , revision ) as input ) op ik =
2019-07-02 16:50:36 +02:00
let failout () =
if fail then begin
(* Reconstructed with ghostscript, but then we couldn't read it even then. Do not loop. *)
2023-04-25 14:45:56 +02:00
Pdfe . log " Failed to read gs-reconstructed PDF even though gs succeeded \n " ;
2019-07-02 16:50:36 +02:00
exit 2
end
in
let warn_gs () =
2019-07-12 15:53:55 +02:00
begin match input with
( InFile inname , _ , _ , _ , _ , _ ) ->
2022-11-02 22:13:27 +01:00
begin try ignore ( close_in ( open_in_bin inname ) ) with _ ->
2023-04-25 14:45:56 +02:00
Pdfe . log ( Printf . sprintf " File %s does not exist \n " inname ) ;
2019-07-12 15:53:55 +02:00
exit 2
end
| _ -> ()
end ;
2023-04-25 14:45:56 +02:00
Pdfe . log " get_pdf_from_input_kind: failed to read malformed PDF file. Consider using -gs-malformed \n " ;
2019-07-02 16:50:36 +02:00
exit 2
in
match ik with
2023-12-05 13:20:03 +01:00
| AlreadyInMemory ( pdf , _ ) -> pdf
2019-07-02 16:50:36 +02:00
| InFile s ->
2014-10-15 18:51:15 +02:00
if args . squeeze then
2019-07-02 16:50:36 +02:00
begin
let size = filesize s in
initial_file_size := size ;
2019-08-16 16:16:21 +02:00
let str = Printf . sprintf " Initial file size is %i bytes \n " size in
begin match ! logto with
| None -> print_string str
| Some filename -> squeeze_logto filename str
end
2019-07-02 16:50:36 +02:00
end ;
begin try Hashtbl . find filenames s with
Not_found ->
let pdf =
2019-07-02 17:50:39 +02:00
try
if read_lazy then
pdfread_pdf_of_channel_lazy ? revision ( optstring u ) ( optstring o ) ( open_in_bin s )
else
pdfread_pdf_of_file ? revision ( optstring u ) ( optstring o ) s
with
2021-10-02 13:22:59 +02:00
| Cpdferror . SoftError _ as e -> raise e (* Bad owner or user password *)
2019-08-18 11:55:40 +02:00
| e ->
Printf . printf " %s \n " ( Printexc . to_string e ) ;
2019-07-02 16:50:36 +02:00
if args . gs_malformed then
begin
failout () ;
let newname = mend_pdf_file_with_ghostscript s in
get_pdf_from_input_kind ~ fail : true ( InFile newname , x , u , o , y , revision ) op ( InFile newname ) ;
end
else
warn_gs ()
in
args . was_encrypted <- Pdfcrypt . is_encrypted pdf ;
let pdf = if decrypt then decrypt_if_necessary input op pdf else pdf in
Hashtbl . add filenames s pdf ; pdf
end
| StdIn ->
let pdf =
try pdf_of_stdin ? revision u o with
StdInBytes b ->
if args . gs_malformed then
begin
failout () ;
let inname = Filename . temp_file " cpdf " " .pdf " in
tempfiles := inname :: ! tempfiles ;
let fh = open_out_bin inname in
Pdfio . bytes_to_output_channel fh b ;
close_out fh ;
let newname = mend_pdf_file_with_ghostscript inname in
get_pdf_from_input_kind ~ fail : true ( InFile newname , x , u , o , y , revision ) op ( InFile newname ) ;
end
else
warn_gs ()
in
args . was_encrypted <- Pdfcrypt . is_encrypted pdf ;
if decrypt then decrypt_if_necessary input op pdf else pdf
2014-10-15 18:51:15 +02:00
2015-01-22 20:16:56 +01:00
let rec unescape_octals prev = function
| [] -> rev prev
| '\\' :: ( '0' .. '9' as a ) :: ( '0' .. '9' as b ) :: ( '0' .. '9' as c ) :: t ->
let chr = char_of_int ( int_of_string ( " 0o " ^ implode [ a ; b ; c ] ) ) in
unescape_octals ( chr :: prev ) t
| '\\' :: '\\' :: t -> unescape_octals ( '\\' :: prev ) t
| h :: t -> unescape_octals ( h :: prev ) t
let unescape_octals s =
implode ( unescape_octals [] ( explode s ) )
let process s =
2021-12-19 13:55:06 +01:00
if args . encoding < > Cpdfmetadata . Raw
2015-01-22 20:16:56 +01:00
then Pdftext . pdfdocstring_of_utf8 s
else unescape_octals s
let set_producer s pdf =
2021-12-19 13:55:06 +01:00
ignore ( Cpdfmetadata . set_pdf_info ( " /Producer " , Pdf . String ( process s ) , 0 ) pdf )
2015-01-22 20:16:56 +01:00
let set_creator s pdf =
2021-12-19 13:55:06 +01:00
ignore ( Cpdfmetadata . set_pdf_info ( " /Creator " , Pdf . String ( process s ) , 0 ) pdf )
2015-01-22 20:16:56 +01:00
2014-10-29 14:25:21 +01:00
let really_write_pdf ? ( encryption = None ) ? ( is_decompress = false ) mk_id pdf outname =
2015-01-22 20:16:56 +01:00
if args . producer < > None then set_producer ( unopt args . producer ) pdf ;
if args . creator < > None then set_creator ( unopt args . creator ) pdf ;
2023-04-25 14:45:56 +02:00
if args . debugcrypt then Printf . printf " really_write_pdf \n " ;
2014-12-02 16:12:28 +01:00
let will_linearize =
args . linearize | | args . keeplinearize && pdf . Pdf . was_linearized
in
2014-10-02 14:32:30 +02:00
let outname' =
2014-12-02 16:12:28 +01:00
if will_linearize then Filename . temp_file " cpdflin " " .pdf " else outname
2014-10-02 14:32:30 +02:00
in
2014-10-27 17:48:08 +01:00
if args . debugcrypt then
2014-11-19 18:36:02 +01:00
Printf . printf " args.recrypt = %b, args.was_encrypted = %b \n "
args . recrypt args . was_encrypted ;
2014-10-24 18:24:29 +02:00
begin
if args . recrypt && args . was_encrypted then
begin
2014-11-19 18:36:02 +01:00
if args . debugcrypt then
Printf . printf " Recrypting in really_write_pdf \n " ;
2014-10-30 18:00:00 +01:00
match args . inputs with
[] -> raise ( Pdf . PDFError " no input in recryption " )
2014-12-11 19:17:02 +01:00
| ( _ , _ , user_pw , owner_pw , _ , _ ) :: _ ->
2014-11-19 18:36:02 +01:00
let best_password =
if owner_pw < > " " then owner_pw else user_pw
in
2014-11-02 15:59:37 +01:00
Pdfwrite . pdf_to_file_options
~ preserve_objstm : args . preserve_objstm
~ generate_objstm : args . create_objstm
~ compress_objstm : ( not is_decompress )
~ recrypt : ( Some best_password )
2023-04-23 22:00:46 +02:00
None mk_id pdf outname'
2014-10-24 18:24:29 +02:00
end
else
begin
2018-03-20 16:53:25 +01:00
if args . debugforce | | not args . was_encrypted | | args . was_decrypted_with_owner then
2014-10-27 17:48:08 +01:00
begin
2014-11-19 18:36:02 +01:00
if args . debugcrypt then
Printf . printf " Pdf to file in really_write_pdf \n " ;
Pdfwrite . pdf_to_file_options
~ preserve_objstm : args . preserve_objstm
~ generate_objstm : args . create_objstm
~ compress_objstm : ( not is_decompress )
2023-04-23 22:00:46 +02:00
encryption mk_id pdf outname'
2014-10-27 17:48:08 +01:00
end
else
2014-11-19 18:36:02 +01:00
soft_error
2021-10-12 19:58:37 +02:00
" You must supply -recrypt here, or add -decrypt-force, or provide the owner password. "
2014-10-24 18:24:29 +02:00
end
2014-10-15 18:51:15 +02:00
end ;
2014-10-14 20:36:57 +02:00
begin
2014-12-02 16:12:28 +01:00
if will_linearize then
2021-12-18 17:14:31 +01:00
let cpdflin = find_cpdflin args . cpdflin in
2014-11-19 18:36:02 +01:00
match args . inputs with
[] -> raise ( Pdf . PDFError " no input in recryption " )
2014-12-11 19:17:02 +01:00
| ( _ , _ , user_pw , owner_pw , _ , _ ) :: _ ->
2014-11-19 18:36:02 +01:00
let best_password =
if owner_pw < > " " then owner_pw else user_pw
in
let code =
2021-12-18 17:14:31 +01:00
call_cpdflin cpdflin outname' outname best_password
2014-11-19 18:36:02 +01:00
in
if code > 0 then
begin
begin try Sys . remove outname with _ -> () end ;
Sys . rename outname' outname ;
soft_error
" Linearizer failed with above error. \
File written without linearization . "
end
else
begin try Sys . remove outname' with _ -> () end ;
2014-10-14 20:36:57 +02:00
end ;
if args . squeeze then
let s = filesize outname in
2019-08-16 16:16:21 +02:00
begin
let str =
Printf . sprintf
" Final file size is %i bytes, %.2f%% of original. \n "
s
( ( float s /. float ! initial_file_size ) * . 100 . )
in
match ! logto with
| None -> print_string str
| Some filename -> squeeze_logto filename str
end
2014-10-02 14:32:30 +02:00
2014-10-02 15:19:05 +02:00
let write_pdf ? ( encryption = None ) ? ( is_decompress = false ) mk_id pdf =
2014-10-27 17:48:08 +01:00
if args . debugcrypt then Printf . printf " write_pdf \n " ;
2023-03-22 16:45:00 +01:00
if args . create_objstm && not ( args . keepversion | | pdf . Pdf . major > 1 )
2013-08-20 16:32:57 +02:00
then pdf . Pdf . minor <- max pdf . Pdf . minor 5 ;
match args . out with
| NoOutputSpecified ->
output_pdfs = | pdf
| File outname ->
2014-10-14 20:36:57 +02:00
begin match encryption with
None ->
2021-10-15 18:18:23 +02:00
if not is_decompress then
begin
2021-12-18 17:26:33 +01:00
ignore ( Cpdfsqueeze . recompress_pdf pdf ) ;
2023-06-21 16:38:32 +02:00
if args . squeeze then Cpdfsqueeze . squeeze ~ pagedata : args . squeeze_pagedata ? logto : ! logto pdf ;
2021-10-15 18:18:23 +02:00
end ;
Pdf . remove_unreferenced pdf ;
really_write_pdf ~ is_decompress mk_id pdf outname
2014-10-14 20:36:57 +02:00
| Some _ ->
2014-10-29 14:25:21 +01:00
really_write_pdf ~ encryption ~ is_decompress mk_id pdf outname
2014-10-14 20:36:57 +02:00
end
2013-08-20 16:32:57 +02:00
| Stdout ->
2018-01-29 10:15:42 +01:00
let temp = Filename . temp_file " cpdfstdout " " .pdf " in
2014-10-02 14:32:30 +02:00
begin match encryption with
None ->
2021-10-15 18:18:23 +02:00
if not is_decompress then
begin
2021-12-18 17:26:33 +01:00
ignore ( Cpdfsqueeze . recompress_pdf pdf ) ;
2023-06-21 16:38:32 +02:00
if args . squeeze then Cpdfsqueeze . squeeze ~ pagedata : args . squeeze_pagedata ? logto : ! logto pdf ;
2021-10-15 18:18:23 +02:00
Pdf . remove_unreferenced pdf
end ;
really_write_pdf ~ encryption ~ is_decompress mk_id pdf temp ;
2014-10-02 14:32:30 +02:00
| Some _ ->
2014-10-29 14:25:21 +01:00
really_write_pdf ~ encryption ~ is_decompress mk_id pdf temp
2014-10-02 14:32:30 +02:00
end ;
let temp_file = open_in_bin temp in
try
while true do output_char stdout ( input_char temp_file ) done ;
assert false
with
2018-01-29 10:15:42 +01:00
End_of_file ->
2018-02-13 19:14:48 +01:00
begin try close_in temp_file ; Sys . remove temp with
2023-04-25 14:45:56 +02:00
e -> Pdfe . log ( Printf . sprintf " Failed to remove temp file %s (%s) \n " temp ( Printexc . to_string e ) )
2018-01-29 10:15:42 +01:00
end ;
flush stdout (* r For Windows *)
2013-08-20 16:32:57 +02:00
2014-10-28 13:56:45 +01:00
(* Find the stem of a filename *)
let stem s =
2014-10-28 19:40:56 +01:00
implode
( rev ( tail_no_fail
( dropwhile
( neq '.' ) ( rev ( explode ( Filename . basename s ) ) ) ) ) )
2014-10-28 13:56:45 +01:00
let fast_write_split_pdfs
2023-10-30 19:30:49 +01:00
? ( names = [] ) enc splitlevel original_filename sq spec main_pdf pagenums pdf_pages
2014-10-28 13:56:45 +01:00
=
let marks = Pdfmarks . read_bookmarks main_pdf in
iter2
( fun number pagenums ->
2024-07-05 15:35:32 +02:00
let pdf = Pdfpage . pdf_of_pages ~ retain_numbering : args . retain_numbering ~ process_struct_tree : args . process_struct_trees main_pdf pagenums in
2014-10-28 13:56:45 +01:00
let startpage , endpage = extremes pagenums in
2014-10-28 19:40:56 +01:00
let name =
2023-10-30 19:30:49 +01:00
if names < > [] then List . nth names ( number - 1 ) else
Cpdfbookmarks . name_of_spec
args . encoding marks main_pdf splitlevel spec number
( stem original_filename ) startpage endpage
2014-10-28 19:40:56 +01:00
in
2014-10-28 13:56:45 +01:00
Pdf . remove_unreferenced pdf ;
2023-06-21 16:38:32 +02:00
if sq then Cpdfsqueeze . squeeze ~ pagedata : args . squeeze_pagedata ? logto : ! logto pdf ;
2014-10-28 13:56:45 +01:00
really_write_pdf ~ encryption : enc ( not ( enc = None ) ) pdf name )
( indx pagenums )
pagenums
(* Return list, in order, a * set * of page numbers of bookmarks at a given level *)
let bookmark_pages level pdf =
2017-05-28 20:19:17 +02:00
let refnums = Pdf . page_reference_numbers pdf in
2017-05-29 15:39:01 +02:00
let fastrefnums = hashtable_of_dictionary ( combine refnums ( indx refnums ) ) in
2017-05-28 20:19:17 +02:00
setify_preserving_order
( option_map
( function
l when l . Pdfmarks . level = level ->
2017-05-29 15:39:01 +02:00
Some ( Pdfpage . pagenumber_of_target ~ fastrefnums pdf l . Pdfmarks . target )
2017-05-28 20:19:17 +02:00
| _ -> None )
( Pdfmarks . read_bookmarks pdf ) )
2014-10-28 13:56:45 +01:00
let split_at_bookmarks
2021-10-15 18:18:23 +02:00
enc original_filename ~ squeeze level spec pdf
2014-10-28 13:56:45 +01:00
=
let pdf_pages = Pdfpage . pages_of_pagetree pdf in
let points = bookmark_pages level pdf in
let points =
lose ( fun x -> x < = 0 | | x > Pdfpage . endpage pdf ) ( map pred points )
in
let pts = splitat points ( indx pdf_pages ) in
fast_write_split_pdfs
2021-10-15 18:18:23 +02:00
enc level original_filename squeeze spec pdf pts pdf_pages
2014-10-28 13:56:45 +01:00
let split_pdf
enc original_filename
chunksize linearize ~ cpdflin ~ preserve_objstm ~ create_objstm ~ squeeze
2021-10-15 18:18:23 +02:00
spec pdf
2014-10-28 13:56:45 +01:00
=
let pdf_pages = Pdfpage . pages_of_pagetree pdf in
fast_write_split_pdfs
2021-10-15 18:18:23 +02:00
enc 0 original_filename squeeze spec pdf
2014-11-03 20:19:12 +01:00
( splitinto chunksize ( indx pdf_pages ) ) pdf_pages
2013-08-20 16:32:57 +02:00
2023-10-26 14:46:51 +02:00
(* Given a PDF, write the split as if we had selected pages, and return its filesize. Delete it. *)
2023-10-26 16:55:55 +02:00
let split_max_fits pdf s p q =
2023-10-30 15:21:49 +01:00
if q < p then error " split_max_fits " else
2023-10-26 14:46:51 +02:00
let filename = Filename . temp_file " cpdf " " sm " in
let range = ilist p q in
2024-07-05 15:35:32 +02:00
let newpdf = Pdfpage . pdf_of_pages ~ process_struct_tree : args . process_struct_trees ~ retain_numbering : args . retain_numbering pdf range in
2023-10-26 14:46:51 +02:00
let r = args . out in
args . out <- File filename ;
write_pdf false newpdf ;
args . out <- r ;
let fh = open_in_bin filename in
let size = in_channel_length fh in
2023-10-31 13:25:54 +01:00
begin try close_in fh ; Sys . remove filename with _ -> () end ;
2023-10-26 16:55:55 +02:00
size < = s
(* Binary search on q from current value down to p to find max which fits. Returns q. Upon failure, returns -1 *)
2023-10-26 17:32:48 +02:00
let rec split_max_search pdf s b p q =
2023-10-26 16:55:55 +02:00
if p = q then
2023-10-26 17:32:48 +02:00
if split_max_fits pdf s b q then q else - 1
2023-10-26 16:55:55 +02:00
else
2023-10-26 17:51:54 +02:00
let half = ( q + p ) / 2 in
if split_max_fits pdf s b ( half + 1 )
then split_max_search pdf s b ( half + 1 ) q
else split_max_search pdf s b p half
2023-10-26 14:46:51 +02:00
2024-02-27 16:57:31 +01:00
let split_max enc original_filename ~ squeeze output_spec s pdf =
2023-10-26 14:46:51 +02:00
let outs = ref [] in
let p = ref 1 in
let endpage = Pdfpage . endpage pdf in
let q = ref endpage in
2023-10-26 18:59:57 +02:00
while ! p < ! q | | ! p = endpage do
2023-10-26 17:32:48 +02:00
let newq = split_max_search pdf s ! p ! p ! q in
2023-10-30 15:21:49 +01:00
if newq = - 1 then ( Printf . eprintf " Failed to make small enough split at page %i. No files written. \n " ! p ; exit 2 ) else
2023-10-26 16:55:55 +02:00
begin
2023-10-31 13:25:54 +01:00
(* Printf.printf "Pages %i-%i will fit...\n%!" !p newq; *)
2023-10-26 16:55:55 +02:00
outs := ilist ! p newq :: ! outs ;
p := newq + 1 ;
q := endpage
end
done ;
fast_write_split_pdfs enc 0 original_filename squeeze output_spec pdf ( rev ! outs ) ( Pdfpage . pages_of_pagetree pdf )
2023-10-25 19:15:19 +02:00
2013-08-20 16:32:57 +02:00
let getencryption pdf =
match Pdfread . what_encryption pdf with
| None | Some Pdfwrite . AlreadyEncrypted -> " Not encrypted "
| Some Pdfwrite . PDF40bit -> " 40bit "
| Some Pdfwrite . PDF128bit -> " 128bit "
| Some ( Pdfwrite . AES128bit true ) -> " 128bit AES, Metadata encrypted "
| Some ( Pdfwrite . AES128bit false ) -> " 128bit AES, Metadata not encrypted "
| Some ( Pdfwrite . AES256bit true ) -> " 256bit AES, Metadata encrypted "
| Some ( Pdfwrite . AES256bit false ) -> " 256bit AES, Metadata not encrypted "
| Some ( Pdfwrite . AES256bitISO true ) -> " 256bit AES ISO, Metadata encrypted "
| Some ( Pdfwrite . AES256bitISO false ) -> " 256bit AES ISO, Metadata not encrypted "
2020-01-30 11:20:33 +01:00
let write_json output pdf =
match output with
| NoOutputSpecified ->
error " -output-json: no output name specified "
| Stdout ->
2021-10-04 19:38:36 +02:00
Cpdfjson . to_output
( Pdfio . output_of_channel stdout )
2023-01-17 06:37:54 +01:00
~ utf8 : ( args . encoding = Cpdfmetadata . UTF8 )
2021-10-12 16:35:08 +02:00
~ parse_content : args . jsonparsecontentstreams
~ no_stream_data : args . jsonnostreamdata
~ decompress_streams : args . jsondecompressstreams
2021-12-30 16:25:24 +01:00
~ clean_strings : args . jsoncleanstrings
2021-10-04 19:38:36 +02:00
pdf
2020-01-30 11:20:33 +01:00
| File filename ->
2021-05-27 18:31:12 +02:00
let f = open_out filename in
2021-10-04 19:38:36 +02:00
Cpdfjson . to_output
( Pdfio . output_of_channel f )
2023-01-17 06:37:54 +01:00
~ utf8 : ( args . encoding = Cpdfmetadata . UTF8 )
2021-10-12 16:35:08 +02:00
~ parse_content : args . jsonparsecontentstreams
~ no_stream_data : args . jsonnostreamdata
~ decompress_streams : args . jsondecompressstreams
2021-12-30 16:25:24 +01:00
~ clean_strings : args . jsoncleanstrings
2021-10-04 19:38:36 +02:00
pdf ;
2020-01-30 11:20:33 +01:00
close_out f
2024-06-03 16:49:52 +02:00
let json_to_output json = function
| NoOutputSpecified ->
error " no output name specified "
| Stdout ->
output_string stdout ( Cpdfyojson . Safe . pretty_to_string json ) ;
| File filename ->
let f = open_out filename in
output_string f ( Cpdfyojson . Safe . pretty_to_string json ) ;
close_out f
2021-10-16 16:47:41 +02:00
let collate ( names , pdfs , ranges ) =
let ois = map ref ( combine3 names pdfs ranges ) in
let nis = ref [] in
while flatten ( map ( fun { contents = ( _ , _ , r ) } -> r ) ois ) < > [] do
iter
( fun ( { contents = ( name , pdf , range ) } as r ) ->
match range with
| [] -> ()
| h :: t ->
nis := ( name , pdf , [ h ] ) :: ! nis ;
r := ( name , pdf , t ) )
ois
done ;
split3 ( rev ! nis )
2022-01-02 16:18:55 +01:00
let warn_prerotate range pdf =
2024-01-24 19:40:20 +01:00
if not args . prerotate && not ( Cpdfpage . alluprightonly range pdf ) then
Pdfe . log " Some pages in the range have non-zero rotation. \
2023-04-25 14:45:56 +02:00
Consider adding - prerotate or pre - processing with - upright . \
To silence this warning use - no - warn - rotate \ n "
2022-01-02 16:18:55 +01:00
let prerotate range pdf =
Cpdfpage . upright ~ fast : args . fast range pdf
2023-02-20 20:39:42 +01:00
let check_bookmarks_mistake () =
if args . merge_add_bookmarks_use_titles && not args . merge_add_bookmarks then
2023-04-25 14:45:56 +02:00
Pdfe . log " Warning: -merge-add-bookmarks-use-titles is for use with -merge-add-bookmarks \n "
2023-02-20 20:39:42 +01:00
2023-02-28 17:55:35 +01:00
let check_clashing_output_name () =
match args . out with
| File s ->
if ( List . exists ( function ( InFile s' , _ , _ , _ , _ , _ ) when s = s' -> true | _ -> false ) args . inputs ) then
2023-04-25 14:45:56 +02:00
Pdfe . log " Warning: output file name clashes with input file name. Malformed file may result. \n "
2023-02-28 17:55:35 +01:00
| _ -> ()
2023-10-30 18:55:19 +01:00
let build_enc () =
match args . crypt_method with
| " " -> None
| _ ->
Some
{ Pdfwrite . encryption_method =
( match args . crypt_method with
| " 40bit " -> Pdfwrite . PDF40bit
| " 128bit " -> Pdfwrite . PDF128bit
| " AES " -> Pdfwrite . AES128bit args . encrypt_metadata
| " AES256 " -> Pdfwrite . AES256bit args . encrypt_metadata
| " AES256ISO " -> Pdfwrite . AES256bitISO args . encrypt_metadata
| _ -> assert false (* Pre-checked *) ) ;
Pdfwrite . owner_password = args . owner ;
Pdfwrite . user_password = args . user ;
Pdfwrite . permissions = banlist_of_args () }
2024-01-18 17:20:51 +01:00
let extract_stream pdf decomp objnum =
2024-06-24 17:21:28 +02:00
let objnum = int_of_string objnum in (* maybe objspec in the future... *)
2024-01-18 17:20:51 +01:00
let obj = Pdf . lookup_obj pdf objnum in
Pdf . getstream obj ;
if decomp then Pdfcodec . decode_pdfstream_until_unknown pdf obj ;
let data =
match obj with
| Pdf . Stream { contents = ( _ , Pdf . Got x ) } -> x
| _ -> mkbytes 0
in
match args . out with
| NoOutputSpecified ->
()
| File outname ->
let fh = open_out_bin outname in
output_string fh ( Pdfio . string_of_bytes data ) ;
close_out fh
| Stdout ->
output_string stdout ( Pdfio . string_of_bytes data )
2024-01-17 19:37:58 +01:00
2024-06-24 17:21:28 +02:00
(* Empty string is trailerdict. Begins with / and it's a chain separated by
commas . Begins with P and it's a page number then a ( possibly empty ) chain .
Otherwise it's an object number ( 0 = trailerdict ) . * )
2024-09-03 18:45:55 +02:00
let split_chain str = map ( fun x -> " / " ^ x ) ( tl ( String . split_on_char '/' str ) )
2024-06-24 17:21:28 +02:00
let print_obj pdf objspec =
let simple_obj obj =
let obj = if obj = 0 then pdf . Pdf . trailerdict else Pdf . lookup_obj pdf obj in
2024-01-17 21:10:07 +01:00
Printf . printf " %S \n " ( Pdfwrite . string_of_pdf obj )
2024-06-24 17:21:28 +02:00
in
let chain_obj objnum chain =
let obj = if objnum = 0 then pdf . Pdf . trailerdict else Pdf . lookup_obj pdf objnum in
match Pdf . lookup_chain pdf obj chain with
| Some x -> Printf . printf " %S \n " ( Pdfwrite . string_of_pdf x )
| None -> ()
in
match explode objspec with
| 'P' :: more ->
let number , chain =
let digits , rest = cleavewhile isdigit more in
List . nth ( Pdf . page_reference_numbers pdf ) ( int_of_string ( implode digits ) - 1 ) ,
2024-09-03 18:45:55 +02:00
begin match split_chain ( implode rest ) with [ " " ] -> [] | x -> x end
2024-06-24 17:21:28 +02:00
in
chain_obj number chain
2024-09-03 18:45:55 +02:00
| '/' :: more -> chain_obj 0 ( split_chain ( implode ( '/' :: more ) ) )
2024-06-24 17:21:28 +02:00
| [] -> simple_obj 0
| _ -> simple_obj ( int_of_string objspec )
2024-01-17 19:37:58 +01:00
2024-10-13 12:12:56 +02:00
let print_version () =
flprint
( " cpdf " ^ ( if agpl then " AGPL " else " " ) ^ " Version " ^ string_of_int major_version ^ " . " ^ string_of_int minor_version ^ " . " ^ string_of_int minor_minor_version ^ " " ^ version_date ^ " \n " )
2024-09-02 17:30:02 +02:00
2014-10-14 20:45:10 +02:00
(* Main function *)
2013-08-20 16:32:57 +02:00
let go () =
2023-02-20 20:39:42 +01:00
check_bookmarks_mistake () ;
2023-02-28 17:55:35 +01:00
check_clashing_output_name () ;
2013-08-20 16:32:57 +02:00
match args . op with
2024-10-13 12:12:56 +02:00
| Some Version -> print_version ()
2013-08-20 16:32:57 +02:00
| None | Some Merge ->
begin match args . out , args . inputs with
| _ , ( _ :: _ as inputs ) ->
2014-10-28 19:40:56 +01:00
let op = match inputs with [ _ ] -> None | _ -> Some Merge in
2014-12-11 19:17:02 +01:00
let names , ranges , rotations , _ , _ , _ = split6 inputs in
2014-10-28 19:40:56 +01:00
let pdfs = map2 ( fun i -> get_pdf_from_input_kind i op ) inputs names in
2013-08-20 16:32:57 +02:00
(* If at least one file had object streams and args.preserve_objstm is true, set -objstm-create *)
if args . preserve_objstm then
iter
( fun pdf ->
if Hashtbl . length pdf . Pdf . objects . Pdf . object_stream_ids > 0
then args . create_objstm <- true )
pdfs ;
begin match pdfs with
| [ pdf ] ->
2023-04-12 20:20:47 +02:00
if hd ranges < > " all " then
2014-10-14 20:36:57 +02:00
let range = parse_pagespec pdf ( hd ranges ) in
2024-07-05 15:35:32 +02:00
let newpdf = Pdfpage . pdf_of_pages ~ process_struct_tree : args . process_struct_trees ~ retain_numbering : args . retain_numbering pdf range in
2014-10-14 20:36:57 +02:00
write_pdf false newpdf
2013-08-20 16:32:57 +02:00
else
write_pdf false pdf
| _ ->
2014-11-05 14:41:47 +01:00
(* We check permissions. A merge is allowed if each file
2014-11-05 15:05:20 +01:00
included was ( a ) not encrypted ( detected by the absence of
saved encryption information in the PDF , or ( b ) decrypted using
the owner password ( stored in the input ) * )
if
2018-03-20 16:43:15 +01:00
( not args . debugforce ) &&
( not
2014-11-05 15:05:20 +01:00
( fold_left ( && ) true
( map2
2014-12-11 19:17:02 +01:00
( fun ( _ , _ , _ , _ , was_dec_with_owner , _ ) pdf ->
2014-11-05 15:05:20 +01:00
! was_dec_with_owner | | pdf . Pdf . saved_encryption = None )
inputs
2018-03-20 16:43:15 +01:00
pdfs ) ) )
2014-11-05 15:05:20 +01:00
then
2021-10-12 19:58:37 +02:00
soft_error " Merge requires the owner password for all encrypted files, or -decrypt-force. "
2014-11-05 15:05:20 +01:00
else
2019-07-03 15:40:32 +02:00
let pdfs =
if args . merge_add_bookmarks then
2020-03-04 19:50:32 +01:00
map2
2021-12-21 16:06:40 +01:00
( fun filename pdf -> Cpdfbookmarks . add_bookmark_title filename args . merge_add_bookmarks_use_titles pdf )
2020-03-04 19:50:32 +01:00
( map ( function InFile s -> s | StdIn -> " " | AlreadyInMemory _ -> " " ) names )
2019-07-03 15:40:32 +02:00
pdfs
else
pdfs
in
2014-11-05 15:05:20 +01:00
(* If args.keep_this_id is set, change the ID to the one from the kept one *)
let rangenums = map2 parse_pagespec pdfs ranges in
2021-10-16 16:47:41 +02:00
(* At this point, we have the information for collation. *)
let names = map string_of_input_kind names in
let names , pdfs , rangenums =
( if args . collate then collate else Fun . id ) ( names , pdfs , rangenums )
in
2014-11-05 15:05:20 +01:00
let outpdf =
2019-07-03 15:40:32 +02:00
Pdfmerge . merge_pdfs
2024-10-22 15:24:13 +02:00
args . retain_numbering args . remove_duplicate_fonts ~ process_struct_trees : args . process_struct_trees
~ add_toplevel_document : ( args . subformat = Some Cpdfua . PDFUA2 ) names pdfs rangenums
2014-11-05 15:05:20 +01:00
in
2023-12-05 13:56:56 +01:00
if args . remove_duplicate_streams then Pdfmerge . remove_duplicate_fonts outpdf ; (* JBIG2 Globals *)
2014-11-05 15:05:20 +01:00
write_pdf false outpdf
2013-08-20 16:32:57 +02:00
end
| _ ->
match args . op with
| Some Merge ->
error " Merge: Must specify one output and at least one input "
| None ->
error " Must specify one output and at least one input "
| _ -> assert false
end
| Some ( CopyFont fromfile ) ->
begin match args . inputs , args . out with
2014-12-11 19:17:02 +01:00
| ( _ , pagespec , u , o , _ , _ ) :: _ , _ ->
2013-08-20 16:32:57 +02:00
let pdf = get_single_pdf ( Some ( CopyFont fromfile ) ) false
and frompdf = pdfread_pdf_of_file ( optstring u ) ( optstring o ) fromfile in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2013-08-20 16:32:57 +02:00
let copyfontname =
match args . copyfontname with
| Some x -> x
| None -> failwith " copy_font: no font name given "
in
2021-10-18 17:22:38 +02:00
let outpdf = Cpdffont . copy_font frompdf copyfontname args . copyfontpage range pdf in
2013-08-20 16:32:57 +02:00
write_pdf true outpdf
| _ -> error " copyfont: bad command line "
end
| Some RemoveFonts ->
begin match args . inputs , args . out with
2014-12-11 19:17:02 +01:00
| ( _ , pagespec , _ , _ , _ , _ ) :: _ , _ ->
2013-08-20 16:32:57 +02:00
let pdf = get_single_pdf ( Some RemoveFonts ) false in
2021-11-12 23:09:49 +01:00
write_pdf true ( Cpdffont . remove_fonts pdf )
2013-08-20 16:32:57 +02:00
| _ -> error " remove fonts: bad command line "
end
2023-11-02 19:49:15 +01:00
| Some ( ExtractFontFile spec ) ->
2013-08-20 16:32:57 +02:00
begin match args . inputs , args . out with
2023-11-02 19:49:15 +01:00
| ( _ , pagespec , u , o , _ , _ ) :: _ , File filename ->
let pdf = get_single_pdf ( Some ( ExtractFontFile spec ) ) false in
begin match String . split_on_char ',' spec with
| [ pnum ; name ] ->
begin try Cpdffont . extract_fontfile ( int_of_string pnum ) name filename pdf with
Failure _ (* "int_of_string" *) -> error " extract font: bad page number "
end
| _ -> error " extract font: bad specification "
end
2013-08-20 16:32:57 +02:00
| _ -> error " extract fontfile: bad command line "
end
| Some CountPages ->
2019-07-02 16:50:36 +02:00
begin match args . inputs with
[ ( ik , _ , _ , _ , _ , _ ) as input ] ->
2019-07-02 17:50:39 +02:00
let pdf = get_pdf_from_input_kind ~ read_lazy : true ~ decrypt : false input ( Some CountPages ) ik in
2019-07-02 16:50:36 +02:00
output_page_count pdf
| _ -> raise ( Arg . Bad " CountPages: must have a single input file only " )
end
2013-08-20 16:32:57 +02:00
| Some Clean ->
2019-10-21 12:28:40 +02:00
let pdf' = get_single_pdf ( Some Clean ) false in
write_pdf false pdf'
2013-08-20 16:32:57 +02:00
| Some Info ->
2019-10-26 15:10:03 +02:00
let pdf , inname , input =
match args . inputs with
| ( InFile inname , _ , u , o , _ , _ ) as input :: _ ->
pdfread_pdf_of_channel_lazy ( optstring u ) ( optstring o ) ( open_in_bin inname ) , inname , input
| ( StdIn , _ , u , o , _ , _ ) as input :: _ -> pdf_of_stdin u o , " " , input
2023-12-05 13:20:03 +01:00
| ( AlreadyInMemory ( pdf , _ ) , _ , _ , _ , _ , _ ) as input :: _ -> pdf , " " , input
2019-10-26 15:10:03 +02:00
| _ -> raise ( Arg . Bad " cpdf: No input specified. \n " )
in
2023-11-01 15:55:14 +01:00
let json = ref [] in
if args . format_json
then json = | ( " Encryption " , ` String ( getencryption pdf ) )
else Printf . printf " Encryption: %s \n " ( getencryption pdf ) ;
if args . format_json
then json = | ( " Permissions " , ` List ( map ( fun p -> ` String ( string_of_permission p ) ) ( Pdfread . permissions pdf ) ) )
else Printf . printf " Permissions: %s \n " ( getpermissions pdf ) ;
2019-10-26 15:10:03 +02:00
if inname < > " " then
2023-11-01 15:55:14 +01:00
let lin = Pdfread . is_linearized ( Pdfio . input_of_channel ( open_in_bin inname ) ) in
if args . format_json then
json = | ( " Linearized " , ` Bool lin ) else Printf . printf " Linearized: %b \n " lin ;
let objstm = length ( list_of_hashtbl pdf . Pdf . objects . Pdf . object_stream_ids ) > 0 in
if args . format_json
then json = | ( " Object streams " , ` Bool objstm )
else Printf . printf " Object streams: %b \n " objstm ;
let ida , idb =
match Pdf . lookup_direct pdf " /ID " pdf . Pdf . trailerdict with
| Some ( Pdf . Array [ Pdf . String s ; Pdf . String s' ] ) ->
( Pdfwrite . make_hex_pdf_string s , Pdfwrite . make_hex_pdf_string s' )
| _ -> " " , " "
in
let fixid s = implode ( rev ( tl ( rev ( tl ( explode s ) ) ) ) ) in
if args . format_json
then json = | ( " ID " , if ida ^ idb = " " then ` Null else ` List [ ` String ( fixid ida ) ; ` String ( fixid idb ) ] )
2024-02-14 16:05:46 +01:00
else ( if ida ^ idb = " " then Printf . printf " ID: None \n " else Printf . printf " ID: %s %s \n " ida idb ) ;
2019-10-26 15:10:03 +02:00
let pdf = decrypt_if_necessary input ( Some Info ) pdf in
2023-11-01 15:55:14 +01:00
if args . format_json then
2023-11-01 21:47:52 +01:00
begin
Cpdfmetadata . output_info ~ json Cpdfmetadata . UTF8 pdf ;
Cpdfmetadata . output_xmp_info ~ json Cpdfmetadata . UTF8 pdf ;
flprint ( Cpdfyojson . Safe . pretty_to_string ( ` Assoc ( rev ! json ) ) )
end
2023-11-01 15:55:14 +01:00
else
begin
Cpdfmetadata . output_info args . encoding pdf ;
Cpdfmetadata . output_xmp_info args . encoding pdf
end
2013-08-20 16:32:57 +02:00
| Some PageInfo ->
2014-08-11 15:05:07 +02:00
begin match args . inputs , args . out with
2014-12-11 19:17:02 +01:00
| ( _ , pagespec , _ , _ , _ , _ ) :: _ , _ ->
2014-08-11 15:05:07 +02:00
let pdf = get_single_pdf args . op true in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2023-10-31 17:51:57 +01:00
Cpdfpage . output_page_info ~ json : args . format_json pdf range
2014-08-11 15:05:07 +02:00
| _ -> error " list-bookmarks: bad command line "
end
2013-08-20 16:32:57 +02:00
| Some Metadata ->
2021-12-19 13:55:06 +01:00
Cpdfmetadata . print_metadata ( get_single_pdf ( Some Metadata ) true )
2013-08-20 16:32:57 +02:00
| Some Fonts ->
2021-11-03 17:51:28 +01:00
begin match args . inputs , args . out with
| ( _ , pagespec , _ , _ , _ , _ ) :: _ , _ ->
let pdf = get_single_pdf ( Some Fonts ) true in
let range = parse_pagespec_allow_empty pdf pagespec in
2023-10-31 17:23:20 +01:00
Cpdffont . print_fonts ~ json : args . format_json pdf range
2021-11-03 17:51:28 +01:00
| _ -> error " -list-fonts: bad command line "
end
2013-08-20 16:32:57 +02:00
| Some ListBookmarks ->
begin match args . inputs , args . out with
2014-12-11 19:17:02 +01:00
| ( _ , pagespec , _ , _ , _ , _ ) :: _ , _ ->
2013-08-20 16:32:57 +02:00
let pdf = get_single_pdf args . op true in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2021-12-21 14:44:46 +01:00
Cpdfbookmarks . list_bookmarks ~ json : args . format_json args . encoding range pdf ( Pdfio . output_of_channel stdout ) ;
2013-08-20 16:32:57 +02:00
flush stdout
| _ -> error " list-bookmarks: bad command line "
end
| Some Crop ->
begin match args . inputs , args . out with
2014-12-11 19:17:02 +01:00
| ( _ , pagespec , _ , _ , _ , _ ) :: _ , _ ->
2016-07-18 21:00:10 +02:00
let pdf = get_single_pdf ( Some Crop ) false in
2019-09-26 12:44:54 +02:00
let xywhlist = Cpdfcoord . parse_rectangles pdf args . rectangle in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2021-12-22 10:31:39 +01:00
let pdf = Cpdfpage . crop_pdf xywhlist pdf range in
2013-08-20 16:32:57 +02:00
write_pdf false pdf
| _ -> error " crop: bad command line "
end
2019-06-26 15:43:24 +02:00
| Some Art ->
begin match args . inputs , args . out with
| ( _ , pagespec , _ , _ , _ , _ ) :: _ , _ ->
let pdf = get_single_pdf ( Some Art ) false in
2019-09-26 12:44:54 +02:00
let xywhlist = Cpdfcoord . parse_rectangles pdf args . rectangle in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2021-12-22 10:31:39 +01:00
let pdf = Cpdfpage . crop_pdf ~ box : " /ArtBox " xywhlist pdf range in
2019-06-26 15:43:24 +02:00
write_pdf false pdf
2019-08-11 20:39:12 +02:00
| _ -> error " art: bad command line "
2019-06-26 15:43:24 +02:00
end
| Some Bleed ->
begin match args . inputs , args . out with
| ( _ , pagespec , _ , _ , _ , _ ) :: _ , _ ->
let pdf = get_single_pdf ( Some Bleed ) false in
2019-09-26 12:44:54 +02:00
let xywhlist = Cpdfcoord . parse_rectangles pdf args . rectangle in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2021-12-22 10:31:39 +01:00
let pdf = Cpdfpage . crop_pdf ~ box : " /BleedBox " xywhlist pdf range in
2019-06-26 15:43:24 +02:00
write_pdf false pdf
2019-08-11 20:39:12 +02:00
| _ -> error " bleed: bad command line "
2019-06-26 15:43:24 +02:00
end
| Some Trim ->
begin match args . inputs , args . out with
| ( _ , pagespec , _ , _ , _ , _ ) :: _ , _ ->
let pdf = get_single_pdf ( Some Trim ) false in
2019-09-26 12:44:54 +02:00
let xywhlist = Cpdfcoord . parse_rectangles pdf args . rectangle in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2021-12-22 10:31:39 +01:00
let pdf = Cpdfpage . crop_pdf ~ box : " /TrimBox " xywhlist pdf range in
2019-06-26 15:43:24 +02:00
write_pdf false pdf
2019-08-11 20:39:12 +02:00
| _ -> error " trim: bad command line "
2019-06-26 15:43:24 +02:00
end
2013-08-20 16:32:57 +02:00
| Some MediaBox ->
begin match args . inputs , args . out with
2014-12-11 19:17:02 +01:00
| ( _ , pagespec , _ , _ , _ , _ ) :: _ , _ ->
2016-07-18 21:00:10 +02:00
let pdf = get_single_pdf ( Some MediaBox ) false in
2019-09-26 12:44:54 +02:00
let xywhlist = Cpdfcoord . parse_rectangles pdf args . rectangle in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2021-12-22 09:58:56 +01:00
let pdf = Cpdfpage . set_mediabox xywhlist pdf range in
2013-08-20 16:32:57 +02:00
write_pdf false pdf
| _ -> error " set media box: bad command line "
end
2017-05-19 20:10:49 +02:00
| Some ( HardBox box ) ->
begin match args . inputs , args . out with
| ( _ , pagespec , _ , _ , _ , _ ) :: _ , _ ->
let pdf = get_single_pdf ( Some ( HardBox box ) ) false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2021-12-22 09:58:56 +01:00
let pdf = Cpdfpage . hard_box pdf range box args . mediabox_if_missing args . fast in
2017-05-19 20:10:49 +02:00
write_pdf false pdf
| _ -> error " hard box: bad command line "
end
2013-08-20 16:32:57 +02:00
| Some CopyBox ->
begin match args . inputs , args . out with
2014-12-11 19:17:02 +01:00
| ( _ , pagespec , _ , _ , _ , _ ) :: _ , _ ->
2013-08-20 16:32:57 +02:00
let pdf = get_single_pdf ( Some CopyBox ) false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2013-08-20 16:32:57 +02:00
let f , t =
begin match args . frombox , args . tobox with
| Some f , Some t -> f , t
| _ -> error " Copy box: no tobox or no frombox specified "
end
in
2021-12-22 10:31:39 +01:00
let pdf = Cpdfpage . copy_box f t args . mediabox_if_missing pdf range in
2013-08-20 16:32:57 +02:00
write_pdf false pdf
| _ -> error " Copy Box: bad command line "
end
| Some Decompress ->
2017-01-04 18:52:14 +01:00
let pdf = get_single_pdf ( Some Decompress ) false in
2013-08-20 16:32:57 +02:00
Pdf . iter_stream
( function stream ->
try Pdfcodec . decode_pdfstream_until_unknown pdf stream with
2023-04-25 14:45:56 +02:00
e -> Pdfe . log ( Printf . sprintf " Decode failure: %s. Carrying on... \n " ( Printexc . to_string e ) ) ; () )
2013-08-20 16:32:57 +02:00
pdf ;
2020-11-11 15:26:48 +01:00
write_pdf ~ is_decompress : true false pdf
2013-08-20 16:32:57 +02:00
| Some Compress ->
let pdf = get_single_pdf ( Some Compress ) false in
if args . remove_duplicate_streams then
2014-09-15 20:12:04 +02:00
Pdfmerge . remove_duplicate_fonts pdf ;
2021-12-18 17:26:33 +01:00
write_pdf false ( Cpdfsqueeze . recompress_pdf pdf )
2013-08-20 16:32:57 +02:00
| Some RemoveCrop ->
begin match args . inputs , args . out with
2014-12-11 19:17:02 +01:00
| ( _ , pagespec , _ , _ , _ , _ ) :: _ , _ ->
2013-08-20 16:32:57 +02:00
let pdf = get_single_pdf ( Some RemoveCrop ) false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2021-12-22 09:58:56 +01:00
let pdf = Cpdfpage . remove_cropping_pdf pdf range in
2013-08-20 16:32:57 +02:00
write_pdf false pdf
| _ -> error " remove-crop: bad command line "
end
2019-06-26 15:43:24 +02:00
| Some RemoveArt ->
begin match args . inputs , args . out with
| ( _ , pagespec , _ , _ , _ , _ ) :: _ , _ ->
let pdf = get_single_pdf ( Some RemoveArt ) false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2021-12-22 09:58:56 +01:00
let pdf = Cpdfpage . remove_art_pdf pdf range in
2019-06-26 15:43:24 +02:00
write_pdf false pdf
| _ -> error " remove-crop: bad command line "
end
| Some RemoveTrim ->
begin match args . inputs , args . out with
| ( _ , pagespec , _ , _ , _ , _ ) :: _ , _ ->
let pdf = get_single_pdf ( Some RemoveTrim ) false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2021-12-22 09:58:56 +01:00
let pdf = Cpdfpage . remove_trim_pdf pdf range in
2019-06-26 15:43:24 +02:00
write_pdf false pdf
| _ -> error " remove-crop: bad command line "
end
| Some RemoveBleed ->
begin match args . inputs , args . out with
| ( _ , pagespec , _ , _ , _ , _ ) :: _ , _ ->
let pdf = get_single_pdf ( Some RemoveBleed ) false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2021-12-22 09:58:56 +01:00
let pdf = Cpdfpage . remove_bleed_pdf pdf range in
2019-06-26 15:43:24 +02:00
write_pdf false pdf
| _ -> error " remove-crop: bad command line "
end
2013-08-20 16:32:57 +02:00
| Some ( Rotate _ ) | Some ( Rotateby _ ) ->
begin match args . inputs , args . out with
2014-12-11 19:17:02 +01:00
| ( _ , pagespec , _ , _ , _ , _ ) :: _ , _ ->
2013-08-20 16:32:57 +02:00
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2013-08-20 16:32:57 +02:00
let rotate =
match args . op with
2021-12-22 09:58:56 +01:00
| Some ( Rotate i ) -> Cpdfpage . rotate_pdf i
| Some ( Rotateby i ) -> Cpdfpage . rotate_pdf_by i
2013-08-20 16:32:57 +02:00
| _ -> assert false
in
let pdf = rotate pdf range in
write_pdf false pdf
| _ -> error " rotate: bad command line "
end
| Some ( RotateContents a ) ->
begin match args . inputs , args . out with
2014-12-11 19:17:02 +01:00
| ( _ , pagespec , _ , _ , _ , _ ) :: _ , _ ->
2013-08-20 16:32:57 +02:00
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2021-12-22 09:58:56 +01:00
let pdf = Cpdfpage . rotate_contents ~ fast : args . fast a pdf range in
2013-08-20 16:32:57 +02:00
write_pdf false pdf
| _ -> error " rotate-contents: bad command line "
end
| Some Upright ->
begin match args . inputs , args . out with
2014-12-11 19:17:02 +01:00
| ( _ , pagespec , _ , _ , _ , _ ) :: _ , _ ->
2013-08-20 16:32:57 +02:00
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2021-12-22 09:58:56 +01:00
let pdf = Cpdfpage . upright ~ fast : args . fast range pdf in
2013-08-20 16:32:57 +02:00
write_pdf false pdf
| _ -> error " rotate-contents: bad command line "
end
| Some ( ( VFlip | HFlip ) as flip ) ->
begin match args . inputs , args . out with
2014-12-11 19:17:02 +01:00
| ( _ , pagespec , _ , _ , _ , _ ) :: _ , _ ->
2013-08-20 16:32:57 +02:00
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf pagespec in
2013-08-20 16:32:57 +02:00
let pdf =
if flip = VFlip
2021-12-22 10:31:39 +01:00
then Cpdfpage . vflip_pdf ~ fast : args . fast pdf range
else Cpdfpage . hflip_pdf ~ fast : args . fast pdf range
2013-08-20 16:32:57 +02:00
in
write_pdf false pdf
| _ -> error " flip: bad command line "
end
| Some ( ( SetAuthor _ | SetTitle _ | SetSubject _ | SetKeywords _
| SetCreate _ | SetModify _ | SetCreator _ | SetProducer _
| SetTrapped | SetUntrapped ) as op ) ->
let key , value , version =
2021-12-19 13:55:06 +01:00
let f s = if args . encoding < > Cpdfmetadata . Raw then Pdftext . pdfdocstring_of_utf8 s else unescape_octals s in
2013-08-20 16:32:57 +02:00
match op with
| SetAuthor s -> " /Author " , Pdf . String ( f s ) , 0
| SetTitle s -> " /Title " , Pdf . String ( f s ) , 1
| SetSubject s -> " /Subject " , Pdf . String ( f s ) , 1
| SetKeywords s -> " /Keywords " , Pdf . String ( f s ) , 1
2021-12-19 13:55:06 +01:00
| SetCreate s -> " /CreationDate " , Pdf . String ( Cpdfmetadata . expand_date s ) , 0
| SetModify s -> " /ModDate " , Pdf . String ( Cpdfmetadata . expand_date s ) , 0
2013-08-20 16:32:57 +02:00
| SetCreator s -> " /Creator " , Pdf . String ( f s ) , 0
| SetProducer s -> " /Producer " , Pdf . String ( f s ) , 0
| SetTrapped -> " /Trapped " , Pdf . Boolean true , 3
| SetUntrapped -> " /Trapped " , Pdf . Boolean false , 3
| _ -> assert false
in
let pdf = get_single_pdf args . op false in
2023-03-22 16:45:00 +01:00
let version = if args . keepversion | | pdf . Pdf . major > 1 then pdf . Pdf . minor else version in
2019-06-28 16:01:28 +02:00
write_pdf false
2021-12-19 13:55:06 +01:00
( Cpdfmetadata . set_pdf_info
2019-06-28 16:01:28 +02:00
~ xmp_also : args . alsosetxml
~ xmp_just_set : args . justsetxml
( key , value , version ) pdf )
| Some ( SetMetadataDate date ) ->
2021-12-19 13:55:06 +01:00
write_pdf false ( Cpdfmetadata . set_metadata_date ( get_single_pdf args . op false ) date )
2013-08-20 16:32:57 +02:00
| Some ( ( HideToolbar _ | HideMenubar _ | HideWindowUI _
| FitWindow _ | CenterWindow _ | DisplayDocTitle _ ) as op ) ->
begin match args . out with
| _ ->
let key , value , version =
match op with
| HideToolbar s -> " /HideToolbar " , Pdf . Boolean s , 0
| HideMenubar s -> " /HideMenubar " , Pdf . Boolean s , 0
| HideWindowUI s -> " /HideWindowUI " , Pdf . Boolean s , 0
| FitWindow s -> " /FitWindow " , Pdf . Boolean s , 0
| CenterWindow s -> " /CenterWindow " , Pdf . Boolean s , 0
| DisplayDocTitle s -> " /DisplayDocTitle " , Pdf . Boolean s , 4
| _ -> assert false
in
let pdf = get_single_pdf args . op false in
2023-03-22 16:45:00 +01:00
let version = if args . keepversion | | pdf . Pdf . major > 1 then pdf . Pdf . minor else version in
2021-12-19 13:55:06 +01:00
write_pdf false ( Cpdfmetadata . set_viewer_preference ( key , value , version ) pdf )
2013-08-20 16:32:57 +02:00
end
2020-05-04 13:25:42 +02:00
| Some ( OpenAtPage str ) ->
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf str in
2020-05-04 13:25:42 +02:00
let n = match range with [ x ] -> x | _ -> error " open_at_page: range does not specify single page " in
2021-12-19 13:55:06 +01:00
write_pdf false ( Cpdfmetadata . set_open_action pdf false n )
2020-05-04 13:25:42 +02:00
| Some ( OpenAtPageFit str ) ->
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf str in
2023-04-18 15:42:17 +02:00
let n = match range with [ x ] -> x | _ -> error " open_at_page_fit: range does not specify single page " in
2021-12-19 13:55:06 +01:00
write_pdf false ( Cpdfmetadata . set_open_action pdf true n )
2023-04-18 15:42:17 +02:00
| Some ( OpenAtPageCustom dest ) ->
let pdf = get_single_pdf args . op false in
write_pdf false ( Cpdfmetadata . set_open_action ~ dest pdf true 1 )
2013-08-20 16:32:57 +02:00
| Some ( SetMetadata metadata_file ) ->
2021-12-19 13:55:06 +01:00
write_pdf false ( Cpdfmetadata . set_metadata args . keepversion metadata_file ( get_single_pdf args . op false ) )
2013-08-20 16:32:57 +02:00
| Some ( SetVersion v ) ->
let pdf = get_single_pdf args . op false in
2019-08-01 15:34:45 +02:00
let pdf =
if v > = 10
then { pdf with Pdf . major = 2 ; Pdf . minor = v - 10 }
else { pdf with Pdf . major = 1 ; Pdf . minor = v }
in
write_pdf false pdf
2013-08-20 16:32:57 +02:00
| Some ( SetPageLayout s ) ->
2021-12-19 13:55:06 +01:00
write_pdf false ( Cpdfmetadata . set_page_layout ( get_single_pdf args . op false ) s )
2013-08-20 16:32:57 +02:00
| Some ( SetPageMode s ) ->
2021-12-19 13:55:06 +01:00
write_pdf false ( Cpdfmetadata . set_page_mode ( get_single_pdf args . op false ) s )
2023-04-17 20:55:11 +02:00
| Some ( SetNonFullScreenPageMode s ) ->
write_pdf false ( Cpdfmetadata . set_non_full_screen_page_mode ( get_single_pdf args . op false ) s )
2013-08-20 16:32:57 +02:00
| Some Split ->
begin match args . inputs , args . out with
2014-12-11 19:17:02 +01:00
| [ ( f , ranges , _ , _ , _ , _ ) ] , File output_spec ->
2014-10-06 15:32:53 +02:00
let pdf = get_single_pdf args . op true in
2023-10-30 18:55:19 +01:00
let enc = build_enc () in
args . create_objstm <- args . preserve_objstm ;
split_pdf
enc args . original_filename args . chunksize args . linearize ~ cpdflin : args . cpdflin
~ preserve_objstm : args . preserve_objstm ~ create_objstm : args . preserve_objstm (* yes--always create if preserving *)
~ squeeze : args . squeeze output_spec pdf
2013-08-20 16:32:57 +02:00
| _ , Stdout -> error " Can't split to standard output "
| _ , NoOutputSpecified -> error " Split: No output format specified "
| _ -> error " Split: bad parameters "
end
2014-10-28 20:01:37 +01:00
| Some ( SplitOnBookmarks level ) ->
begin match args . out with
| File output_spec ->
let pdf = get_single_pdf args . op false in
2023-10-30 18:55:19 +01:00
let enc = build_enc () in
args . create_objstm <- args . preserve_objstm ;
split_at_bookmarks
enc args . original_filename ~ squeeze : args . squeeze level output_spec pdf
2014-10-28 20:01:37 +01:00
| Stdout -> error " Can't split to standard output "
| NoOutputSpecified -> error " Split: No output format specified "
end
2023-10-25 19:15:19 +02:00
| Some ( SplitMax s ) ->
begin match args . out with
| File output_spec ->
let pdf = get_single_pdf args . op false in
2023-10-30 18:55:19 +01:00
let enc = build_enc () in
args . create_objstm <- args . preserve_objstm ;
split_max enc args . original_filename ~ squeeze : args . squeeze output_spec s pdf
2023-10-25 19:15:19 +02:00
| Stdout -> error " Can't split to standard output "
| NoOutputSpecified -> error " Split: No output format specified "
end
2023-10-30 17:36:41 +01:00
| Some Spray ->
2023-10-30 20:10:22 +01:00
begin match args . inputs , args . out with
| ( _ , pagespec , _ , _ , _ , _ ) :: _ , File output_spec ->
2023-10-30 18:55:19 +01:00
let pdf = get_single_pdf args . op false in
2023-10-30 20:10:22 +01:00
let range = ref ( parse_pagespec pdf pagespec ) in
2023-10-30 18:55:19 +01:00
let enc = build_enc () in
2023-10-30 20:10:22 +01:00
let pagenums = map ref ( many [] ( length ! spray_outputs ) ) in
let n = ref 0 in
while ! range < > [] do
List . nth pagenums ( ! n mod ( length ! spray_outputs ) ) = | hd ! range ;
range := tl ! range ;
n + = 1 ;
done ;
let names = rev ! spray_outputs in
iter ( fun x -> if ! x = [] then error " Spray: must have at least one page for each output " ) pagenums ;
2023-10-30 18:55:19 +01:00
args . create_objstm <- args . preserve_objstm ;
2023-10-30 20:10:22 +01:00
fast_write_split_pdfs ~ names enc 0 args . original_filename args . squeeze output_spec pdf ( map rev ( map ( ! ) pagenums ) ) ( Pdfpage . pages_of_pagetree pdf )
| _ , Stdout -> error " Can't spray to standard output "
| _ , NoOutputSpecified -> error " Spray: No output format specified "
| _ , _ -> error " Spray: no input "
2023-10-30 18:55:19 +01:00
end
2013-08-20 16:32:57 +02:00
| Some Presentation ->
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2013-08-20 16:32:57 +02:00
let pdf' =
2021-12-17 17:12:03 +01:00
Cpdfpresent . presentation
2013-08-20 16:32:57 +02:00
range
args . transition args . duration args . horizontal
args . inward args . direction args . effect_duration pdf
in
2023-03-22 16:45:00 +01:00
pdf . Pdf . minor <- if args . keepversion | | pdf . Pdf . major > 1 then pdf . Pdf . minor else max pdf . Pdf . minor 1 ;
2013-08-20 16:32:57 +02:00
write_pdf false pdf'
| Some ChangeId ->
2014-11-18 15:57:18 +01:00
if args . recrypt then
soft_error " Cannot recrypt with change id: an id is part of encryption information " ;
2013-08-20 16:32:57 +02:00
begin match args . inputs , args . out with
2014-12-11 19:17:02 +01:00
| [ ( k , _ , _ , _ , _ , _ ) as input ] , File s ->
2013-08-20 16:32:57 +02:00
let pdf = get_pdf_from_input_kind input args . op k in
2014-10-02 15:19:05 +02:00
write_pdf true pdf
2014-12-11 19:17:02 +01:00
| [ ( k , _ , _ , _ , _ , _ ) as input ] , Stdout ->
2013-08-20 16:32:57 +02:00
let pdf = get_pdf_from_input_kind input args . op k in
2014-10-02 15:19:05 +02:00
write_pdf true pdf
2013-08-20 16:32:57 +02:00
| _ -> error " ChangeId: exactly one input file and output file required. "
end
| Some RemoveId ->
2014-11-18 15:57:18 +01:00
if args . recrypt then
soft_error " Cannot recrypt with remove id: an id is part of encryption information " ;
2013-08-20 16:32:57 +02:00
let pdf = get_single_pdf args . op false in
pdf . Pdf . trailerdict <- Pdf . remove_dict_entry pdf . Pdf . trailerdict " /ID " ;
write_pdf false pdf
| Some ( CopyId getfrom ) ->
2014-11-18 15:57:18 +01:00
if args . recrypt then
2014-11-18 16:16:11 +01:00
soft_error " Cannot recrypt with copy id: an id is part of encryption information " ;
2013-08-20 16:32:57 +02:00
begin match args . inputs with
2014-12-11 19:17:02 +01:00
| [ ( k , _ , u , o , _ , _ ) as input ] ->
2013-08-20 16:32:57 +02:00
let pdf =
2021-12-19 13:55:06 +01:00
Cpdfmetadata . copy_id
2013-08-20 16:32:57 +02:00
args . keepversion
( pdfread_pdf_of_file ( optstring u ) ( optstring o ) getfrom )
( get_pdf_from_input_kind input args . op k )
in
write_pdf false pdf
| _ -> error " copy-id: No input file specified "
end
| Some ( ThinLines w ) ->
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2021-12-22 10:31:39 +01:00
write_pdf false ( Cpdftweak . thinlines range w pdf )
2013-08-20 16:32:57 +02:00
| Some BlackText ->
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2021-12-22 10:31:39 +01:00
write_pdf false ( Cpdftweak . blacktext args . color range pdf )
2013-08-20 16:32:57 +02:00
| Some BlackLines ->
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2021-12-22 10:31:39 +01:00
write_pdf false ( Cpdftweak . blacklines args . color range pdf )
2013-08-20 16:32:57 +02:00
| Some BlackFills ->
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2021-12-22 10:31:39 +01:00
write_pdf false ( Cpdftweak . blackfills args . color range pdf )
2013-08-20 16:32:57 +02:00
| Some RemoveAnnotations ->
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2021-12-21 14:44:46 +01:00
write_pdf false ( Cpdfannot . remove_annotations range pdf )
2013-08-20 16:32:57 +02:00
| Some ( CopyAnnotations getfrom ) ->
begin match args . inputs with
2014-12-11 19:17:02 +01:00
| [ ( k , _ , u , o , _ , _ ) as input ] ->
2023-04-21 16:39:09 +02:00
let input_pdf = get_pdf_from_input_kind input args . op k in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty input_pdf ( get_pagespec () ) in
2023-04-21 16:39:09 +02:00
Cpdfannot . copy_annotations
range
( pdfread_pdf_of_file ( optstring u ) ( optstring o ) getfrom )
input_pdf ;
write_pdf false input_pdf
2013-08-20 16:32:57 +02:00
| _ -> error " copy-annotations: No input file specified "
end
2023-01-13 07:30:46 +01:00
| Some ( SetAnnotations json ) ->
let data = Pdfio . input_of_channel ( open_in_bin json ) in
let pdf = get_single_pdf args . op false in
Cpdfannot . set_annotations_json pdf data ;
write_pdf false pdf
2013-08-20 16:32:57 +02:00
| Some ListAnnotations ->
2023-01-11 07:55:50 +01:00
let pdf = get_single_pdf args . op true in
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2023-04-22 17:58:05 +02:00
if args . format_json then
flprint ( Pdfio . string_of_bytes ( Cpdfannot . get_annotations_json pdf range ) )
else
Cpdfannot . list_annotations range args . encoding pdf
2013-08-20 16:32:57 +02:00
| Some Shift ->
let pdf = get_single_pdf args . op false in
2024-02-07 17:55:10 +01:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
let dxdylist = Cpdfcoord . parse_coordinates pdf args . coord in
write_pdf false ( Cpdfpage . shift_pdf ~ fast : args . fast dxdylist pdf range )
2024-01-22 17:36:37 +01:00
| Some ShiftBoxes ->
let pdf = get_single_pdf args . op false in
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
let dxdylist = Cpdfcoord . parse_coordinates pdf args . coord in
2024-02-07 17:55:10 +01:00
write_pdf false ( Cpdfpage . shift_boxes dxdylist pdf range )
2013-08-20 16:32:57 +02:00
| Some Scale ->
let pdf = get_single_pdf args . op false in
2024-02-07 17:55:10 +01:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
let sxsylist = Cpdfcoord . parse_coordinates pdf args . coord in
write_pdf false ( Cpdfpage . scale_pdf ~ fast : args . fast sxsylist pdf range )
2013-08-20 16:32:57 +02:00
| Some ScaleToFit ->
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2023-06-27 17:24:41 +02:00
warn_prerotate range pdf ;
let pdf = if args . prerotate then prerotate range pdf else pdf in
2019-09-26 12:44:54 +02:00
let xylist = Cpdfcoord . parse_coordinates pdf args . coord
2013-08-20 16:32:57 +02:00
and scale = args . scale in
2021-12-22 09:58:56 +01:00
write_pdf false ( Cpdfpage . scale_to_fit_pdf ~ fast : args . fast args . position scale xylist args . op pdf range )
2024-09-21 18:35:29 +02:00
| Some Stretch ->
let pdf = get_single_pdf args . op false in
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
warn_prerotate range pdf ;
let pdf = if args . prerotate then prerotate range pdf else pdf in
let xylist = Cpdfcoord . parse_coordinates pdf args . coord in
write_pdf false ( Cpdfpage . stretch ~ fast : args . fast xylist pdf range )
2013-08-20 16:32:57 +02:00
| Some ( ScaleContents scale ) ->
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2021-12-22 09:58:56 +01:00
write_pdf false ( Cpdfpage . scale_contents ~ fast : args . fast args . position scale pdf range )
2013-08-20 16:32:57 +02:00
| Some ListAttachedFiles ->
let pdf = get_single_pdf args . op false in
2021-10-02 13:22:59 +02:00
let attachments = Cpdfattach . list_attached_files pdf in
2015-09-26 21:30:57 +02:00
iter
2021-10-02 13:22:59 +02:00
( fun a -> Printf . printf " %i %s \n " a . Cpdfattach . pagenumber a . Cpdfattach . name )
2015-09-26 21:30:57 +02:00
attachments ;
flprint " "
2013-08-20 16:32:57 +02:00
| Some DumpAttachedFiles ->
let pdf = get_single_pdf args . op false in
begin match args . out with
2021-12-19 14:38:27 +01:00
| NoOutputSpecified -> Cpdfattach . dump_attached_files pdf " "
| File n -> Cpdfattach . dump_attached_files pdf n
2013-08-20 16:32:57 +02:00
| Stdout -> error " Can't dump attachments to stdout "
end
| Some RemoveAttachedFiles ->
2021-10-02 13:22:59 +02:00
write_pdf false ( Cpdfattach . remove_attached_files ( get_single_pdf args . op false ) )
2013-08-20 16:32:57 +02:00
| Some ( AttachFile files ) ->
begin match args . inputs with
2014-12-11 19:17:02 +01:00
| [ ( k , _ , _ , _ , _ , _ ) as input ] ->
2013-08-20 16:32:57 +02:00
let pdf = get_pdf_from_input_kind input args . op k in
let topage =
try
match args . topage with
| None -> None
| Some " end " -> Some ( Pdfpage . endpage pdf )
| Some s -> Some ( int_of_string s )
with _ -> error " Bad -to-page "
in
2021-10-02 13:22:59 +02:00
let pdf = fold_left ( Cpdfattach . attach_file args . keepversion topage ) pdf ( rev files ) in
2013-08-20 16:32:57 +02:00
write_pdf false pdf
| _ -> error " attach file: No input file specified "
end
| Some PadBefore ->
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2017-12-18 20:44:02 +01:00
let padwith =
match args . padwith with
None -> None
| Some filename -> Some ( pdfread_pdf_of_file None None filename )
in
2021-12-18 16:47:06 +01:00
write_pdf false ( Cpdfpad . padbefore ? padwith range pdf )
2013-08-20 16:32:57 +02:00
| Some PadAfter ->
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2017-12-18 20:44:02 +01:00
let padwith =
match args . padwith with
None -> None
| Some filename -> Some ( pdfread_pdf_of_file None None filename )
in
2021-12-18 16:47:06 +01:00
write_pdf false ( Cpdfpad . padafter ? padwith range pdf )
2013-08-20 16:32:57 +02:00
| Some ( PadEvery n ) ->
let pdf = get_single_pdf args . op false in
let range =
match keep ( function m -> m mod n = 0 ) ( ilist 1 ( Pdfpage . endpage pdf ) ) with
| [] -> []
| l -> if last l = Pdfpage . endpage pdf then all_but_last l else l
in
2017-12-21 15:54:01 +01:00
let padwith =
match args . padwith with
None -> None
| Some filename -> Some ( pdfread_pdf_of_file None None filename )
in
2021-12-18 16:47:06 +01:00
write_pdf false ( Cpdfpad . padafter ? padwith range pdf )
2013-08-20 16:32:57 +02:00
| Some ( PadMultiple n ) ->
let pdf = get_single_pdf args . op false in
2021-12-18 16:47:06 +01:00
write_pdf false ( Cpdfpad . padmultiple n pdf )
2019-07-01 16:35:17 +02:00
| Some ( PadMultipleBefore n ) ->
let pdf = get_single_pdf args . op false in
2021-12-18 16:47:06 +01:00
write_pdf false ( Cpdfpad . padmultiple ( - n ) pdf )
2013-08-20 16:32:57 +02:00
| Some Draft ->
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2021-12-21 16:25:59 +01:00
write_pdf false ( Cpdfdraft . draft args . removeonly args . boxes range pdf )
2013-08-20 16:32:57 +02:00
| Some ( AddText text ) ->
2022-10-19 17:32:56 +02:00
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2022-10-19 17:32:56 +02:00
let cpdffont = embed_font () in
2022-01-03 16:13:03 +01:00
warn_prerotate range pdf ;
2013-08-20 16:32:57 +02:00
let pdf =
2022-01-02 16:18:55 +01:00
if args . prerotate then prerotate range pdf else pdf
2013-08-20 16:32:57 +02:00
and filename =
match args . inputs with
2014-12-11 19:17:02 +01:00
| ( InFile inname , _ , _ , _ , _ , _ ) :: _ -> inname
2013-08-20 16:32:57 +02:00
| _ -> " "
in
2022-10-19 14:48:13 +02:00
write_pdf false
( Cpdfaddtext . addtexts
2022-10-19 17:32:56 +02:00
args . linewidth args . outline args . fast args . fontname
cpdffont args . bates args . batespad args . color args . position
2022-10-19 14:48:13 +02:00
args . linespacing args . fontsize args . underneath text range
args . relative_to_cropbox args . opacity
args . justification args . midline args . topline filename
2022-10-19 17:32:56 +02:00
args . extract_text_font_size args . coord ~ raw : ( args . encoding = Raw ) pdf )
2013-08-20 16:32:57 +02:00
| Some RemoveText ->
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2022-09-27 20:58:27 +02:00
write_pdf false ( Cpdfremovetext . removetext range pdf )
2013-08-20 16:32:57 +02:00
| Some AddRectangle ->
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2013-08-20 16:32:57 +02:00
write_pdf false
2021-12-21 15:00:58 +01:00
( Cpdfaddtext . addrectangle
2019-09-26 12:44:54 +02:00
args . fast ( Cpdfcoord . parse_coordinate pdf args . coord )
2016-11-08 19:15:04 +01:00
args . color args . outline args . linewidth args . opacity args . position
args . relative_to_cropbox args . underneath range pdf )
2013-08-20 16:32:57 +02:00
| Some ( AddBookmarks file ) ->
write_pdf false
2021-12-21 14:44:46 +01:00
( Cpdfbookmarks . add_bookmarks ~ json : args . format_json true ( Pdfio . input_of_channel ( open_in_bin file ) )
2013-08-20 16:32:57 +02:00
( get_single_pdf args . op false ) )
| Some RemoveBookmarks ->
write_pdf false ( Pdfmarks . remove_bookmarks ( get_single_pdf args . op false ) )
| Some TwoUp ->
2021-12-22 09:58:56 +01:00
write_pdf false ( Cpdfimpose . twoup args . fast ( get_single_pdf args . op false ) )
2013-08-20 16:32:57 +02:00
| Some TwoUpStack ->
2021-12-22 09:58:56 +01:00
write_pdf false ( Cpdfimpose . twoup_stack args . fast ( get_single_pdf args . op false ) )
2021-10-18 19:33:52 +02:00
| Some Impose fit ->
2021-10-19 17:18:15 +02:00
let pdf = get_single_pdf args . op false in
let x , y = Cpdfcoord . parse_coordinate pdf args . coord in
2021-10-19 20:26:02 +02:00
if not fit && ( x < 0 . 0 | | y < 0 . 0 ) then error " Negative imposition parameters not allowed. " else
2021-10-19 17:18:15 +02:00
write_pdf false
2021-12-22 09:58:56 +01:00
( Cpdfimpose . impose ~ x ~ y ~ fit ~ columns : args . impose_columns ~ rtl : args . impose_rtl ~ btt : args . impose_btt ~ center : args . impose_center
2021-10-27 21:15:05 +02:00
~ margin : args . impose_margin ~ spacing : args . impose_spacing ~ linewidth : args . impose_linewidth ~ fast : args . fast pdf )
2013-08-20 16:32:57 +02:00
| Some ( StampOn over ) ->
let overpdf =
match over with
| " stamp_use_stdin " -> pdf_of_stdin " " " "
| x -> pdfread_pdf_of_file None None x
in
2014-10-14 20:45:10 +02:00
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2014-10-14 20:45:10 +02:00
let pdf =
2021-12-22 10:31:39 +01:00
Cpdfpage . stamp
2016-01-08 13:28:07 +01:00
args . relative_to_cropbox args . position args . topline args . midline args . fast
2015-02-17 16:48:00 +01:00
args . scale_stamp_to_fit true range overpdf pdf
2014-10-14 20:45:10 +02:00
in
write_pdf false pdf
2013-08-20 16:32:57 +02:00
| Some ( StampUnder under ) ->
let underpdf =
match under with
| " stamp_use_stdin " -> pdf_of_stdin " " " "
| x -> pdfread_pdf_of_file None None x
in
2014-10-14 20:45:10 +02:00
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2014-10-14 20:45:10 +02:00
let pdf =
2021-12-22 10:31:39 +01:00
Cpdfpage . stamp
2016-01-08 13:28:07 +01:00
args . relative_to_cropbox args . position args . topline args . midline args . fast
2015-02-17 16:48:00 +01:00
args . scale_stamp_to_fit false range underpdf pdf
2014-10-14 20:45:10 +02:00
in
write_pdf false pdf
2013-08-20 16:32:57 +02:00
| Some ( CombinePages over ) ->
2022-01-03 16:13:03 +01:00
let underpdf = get_single_pdf args . op false in
let overpdf = pdfread_pdf_of_file None None over in
warn_prerotate ( parse_pagespec underpdf " all " ) underpdf ;
warn_prerotate ( parse_pagespec overpdf " all " ) overpdf ;
write_pdf false
( Cpdfpage . combine_pages
args . fast
( prerotate ( parse_pagespec underpdf " all " ) underpdf )
( prerotate ( parse_pagespec overpdf " all " ) overpdf )
false false true )
2013-08-20 16:32:57 +02:00
| Some Encrypt ->
let pdf = get_single_pdf args . op false in
2021-12-18 17:26:33 +01:00
let pdf = Cpdfsqueeze . recompress_pdf pdf
2023-10-30 18:55:19 +01:00
and encryption = build_enc () in
2013-08-20 16:32:57 +02:00
Pdf . remove_unreferenced pdf ;
if not args . keepversion then
begin
let newversion =
match args . crypt_method with
" 40bit " -> 1 | " 128bit " -> 4 | " AES " -> 6 | " AES256 " | " AES256ISO " -> 7 | _ -> 0
in
let newversion = if args . create_objstm then 5 else newversion in
2023-03-22 16:45:00 +01:00
if pdf . Pdf . major = 1 then pdf . Pdf . minor <- max pdf . Pdf . minor newversion
2013-08-20 16:32:57 +02:00
end ;
2023-10-30 18:55:19 +01:00
write_pdf ~ encryption false pdf
2013-08-20 16:32:57 +02:00
| Some Decrypt ->
2014-10-16 16:28:46 +02:00
args . recrypt <- false ;
2013-08-20 16:32:57 +02:00
write_pdf false ( get_single_pdf args . op false )
| Some RemoveMetadata ->
2021-12-19 13:55:06 +01:00
write_pdf false ( Cpdfmetadata . remove_metadata ( get_single_pdf args . op false ) )
2013-08-20 16:32:57 +02:00
| Some ExtractImages ->
let output_spec =
begin match args . out with
| File output_spec -> output_spec
| _ -> " "
end
in
let pdf = get_single_pdf args . op true in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2023-11-10 14:46:52 +01:00
Cpdfimage . extract_images ~ raw : ( args . encoding = Cpdfmetadata . Raw ) ? path_to_p2p : ( match args . path_to_p2p with " " -> None | x -> Some x ) ? path_to_im : ( match args . path_to_im with " " -> None | x -> Some x ) args . encoding args . dedup args . dedup_per_page pdf range output_spec
2013-08-20 16:32:57 +02:00
| Some ( ImageResolution f ) ->
let pdf = get_single_pdf args . op true in
2024-02-07 15:42:38 +01:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
if args . format_json then
flprint ( Pdfio . string_of_bytes ( Cpdfimage . image_resolution_json pdf range f ) )
else
let images = Cpdfimage . image_resolution pdf range f in
2023-11-09 19:36:41 +01:00
iter
2024-01-04 18:40:15 +01:00
( function ( pagenum , xobject , w , h , wdpi , hdpi , objnum ) ->
2024-10-10 17:38:34 +02:00
Printf . printf " %i, %s, %i, %i, %f, %f, %i \n " pagenum xobject w h wdpi hdpi objnum )
2023-11-09 19:36:41 +01:00
images
2023-11-09 12:25:19 +01:00
| Some ListImages ->
2023-11-13 18:55:59 +01:00
let pdf = get_single_pdf args . op true in
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
let json = Cpdfimage . images pdf range in
if args . format_json then
flprint ( Cpdfyojson . Safe . pretty_to_string json )
else
2023-11-14 18:47:44 +01:00
begin match json with
| ` List l ->
iter
2024-03-22 16:33:08 +01:00
( function ( ` Assoc [ ( _ , ` Int i ) ; ( _ , ` List pages ) ; ( _ , ` String name ) ; ( _ , ` Int w ) ; ( _ , ` Int h ) ; ( _ , ` Int size ) ; ( _ , bpc ) ; ( _ , cs ) ; ( _ , ` String filter ) ] ) ->
2023-11-14 18:47:44 +01:00
let pages = combine_with_spaces ( map ( function ` Int i -> string_of_int i | _ -> " " ) pages ) in
2024-03-22 16:33:08 +01:00
let bpc = match bpc with ` Int bpc -> string_of_int bpc | _ -> " none " in
let cs = match cs with ` String cs -> cs | _ -> " none " in
flprint ( Printf . sprintf " %i, %s, %s, %i, %i, %i, %s, %s, %s \n " i pages name w h size bpc cs filter )
2023-11-14 18:47:44 +01:00
| _ -> () )
l
| _ -> ()
end
2013-08-20 16:32:57 +02:00
| Some MissingFonts ->
let pdf = get_single_pdf args . op true in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2021-10-18 17:22:38 +02:00
Cpdffont . missing_fonts pdf range
2013-08-20 16:32:57 +02:00
| Some ExtractText ->
let pdf = get_single_pdf args . op true in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2022-09-27 20:58:27 +02:00
let text = Cpdfextracttext . extract_text args . extract_text_font_size pdf range in
2013-08-20 16:32:57 +02:00
begin match args . out with
| File filename ->
let fh = open_out_bin filename in
output_string fh text ;
close_out fh
| NoOutputSpecified | Stdout ->
print_string text ;
print_newline ()
end
2013-10-24 16:21:54 +02:00
| Some AddPageLabels ->
let pdf = get_single_pdf args . op false in
let range = parse_pagespec pdf ( get_pagespec () ) in
2021-12-20 10:02:39 +01:00
Cpdfpagelabels . add_page_labels
2020-01-25 08:22:45 +01:00
pdf args . labelsprogress args . labelstyle args . labelprefix args . labelstartval range ;
2014-09-18 16:40:22 +02:00
write_pdf false pdf
2013-10-24 12:21:52 +02:00
| Some RemovePageLabels ->
let pdf = get_single_pdf args . op false in
Pdfpagelabels . remove pdf ;
write_pdf false pdf
| Some PrintPageLabels ->
let pdf = get_single_pdf args . op true in
2023-10-31 18:28:07 +01:00
if args . format_json then
let json_of_pagelabel l =
` Assoc
[ ( " labelstyle " , ` String ( Pdfpagelabels . string_of_labelstyle l . Pdfpagelabels . labelstyle ) ) ;
( " labelprefix " , begin match l . Pdfpagelabels . labelprefix with None -> ` Null | Some s -> ` String s end ) ;
( " startpage " , ` Int l . Pdfpagelabels . startpage ) ;
( " startvalue " , ` Int l . Pdfpagelabels . startvalue ) ]
in
flprint ( Cpdfyojson . Safe . pretty_to_string ( ` List ( map json_of_pagelabel ( Pdfpagelabels . read pdf ) ) ) )
else
iter
print_string
( map Pdfpagelabels . string_of_pagelabel ( Pdfpagelabels . read pdf ) )
2015-01-07 19:29:11 +01:00
| Some ( RemoveDictEntry key ) ->
let pdf = get_single_pdf args . op true in
2021-12-22 10:31:39 +01:00
Cpdftweak . remove_dict_entry pdf key args . dict_entry_search ;
2015-01-07 19:29:11 +01:00
write_pdf false pdf
2021-10-28 18:06:46 +02:00
| Some ( ReplaceDictEntry key ) ->
let pdf = get_single_pdf args . op true in
2021-12-22 10:31:39 +01:00
Cpdftweak . replace_dict_entry pdf key args . replace_dict_entry_value args . dict_entry_search ;
2021-10-28 18:06:46 +02:00
write_pdf false pdf
2021-10-29 16:09:21 +02:00
| Some ( PrintDictEntry key ) ->
let pdf = get_single_pdf args . op true in
2023-01-17 06:37:54 +01:00
Cpdftweak . print_dict_entry ~ utf8 : ( args . encoding = Cpdfmetadata . UTF8 ) pdf key
2016-11-03 18:11:08 +01:00
| Some ListSpotColours ->
2016-11-04 13:43:58 +01:00
let pdf = get_single_pdf args . op false in
2021-12-19 14:26:15 +01:00
Cpdfspot . list_spot_colours pdf
2016-11-09 16:42:47 +01:00
| Some RemoveClipping ->
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2021-12-22 10:31:39 +01:00
write_pdf false ( Cpdftweak . remove_clipping pdf range )
2019-07-01 15:40:22 +02:00
| Some CreateMetadata ->
let pdf = get_single_pdf args . op false in
2021-12-19 13:55:06 +01:00
write_pdf false ( Cpdfmetadata . create_metadata pdf )
2019-07-02 19:20:05 +02:00
| Some EmbedMissingFonts ->
let fi =
match args . inputs with
[ ( InFile fi , _ , _ , _ , _ , _ ) ] -> fi
| _ -> error " Input method not supported for -embed-missing-fonts "
in
let fo =
match args . out with
File fo -> fo
| _ -> error " Output method not supported for -embed-missing-fonts "
in
2021-10-18 17:22:38 +02:00
Cpdffont . embed_missing_fonts args . path_to_ghostscript args . gs_quiet fi fo
2019-07-06 18:55:26 +02:00
| Some ( BookmarksOpenToLevel n ) ->
let pdf = get_single_pdf args . op false in
2021-12-21 16:06:40 +01:00
write_pdf false ( Cpdfbookmarks . bookmarks_open_to_level n pdf )
2019-07-07 18:07:52 +02:00
| Some CreatePDF ->
2024-09-30 14:10:22 +02:00
begin match args . subformat with
| Some Cpdfua . PDFUA1 ->
begin match args . title with None -> error " Provide -title " | _ -> () end ;
2024-09-30 16:13:56 +02:00
let pdf = Cpdfua . create_pdfua1 ( unopt args . title ) args . createpdf_pagesize args . createpdf_pages in
2024-09-30 14:10:22 +02:00
write_pdf false pdf
| Some Cpdfua . PDFUA2 ->
begin match args . title with None -> error " Provide -title " | _ -> () end ;
2024-09-30 16:13:56 +02:00
let pdf = Cpdfua . create_pdfua2 ( unopt args . title ) args . createpdf_pagesize args . createpdf_pages in
2024-09-30 14:10:22 +02:00
write_pdf false pdf
| None ->
let pdf = Cpdfcreate . blank_document_paper args . createpdf_pagesize args . createpdf_pages in
write_pdf false pdf
end
2019-07-11 18:19:40 +02:00
| Some RemoveAllText ->
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2022-09-27 20:58:27 +02:00
write_pdf false ( Cpdfremovetext . remove_all_text range pdf )
2019-07-15 12:52:14 +02:00
| Some ShowBoxes ->
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2021-12-22 10:31:39 +01:00
write_pdf false ( Cpdfpage . show_boxes pdf range )
2019-07-15 14:42:32 +02:00
| Some TrimMarks ->
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2021-12-22 10:31:39 +01:00
write_pdf false ( Cpdfpage . trim_marks pdf range )
2019-10-01 16:02:12 +02:00
| Some ( Postpend s | Prepend s as x ) ->
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2019-10-01 16:02:12 +02:00
let before = match x with Prepend _ -> true | _ -> false in
2021-12-22 10:31:39 +01:00
write_pdf false ( Cpdftweak . append_page_content s before args . fast range pdf )
2020-01-30 11:20:33 +01:00
| Some OutputJSON ->
let pdf = get_single_pdf args . op false in
write_json args . out pdf
2020-02-26 17:24:27 +01:00
| Some OCGCoalesce ->
let pdf = get_single_pdf args . op false in
2021-12-18 17:14:31 +01:00
Cpdfocg . ocg_coalesce pdf ;
2020-02-27 15:14:51 +01:00
write_pdf false pdf
2020-02-27 14:32:45 +01:00
| Some OCGList ->
let pdf = get_single_pdf args . op true in
2021-12-18 17:14:31 +01:00
Cpdfocg . ocg_list pdf
2020-02-27 14:32:45 +01:00
| Some OCGRename ->
let pdf = get_single_pdf args . op false in
2021-12-18 17:14:31 +01:00
Cpdfocg . ocg_rename args . ocgrenamefrom args . ocgrenameto pdf ;
2020-02-27 15:14:51 +01:00
write_pdf false pdf
2020-03-02 13:37:39 +01:00
| Some OCGOrderAll ->
let pdf = get_single_pdf args . op false in
2021-12-18 17:14:31 +01:00
Cpdfocg . ocg_order_all pdf ;
2020-03-02 13:37:39 +01:00
write_pdf false pdf
2020-03-18 15:01:27 +01:00
| Some ( StampAsXObject stamp ) ->
let stamp_pdf =
match stamp with
| " stamp_use_stdin " -> pdf_of_stdin " " " "
| x -> pdfread_pdf_of_file None None x
in
let pdf = get_single_pdf args . op false in
2021-05-25 14:49:51 +02:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2020-03-18 15:01:27 +01:00
let pdf , xobj_name =
2021-12-21 16:33:56 +01:00
Cpdfxobject . stamp_as_xobject pdf range stamp_pdf
2020-03-18 15:01:27 +01:00
in
Printf . printf " %s \n " xobj_name ;
flush stdout ;
write_pdf false pdf
2021-11-01 16:40:33 +01:00
| Some ( PrintFontEncoding fontname ) ->
let pdf = get_single_pdf args . op true in
2021-11-12 23:09:49 +01:00
Cpdffont . print_font_table pdf fontname args . copyfontpage
2021-11-19 01:32:35 +01:00
| Some TableOfContents ->
2022-10-19 16:34:19 +02:00
let pdf = get_single_pdf args . op false in
2022-10-19 14:48:13 +02:00
let cpdffont = embed_font () in
let pdf =
Cpdftoc . typeset_table_of_contents
2022-10-19 16:34:19 +02:00
~ font : cpdffont ~ fontsize : args . fontsize ~ title : args . toc_title ~ bookmark : args . toc_bookmark pdf
2022-10-19 14:48:13 +02:00
in
2022-10-19 16:34:19 +02:00
write_pdf false pdf
2021-11-19 01:32:35 +01:00
| Some ( Typeset filename ) ->
2022-11-02 22:13:27 +01:00
let text = Pdfio . bytes_of_input_channel ( open_in_bin filename ) in
2022-10-19 14:48:13 +02:00
let cpdffont = embed_font () in
2024-10-02 14:27:57 +02:00
let pdf = Cpdftexttopdf . typeset ~ process_struct_tree : args . process_struct_trees
? subformat : args . subformat ? title : args . title ~ font : cpdffont ~ papersize : args . createpdf_pagesize ~ fontsize : args . fontsize text in
2021-11-20 00:21:37 +01:00
write_pdf false pdf
2023-05-02 16:04:35 +02:00
| Some ( TextWidth s ) ->
2023-05-02 16:25:16 +02:00
let rawwidth =
match args . font with
| StandardFont f ->
Pdfstandard14 . textwidth false WinAnsiEncoding f s
| _ ->
error " -text-width only works for the standard 14 fonts "
in
let w = ( float rawwidth * . args . fontsize ) /. 1000 . in
Printf . printf " %f \n " w
2022-11-28 17:11:07 +01:00
| Some Draw ->
let pdf = get_single_pdf args . op false in
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2023-09-01 20:09:23 +02:00
let ops = match ! Cpdfdrawcontrol . drawops with [ ( " _MAIN " , ops ) ] -> rev ops | _ -> error " not enough -end-xobj or -et " in
2023-05-03 14:53:48 +02:00
write_pdf
false
2024-09-06 15:04:17 +02:00
( Cpdfdraw . draw ~ struct_tree : args . draw_struct_tree ~ fast : args . fast ~ underneath : args . underneath ~ filename : args . original_filename ~ bates : args . bates ~ batespad : args . batespad range pdf ops )
2023-03-03 17:02:16 +01:00
| Some ( Composition json ) ->
let pdf = get_single_pdf args . op false in
2023-03-03 18:07:12 +01:00
let filesize =
match args . inputs with
| ( InFile inname , _ , _ , _ , _ , _ ) :: _ -> filesize inname
| _ -> 0
in
2023-04-13 17:51:11 +02:00
Cpdfcomposition . show_composition filesize json pdf
2023-11-15 18:26:43 +01:00
| Some ( Chop ( x , y ) ) ->
2023-11-20 11:53:51 +01:00
let pdf = get_single_pdf args . op false in
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2023-11-20 15:42:00 +01:00
write_pdf false ( Cpdfchop . chop ~ x ~ y ~ columns : args . impose_columns ~ btt : args . impose_btt ~ rtl : args . impose_rtl pdf range )
2024-02-05 15:38:05 +01:00
| Some ( ChopHV ( is_h , line ) ) ->
2024-02-05 15:01:16 +01:00
let pdf = get_single_pdf args . op false in
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2024-02-05 15:38:05 +01:00
write_pdf false ( Cpdfchop . chop_hv ~ is_h ~ line ~ columns : args . impose_columns pdf range )
2023-12-06 13:20:27 +01:00
| Some ProcessImages ->
let pdf = get_single_pdf args . op false in
2024-01-04 12:33:17 +01:00
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2023-12-27 20:53:02 +01:00
Cpdfimage . process
2024-01-12 16:00:28 +01:00
~ q : args . jpegquality ~ qlossless : args . jpegqualitylossless ~ onebppmethod : args . onebppmethod ~ jbig2_lossy_threshold : args . jbig2_lossy_threshold
2023-12-31 12:59:48 +01:00
~ length_threshold : args . length_threshold ~ percentage_threshold : args . percentage_threshold ~ pixel_threshold : args . pixel_threshold
2024-02-01 14:29:20 +01:00
~ dpi_threshold : args . dpi_threshold ~ factor : args . resample_factor ~ interpolate : args . resample_interpolate
2024-02-22 16:56:35 +01:00
~ path_to_jbig2enc : args . path_to_jbig2enc ~ path_to_convert : args . path_to_im range pdf ;
2023-12-06 13:20:27 +01:00
write_pdf false pdf
2024-06-24 16:29:32 +02:00
| Some ( ExtractStream s ) ->
2024-05-30 16:33:24 +02:00
let pdf = get_single_pdf args . op true in
2024-06-24 17:21:28 +02:00
extract_stream pdf args . extract_stream_decompress s
2024-06-24 16:29:32 +02:00
| Some ( PrintObj s ) ->
2024-05-30 16:33:24 +02:00
let pdf = get_single_pdf args . op true in
2024-06-24 17:21:28 +02:00
print_obj pdf s
2024-09-02 17:30:02 +02:00
| Some ( ReplaceObj ( a , b ) ) ->
let pdf = get_single_pdf args . op false in
2024-09-03 18:45:55 +02:00
let pdfobj = Cpdfjson . object_of_json ( Cpdfyojson . Safe . from_string b ) in
2024-09-30 14:45:10 +02:00
Pdf . replace_obj pdf a pdfobj ;
2024-09-04 15:53:14 +02:00
write_pdf false pdf
2024-05-30 16:33:24 +02:00
| Some ( Verify standard ) ->
begin match standard with
| " PDF/UA-1(matterhorn) " ->
let pdf = get_single_pdf args . op false in
2024-06-24 17:33:43 +02:00
let testname = match args . verify_single with None -> " " | Some x -> x in
2024-05-30 16:33:24 +02:00
if args . format_json
2024-06-24 17:33:43 +02:00
then flprint ( Cpdfyojson . Safe . pretty_to_string ( Cpdfua . test_matterhorn_json pdf testname ) )
else Cpdfua . test_matterhorn_print pdf testname
2024-05-30 16:33:24 +02:00
| _ -> error " Unknown verification type. "
end
2024-05-30 18:49:23 +02:00
| Some ( MarkAs standard ) ->
begin match standard with
2024-09-14 15:43:55 +02:00
| Cpdfua . PDFUA1 ->
2024-05-30 18:49:23 +02:00
let pdf = get_single_pdf args . op false in
Cpdfua . mark pdf ;
write_pdf false pdf
2024-09-14 15:43:55 +02:00
| Cpdfua . PDFUA2 ->
2024-06-26 16:34:45 +02:00
let pdf = get_single_pdf args . op false in
Cpdfua . mark2 2024 pdf ;
write_pdf false pdf
2024-05-30 18:49:23 +02:00
end
2024-06-18 16:28:19 +02:00
| Some ( RemoveMark standard ) ->
begin match standard with
2024-09-14 15:43:55 +02:00
| Cpdfua . PDFUA1 | Cpdfua . PDFUA2 ->
2024-06-18 16:28:19 +02:00
let pdf = get_single_pdf args . op false in
Cpdfua . remove_mark pdf ;
write_pdf false pdf
end
2024-06-27 15:21:47 +02:00
| Some PrintStructTree ->
let pdf = get_single_pdf args . op true in
Cpdfua . print_struct_tree pdf
2024-06-03 16:49:52 +02:00
| Some ExtractStructTree ->
let pdf = get_single_pdf args . op true in
let json = Cpdfua . extract_struct_tree pdf in
json_to_output json args . out
2024-06-04 16:29:29 +02:00
| Some ( ReplaceStructTree s ) ->
let pdf = get_single_pdf args . op false in
let json = Cpdfyojson . Safe . from_file s in
Cpdfua . replace_struct_tree pdf json ;
write_pdf false pdf
2024-06-12 18:21:20 +02:00
| Some ( SetLanguage s ) ->
let pdf = get_single_pdf args . op false in
Cpdfmetadata . set_language pdf s ;
write_pdf false pdf
2024-09-21 19:44:43 +02:00
| Some Redact ->
let pdf = get_single_pdf args . op false in
let range = parse_pagespec_allow_empty pdf ( get_pagespec () ) in
2024-09-23 16:37:27 +02:00
write_pdf false ( Cpdfpage . redact ~ process_struct_tree : args . process_struct_trees pdf range )
2013-08-20 16:32:57 +02:00
2020-07-26 17:04:59 +02:00
(* Advise the user if a combination of command line flags makes little sense,
or error out if it make no sense at all . * )
let check_command_line () =
if args . gs_malformed && ! Pdfread . error_on_malformed then
error " Setting both -gs-malformed and -error-on-malformed makes no sense "
let parse_argv () s specs anon_fun usage_msg =
2015-06-01 16:08:01 +02:00
if args . debug then
2023-04-25 14:45:56 +02:00
Array . iter ( fun s -> Pdfe . log ( Printf . sprintf " arg: %s \n " s ) ) Sys . argv ;
2020-07-26 17:04:59 +02:00
Arg . parse_argv ~ current : ( ref 0 ) s specs anon_fun usage_msg ;
check_command_line ()
2013-08-20 16:32:57 +02:00
let align_specs s =
Arg . align s
2013-10-09 15:31:55 +02:00
(* The old -control mechanism clashed with AND, but must be retained for
backwards compatibility . There is a new mechanism - args file which performs
direct textual substitution of the file , before any expansion of ANDs * )
let rec expand_args_inner prev = function
[] -> rev prev
| " -args " :: filename :: r ->
expand_args_inner ( rev ( parse_control_file filename ) @ prev ) r
2024-09-02 15:25:15 +02:00
| " -args-json " :: filename :: r ->
expand_args_inner ( rev ( parse_control_file_json filename ) @ prev ) r
2013-10-09 15:31:55 +02:00
| h :: t -> expand_args_inner ( h :: prev ) t
let expand_args argv =
let l = Array . to_list argv in
Array . of_list ( expand_args_inner [] l )
2019-07-08 15:44:34 +02:00
let gs_malformed_force fi fo =
if args . path_to_ghostscript = " " then begin
2023-04-25 14:45:56 +02:00
Pdfe . log " Please supply path to gs with -gs \n " ;
2019-07-08 15:44:34 +02:00
exit 2
end ;
let gscall =
2023-02-21 15:50:07 +01:00
Filename . quote_command args . path_to_ghostscript
( ( if args . gs_quiet then [ " -dQUIET " ] else [] ) @
[ " -dNOPAUSE " ; " -sDEVICE=pdfwrite " ; " -sOUTPUTFILE= " ^ fo ; " -dBATCH " ; fi ] )
2019-07-08 15:44:34 +02:00
in
match Sys . command gscall with
| 0 -> exit 0
2023-04-25 14:45:56 +02:00
| _ -> Pdfe . log " Failed to mend file. \n " ; exit 2
2019-07-08 15:44:34 +02:00
2021-08-10 21:11:20 +02:00
let process_env_vars () =
match Sys . getenv_opt " CPDF_DEBUG " with
| Some " true " -> args . debug <- true
| Some " false " -> args . debug <- false
| _ -> ()
2013-08-20 16:32:57 +02:00
(* Main function. *)
let go_withargv argv =
2019-07-08 15:44:34 +02:00
(* Check for the standalone -gs-malformed-force special command line. This
* has exactly one file input and exactly one output and just - gs < gs >
* - gs - malformed - force between . * )
match argv with
2024-10-13 12:12:56 +02:00
| [| _ |] -> print_version ()
| [| _ ; inputfilename ; " -gs " ; gslocation ; " -gs-malformed-force " ; " -o " ; outputfilename |] ->
2019-07-08 15:44:34 +02:00
args . path_to_ghostscript <- gslocation ;
ignore ( gs_malformed_force inputfilename outputfilename ) ;
exit 0
2019-08-05 13:35:07 +02:00
| [| _ ; inputfilename ; " -gs " ; gslocation ; " -gs-malformed-force " ; " -o " ; outputfilename ; " -gs-quiet " |] ->
args . path_to_ghostscript <- gslocation ;
args . gs_quiet <- true ;
ignore ( gs_malformed_force inputfilename outputfilename ) ;
exit 0
2019-07-08 15:44:34 +02:00
| _ ->
2015-01-11 17:10:18 +01:00
Hashtbl . clear filenames ;
2021-10-15 18:36:11 +02:00
if demo then
2024-07-23 15:47:13 +02:00
flprint " This demo functions normally, but is for evaluation only. https://www.coherentpdf.com/ \n " ;
2013-08-20 16:32:57 +02:00
try
2013-10-09 15:31:55 +02:00
(* Pre-expand -args *)
let argv = expand_args argv in
2013-08-20 16:32:57 +02:00
(* Split the arguments into sets either side of ANDs *)
let sets =
let args =
( map ( fun l -> " cpdf " :: l ) ( split_around ( eq " AND " ) ( tl ( Array . to_list argv ) ) ) )
in
match args with
| [] -> []
| _ -> combine ( map Array . of_list args ) ( map ( eq ( length args ) ) ( ilist 1 ( length args ) ) )
in
iter
( fun ( s , islast ) ->
(* Printf.printf "AND:%b, %s\n" islast ( Array.fold_left ( fun x y -> x ^ " " ^ y ) "" s ) ;
flprint " \n " ; * )
reset_arguments () ;
2023-09-01 20:09:23 +02:00
Cpdfdrawcontrol . drawops := [ ( " _MAIN " , [] ) ] ;
2021-08-10 21:11:20 +02:00
process_env_vars () ;
2013-08-20 16:32:57 +02:00
parse_argv () s ( align_specs specs ) anon_fun usage_msg ;
2023-12-05 13:20:03 +01:00
let addrange pdf = AlreadyInMemory ( pdf , " fromAND " ) , args . dashrange , " " , " " , ref false , None in
2013-08-20 16:32:57 +02:00
args . inputs <- rev ( map addrange ! output_pdfs ) @ rev args . inputs ;
output_pdfs := [] ;
2023-05-03 15:19:55 +02:00
go () )
2013-08-20 16:32:57 +02:00
sets ;
2019-07-02 15:10:42 +02:00
flush stdout ; (* r for Windows *)
exit 0
2013-08-20 16:32:57 +02:00
with
| Arg . Bad s ->
2023-04-25 14:45:56 +02:00
Pdfe . log
2013-10-09 15:31:55 +02:00
( implode ( takewhile ( neq '\n' ) ( explode s ) ) ^ " Use -help for help. \n \n " ) ;
2015-01-12 18:55:45 +01:00
if not ! stay_on_error then exit 2 else raise StayOnError
2013-08-20 16:32:57 +02:00
| Arg . Help _ ->
Arg . usage ( align_specs specs ) usage_msg ;
flush stderr (* r for Windows *)
| Sys_error s as e ->
2023-04-25 14:45:56 +02:00
Pdfe . log ( s ^ " \n \n " ) ;
2015-01-11 19:39:35 +01:00
if not ! stay_on_error then
2015-01-12 18:55:45 +01:00
( if args . debug then raise e else exit 2 )
else raise StayOnError
2013-08-20 16:32:57 +02:00
| Pdf . PDFError s as e ->
2023-04-25 14:45:56 +02:00
Pdfe . log
2013-08-20 16:32:57 +02:00
( " cpdf encountered an error. Technical details follow: \n \n " ^ s ^ " \n \n " ) ;
2015-01-11 19:39:35 +01:00
if not ! stay_on_error then
if args . debug then raise e else exit 2
2015-01-12 18:55:45 +01:00
else
raise StayOnError
2021-10-02 13:22:59 +02:00
| Cpdferror . SoftError s -> soft_error s
| Cpdferror . HardError s -> error s
2013-08-20 16:32:57 +02:00
| e ->
2023-04-25 14:45:56 +02:00
Pdfe . log
2013-08-20 16:32:57 +02:00
( " cpdf encountered an unexpected error. Technical Details follow: \n " ^
Printexc . to_string e ^ " \n \n " ) ;
2015-01-12 18:55:45 +01:00
if not ! stay_on_error then
( if args . debug then raise e else exit 2 ) else raise StayOnError
2013-08-20 16:32:57 +02:00
2015-01-11 17:10:18 +01:00
let go () =
go_withargv Sys . argv