From f402f4f720f0c183e344bddf354fa4cc5f4dab78 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Tue, 20 Aug 2013 15:32:57 +0100 Subject: [PATCH] Initial commit --- Makefile | 16 + OCamlMakefile | 1299 +++++++++++++++++ cpdf.ml | 3063 ++++++++++++++++++++++++++++++++++++++++ cpdf.mli | 305 ++++ cpdfcommand.ml | 3400 +++++++++++++++++++++++++++++++++++++++++++++ cpdfcommand.mli | 7 + cpdfcommandrun.ml | 2 + cpdfstrftime.ml | 93 ++ cpdfstrftime.mli | 2 + 9 files changed, 8187 insertions(+) create mode 100644 Makefile create mode 100644 OCamlMakefile create mode 100644 cpdf.ml create mode 100644 cpdf.mli create mode 100644 cpdfcommand.ml create mode 100644 cpdfcommand.mli create mode 100644 cpdfcommandrun.ml create mode 100644 cpdfstrftime.ml create mode 100644 cpdfstrftime.mli diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..cfaacd8 --- /dev/null +++ b/Makefile @@ -0,0 +1,16 @@ +# Build the cpdf command line tools and top level +SOURCES = cpdfstrftime.mli cpdfstrftime.ml cpdf.mli cpdf.ml cpdfcommand.mli \ +cpdfcommand.ml cpdfcommandrun.ml + +RESULT = cpdf + +PACKS = camlpdf + +OCAMLNCFLAGS = -g +OCAMLBCFLAGS = -g +OCAMLLDFLAGS = -g + +all : native-code top htdoc + +-include OCamlMakefile + diff --git a/OCamlMakefile b/OCamlMakefile new file mode 100644 index 0000000..43dc3e2 --- /dev/null +++ b/OCamlMakefile @@ -0,0 +1,1299 @@ +########################################################################### +# OCamlMakefile +# Copyright (C) 1999- Markus Mottl +# +# For updates see: +# http://www.ocaml.info/home/ocaml_sources.html +# +########################################################################### + +# Modified by damien for .glade.ml compilation + +# Set these variables to the names of the sources to be processed and +# the result variable. Order matters during linkage! + +ifndef SOURCES + SOURCES := foo.ml +endif +export SOURCES + +ifndef RES_CLIB_SUF + RES_CLIB_SUF := _stubs +endif +export RES_CLIB_SUF + +ifndef RESULT + RESULT := foo +endif +export RESULT := $(strip $(RESULT)) + +export LIB_PACK_NAME + +ifndef DOC_FILES + DOC_FILES := $(filter %.mli, $(SOURCES)) +endif +export DOC_FILES +FIRST_DOC_FILE := $(firstword $(DOC_FILES)) + +export BCSUFFIX +export NCSUFFIX + +ifndef TOPSUFFIX + TOPSUFFIX := .top +endif +export TOPSUFFIX + +# Eventually set include- and library-paths, libraries to link, +# additional compilation-, link- and ocamlyacc-flags +# Path- and library information needs not be written with "-I" and such... +# Define THREADS if you need it, otherwise leave it unset (same for +# USE_CAMLP4)! + +export THREADS +export VMTHREADS +export ANNOTATE +export USE_CAMLP4 + +export INCDIRS +export LIBDIRS +export EXTLIBDIRS +export RESULTDEPS +export OCAML_DEFAULT_DIRS + +export LIBS +export CLIBS +export CFRAMEWORKS + +export OCAMLFLAGS +export OCAMLNCFLAGS +export OCAMLBCFLAGS + +export OCAMLLDFLAGS +export OCAMLNLDFLAGS +export OCAMLBLDFLAGS + +export OCAMLMKLIB_FLAGS + +ifndef OCAMLCPFLAGS + OCAMLCPFLAGS := a +endif +export OCAMLCPFLAGS + +ifndef DOC_DIR + DOC_DIR := doc +endif +export DOC_DIR + +export PPFLAGS + +export LFLAGS +export YFLAGS +export IDLFLAGS + +export OCAMLDOCFLAGS + +export OCAMLFIND_INSTFLAGS + +export DVIPSFLAGS + +export STATIC + +# Add a list of optional trash files that should be deleted by "make clean" +export TRASH + +ECHO := echo + +ifdef REALLY_QUIET + export REALLY_QUIET + ECHO := true + LFLAGS := $(LFLAGS) -q + YFLAGS := $(YFLAGS) -q +endif + +#################### variables depending on your OCaml-installation + +SYSTEM := $(shell ocamlc -config 2>/dev/null | grep system | sed 's/system: //') + # This may be + # - mingw + # - mingw64 + # - win32 + # - cygwin + # - some other string means Unix + # - empty means ocamlc does not support -config + +ifeq ($(SYSTEM),$(filter $(SYSTEM),mingw mingw64)) + MINGW=1 +endif +ifeq ($(SYSTEM),win32) + MSVC=1 +endif + +ifdef MINGW + export MINGW + WIN32 := 1 + # The default value 'cc' makes 'ocamlc -cc "cc"' raises the error 'The + # NTVDM CPU has encountered an illegal instruction'. + ifndef CC + MNO_CYGWIN := $(shell gcc -Wextra -v --help 2>/dev/null | grep -q '\-mno-cygwin'; echo $$?) + CC := gcc + else + MNO_CYGWIN := $(shell $$CC -Wextra -v --help 2>/dev/null | grep -q '\-mno-cygwin'; echo $$?) + endif + # We are compiling with cygwin tools: + ifeq ($(MNO_CYGWIN),0) + CFLAGS_WIN32 := -mno-cygwin + endif + # The OCaml C header files use this flag: + CFLAGS += -D__MINGW32__ +endif +ifdef MSVC + export MSVC + WIN32 := 1 + ifndef STATIC + CPPFLAGS_WIN32 := -DCAML_DLL + endif + CFLAGS_WIN32 += -nologo + EXT_OBJ := obj + EXT_LIB := lib + ifeq ($(CC),gcc) + # work around GNU Make default value + ifdef THREADS + CC := cl -MT + else + CC := cl + endif + endif + ifeq ($(CXX),g++) + # work around GNU Make default value + CXX := $(CC) + endif + CFLAG_O := -Fo +endif +ifdef WIN32 + EXT_CXX := cpp + EXE := .exe +endif + +ifndef EXT_OBJ + EXT_OBJ := o +endif +ifndef EXT_LIB + EXT_LIB := a +endif +ifndef EXT_CXX + EXT_CXX := cc +endif +ifndef EXE + EXE := # empty +endif +ifndef CFLAG_O + CFLAG_O := -o # do not delete this comment (preserves trailing whitespace)! +endif + +export CC +export CXX +export CFLAGS +export CXXFLAGS +export LDFLAGS +export CPPFLAGS + +ifndef RPATH_FLAG + ifdef ELF_RPATH_FLAG + RPATH_FLAG := $(ELF_RPATH_FLAG) + else + RPATH_FLAG := -R + endif +endif +export RPATH_FLAG + +ifndef MSVC +ifndef PIC_CFLAGS + PIC_CFLAGS := -fPIC +endif +ifndef PIC_CPPFLAGS + PIC_CPPFLAGS := -DPIC +endif +endif + +export PIC_CFLAGS +export PIC_CPPFLAGS + +BCRESULT := $(addsuffix $(BCSUFFIX), $(RESULT)) +NCRESULT := $(addsuffix $(NCSUFFIX), $(RESULT)) +TOPRESULT := $(addsuffix $(TOPSUFFIX), $(RESULT)) + +ifndef OCAMLFIND + OCAMLFIND := ocamlfind +endif +export OCAMLFIND + +ifndef OCAML + OCAML := ocaml +endif +export OCAML + +ifndef OCAMLC + OCAMLC := ocamlc +endif +export OCAMLC + +ifndef OCAMLOPT + OCAMLOPT := ocamlopt +endif +export OCAMLOPT + +ifndef OCAMLMKTOP + OCAMLMKTOP := ocamlmktop +endif +export OCAMLMKTOP + +ifndef OCAMLCP + OCAMLCP := ocamlcp +endif +export OCAMLCP + +ifndef OCAMLDEP + OCAMLDEP := ocamldep +endif +export OCAMLDEP + +ifndef OCAMLLEX + OCAMLLEX := ocamllex +endif +export OCAMLLEX + +ifndef OCAMLYACC + OCAMLYACC := ocamlyacc +endif +export OCAMLYACC + +ifndef OCAMLMKLIB + OCAMLMKLIB := ocamlmklib +endif +export OCAMLMKLIB + +ifndef OCAML_GLADECC + OCAML_GLADECC := lablgladecc2 +endif +export OCAML_GLADECC + +ifndef OCAML_GLADECC_FLAGS + OCAML_GLADECC_FLAGS := +endif +export OCAML_GLADECC_FLAGS + +ifndef CAMELEON_REPORT + CAMELEON_REPORT := report +endif +export CAMELEON_REPORT + +ifndef CAMELEON_REPORT_FLAGS + CAMELEON_REPORT_FLAGS := +endif +export CAMELEON_REPORT_FLAGS + +ifndef CAMELEON_ZOGGY + CAMELEON_ZOGGY := camlp4o pa_zog.cma pr_o.cmo +endif +export CAMELEON_ZOGGY + +ifndef CAMELEON_ZOGGY_FLAGS + CAMELEON_ZOGGY_FLAGS := +endif +export CAMELEON_ZOGGY_FLAGS + +ifndef OXRIDL + OXRIDL := oxridl +endif +export OXRIDL + +ifndef CAMLIDL + CAMLIDL := camlidl +endif +export CAMLIDL + +ifndef CAMLIDLDLL + CAMLIDLDLL := camlidldll +endif +export CAMLIDLDLL + +ifndef NOIDLHEADER + MAYBE_IDL_HEADER := -header +endif +export NOIDLHEADER + +export NO_CUSTOM + +ifndef CAMLP4 + CAMLP4 := camlp4 +endif +export CAMLP4 + +ifndef REAL_OCAMLFIND + ifdef PACKS + ifndef CREATE_LIB + ifdef THREADS + PACKS += threads + endif + endif + empty := + space := $(empty) $(empty) + comma := , + ifdef PREDS + PRE_OCAML_FIND_PREDICATES := $(subst $(space),$(comma),$(PREDS)) + PRE_OCAML_FIND_PACKAGES := $(subst $(space),$(comma),$(PACKS)) + OCAML_FIND_PREDICATES := -predicates $(PRE_OCAML_FIND_PREDICATES) + # OCAML_DEP_PREDICATES := -syntax $(PRE_OCAML_FIND_PREDICATES) + OCAML_FIND_PACKAGES := $(OCAML_FIND_PREDICATES) -package $(PRE_OCAML_FIND_PACKAGES) + OCAML_DEP_PACKAGES := $(OCAML_DEP_PREDICATES) -package $(PRE_OCAML_FIND_PACKAGES) + else + OCAML_FIND_PACKAGES := -package $(subst $(space),$(comma),$(PACKS)) + OCAML_DEP_PACKAGES := + endif + OCAML_FIND_LINKPKG := -linkpkg + REAL_OCAMLFIND := $(OCAMLFIND) + endif +endif + +export OCAML_FIND_PACKAGES +export OCAML_DEP_PACKAGES +export OCAML_FIND_LINKPKG +export REAL_OCAMLFIND + +ifndef OCAMLDOC + OCAMLDOC := ocamldoc +endif +export OCAMLDOC + +ifndef LATEX + LATEX := latex +endif +export LATEX + +ifndef DVIPS + DVIPS := dvips +endif +export DVIPS + +ifndef PS2PDF + PS2PDF := ps2pdf +endif +export PS2PDF + +ifndef OCAMLMAKEFILE + OCAMLMAKEFILE := OCamlMakefile +endif +export OCAMLMAKEFILE + +ifndef OCAMLLIBPATH + OCAMLLIBPATH := \ + $(shell $(OCAMLC) 2>/dev/null -where || echo /usr/local/lib/ocaml) +endif +export OCAMLLIBPATH + +ifndef OCAML_LIB_INSTALL + OCAML_LIB_INSTALL := $(OCAMLLIBPATH)/contrib +endif +export OCAML_LIB_INSTALL + +########################################################################### + +#################### change following sections only if +#################### you know what you are doing! + +# delete target files when a build command fails +.PHONY: .DELETE_ON_ERROR +.DELETE_ON_ERROR: + +# for pedants using "--warn-undefined-variables" +export MAYBE_IDL +export REAL_RESULT +export CAMLIDLFLAGS +export THREAD_FLAG +export RES_CLIB +export MAKEDLL +export ANNOT_FLAG +export C_OXRIDL +export SUBPROJS +export CFLAGS_WIN32 +export CPPFLAGS_WIN32 + +INCFLAGS := + +SHELL := /bin/sh + +MLDEPDIR := ._d +BCDIDIR := ._bcdi +NCDIDIR := ._ncdi + +FILTER_EXTNS := %.mli %.ml %.mll %.mly %.idl %.oxridl %.c %.m %.$(EXT_CXX) %.rep %.zog %.glade + +FILTERED := $(filter $(FILTER_EXTNS), $(SOURCES)) +SOURCE_DIRS := $(filter-out ./, $(sort $(dir $(FILTERED)))) + +FILTERED_REP := $(filter %.rep, $(FILTERED)) +DEP_REP := $(FILTERED_REP:%.rep=$(MLDEPDIR)/%.d) +AUTO_REP := $(FILTERED_REP:.rep=.ml) + +FILTERED_ZOG := $(filter %.zog, $(FILTERED)) +DEP_ZOG := $(FILTERED_ZOG:%.zog=$(MLDEPDIR)/%.d) +AUTO_ZOG := $(FILTERED_ZOG:.zog=.ml) + +FILTERED_GLADE := $(filter %.glade, $(FILTERED)) +DEP_GLADE := $(FILTERED_GLADE:%.glade=$(MLDEPDIR)/%.d) +AUTO_GLADE := $(FILTERED_GLADE:.glade=.ml) + +FILTERED_ML := $(filter %.ml, $(FILTERED)) +DEP_ML := $(FILTERED_ML:%.ml=$(MLDEPDIR)/%.d) + +FILTERED_MLI := $(filter %.mli, $(FILTERED)) +DEP_MLI := $(FILTERED_MLI:.mli=.di) + +FILTERED_MLL := $(filter %.mll, $(FILTERED)) +DEP_MLL := $(FILTERED_MLL:%.mll=$(MLDEPDIR)/%.d) +AUTO_MLL := $(FILTERED_MLL:.mll=.ml) + +FILTERED_MLY := $(filter %.mly, $(FILTERED)) +DEP_MLY := $(FILTERED_MLY:%.mly=$(MLDEPDIR)/%.d) $(FILTERED_MLY:.mly=.di) +AUTO_MLY := $(FILTERED_MLY:.mly=.mli) $(FILTERED_MLY:.mly=.ml) + +FILTERED_IDL := $(filter %.idl, $(FILTERED)) +DEP_IDL := $(FILTERED_IDL:%.idl=$(MLDEPDIR)/%.d) $(FILTERED_IDL:.idl=.di) +C_IDL := $(FILTERED_IDL:%.idl=%_stubs.c) +ifndef NOIDLHEADER + C_IDL += $(FILTERED_IDL:.idl=.h) +endif +OBJ_C_IDL := $(FILTERED_IDL:%.idl=%_stubs.$(EXT_OBJ)) +AUTO_IDL := $(FILTERED_IDL:.idl=.mli) $(FILTERED_IDL:.idl=.ml) $(C_IDL) + +FILTERED_OXRIDL := $(filter %.oxridl, $(FILTERED)) +DEP_OXRIDL := $(FILTERED_OXRIDL:%.oxridl=$(MLDEPDIR)/%.d) $(FILTERED_OXRIDL:.oxridl=.di) +AUTO_OXRIDL := $(FILTERED_OXRIDL:.oxridl=.mli) $(FILTERED_OXRIDL:.oxridl=.ml) $(C_OXRIDL) + +FILTERED_C_CXX := $(filter %.c %.m %.$(EXT_CXX), $(FILTERED)) +OBJ_C_CXX := $(FILTERED_C_CXX:.c=.$(EXT_OBJ)) +OBJ_C_CXX := $(OBJ_C_CXX:.m=.$(EXT_OBJ)) +OBJ_C_CXX := $(OBJ_C_CXX:.$(EXT_CXX)=.$(EXT_OBJ)) + +PRE_TARGETS += $(AUTO_MLL) $(AUTO_MLY) $(AUTO_IDL) $(AUTO_OXRIDL) $(AUTO_ZOG) $(AUTO_REP) $(AUTO_GLADE) + +ALL_DEPS := $(DEP_ML) $(DEP_MLI) $(DEP_MLL) $(DEP_MLY) $(DEP_IDL) $(DEP_OXRIDL) $(DEP_ZOG) $(DEP_REP) $(DEP_GLADE) + +MLDEPS := $(filter %.d, $(ALL_DEPS)) +MLIDEPS := $(filter %.di, $(ALL_DEPS)) +BCDEPIS := $(MLIDEPS:%.di=$(BCDIDIR)/%.di) +NCDEPIS := $(MLIDEPS:%.di=$(NCDIDIR)/%.di) + +ALLML := $(filter %.mli %.ml %.mll %.mly %.idl %.oxridl %.rep %.zog %.glade, $(FILTERED)) + +IMPLO_INTF := $(ALLML:%.mli=%.mli.__) +IMPLO_INTF := $(foreach file, $(IMPLO_INTF), \ + $(basename $(file)).cmi $(basename $(file)).cmo) +IMPLO_INTF := $(filter-out %.mli.cmo, $(IMPLO_INTF)) +IMPLO_INTF := $(IMPLO_INTF:%.mli.cmi=%.cmi) + +IMPLX_INTF := $(IMPLO_INTF:.cmo=.cmx) + +INTF := $(filter %.cmi, $(IMPLO_INTF)) +IMPL_CMO := $(filter %.cmo, $(IMPLO_INTF)) +IMPL_CMX := $(IMPL_CMO:.cmo=.cmx) +IMPL_ASM := $(IMPL_CMO:.cmo=.asm) +IMPL_S := $(IMPL_CMO:.cmo=.s) + +OBJ_LINK := $(OBJ_C_IDL) $(OBJ_C_CXX) +OBJ_FILES := $(IMPL_CMO:.cmo=.$(EXT_OBJ)) $(OBJ_LINK) + +EXECS := $(addsuffix $(EXE), \ + $(sort $(TOPRESULT) $(BCRESULT) $(NCRESULT))) +ifdef WIN32 + EXECS += $(BCRESULT).dll $(NCRESULT).dll +endif + +CLIB_BASE := $(RESULT)$(RES_CLIB_SUF) +ifneq ($(strip $(OBJ_LINK)),) + RES_CLIB := lib$(CLIB_BASE).$(EXT_LIB) +endif + +ifdef WIN32 +DLLSONAME := dll$(CLIB_BASE).dll +else +DLLSONAME := dll$(CLIB_BASE).so +endif + +NONEXECS := $(INTF) $(IMPL_CMO) $(IMPL_CMX) $(IMPL_ASM) $(IMPL_S) \ + $(OBJ_FILES) $(PRE_TARGETS) $(BCRESULT).cma $(NCRESULT).cmxa \ + $(NCRESULT).$(EXT_LIB) $(BCRESULT).cmi $(BCRESULT).cmo \ + $(NCRESULT).cmi $(NCRESULT).cmx $(NCRESULT).$(EXT_OBJ) \ + $(RES_CLIB) $(IMPL_CMO:.cmo=.annot) \ + $(LIB_PACK_NAME).cmi $(LIB_PACK_NAME).cmo $(LIB_PACK_NAME).cmx \ + $(LIB_PACK_NAME).$(EXT_OBJ) + +ifndef STATIC + NONEXECS += $(DLLSONAME) +endif + +ifndef LIBINSTALL_FILES + LIBINSTALL_FILES := $(RESULT).mli $(RESULT).cmi $(RESULT).cma \ + $(RESULT).cmxa $(RESULT).$(EXT_LIB) $(RES_CLIB) + ifndef STATIC + ifneq ($(strip $(OBJ_LINK)),) + LIBINSTALL_FILES += $(DLLSONAME) + endif + endif +endif + +export LIBINSTALL_FILES + +ifdef WIN32 + # some extra stuff is created while linking DLLs + NONEXECS += $(BCRESULT).$(EXT_LIB) $(BCRESULT).exp $(NCRESULT).exp $(CLIB_BASE).exp $(CLIB_BASE).lib +endif + +TARGETS := $(EXECS) $(NONEXECS) + +# If there are IDL-files +ifneq ($(strip $(FILTERED_IDL)),) + MAYBE_IDL := -cclib -lcamlidl +endif + +ifdef USE_CAMLP4 + CAMLP4PATH := \ + $(shell $(CAMLP4) -where 2>/dev/null || echo /usr/local/lib/camlp4) + INCFLAGS := -I $(CAMLP4PATH) + CINCFLAGS := -I$(CAMLP4PATH) +endif + +INCFLAGS := $(INCFLAGS) $(INCDIRS:%=-I %) $(SOURCE_DIRS:%=-I %) $(OCAML_DEFAULT_DIRS:%=-I %) +CINCFLAGS += $(SOURCE_DIRS:%=-I%) $(INCDIRS:%=-I%) $(OCAML_DEFAULT_DIRS:%=-I%) + +ifndef MSVC + CLIBFLAGS += $(SOURCE_DIRS:%=-L%) $(LIBDIRS:%=-L%) \ + $(EXTLIBDIRS:%=-L%) $(OCAML_DEFAULT_DIRS:%=-L%) + + ifeq ($(ELF_RPATH), yes) + CLIBFLAGS += $(EXTLIBDIRS:%=-Wl,$(RPATH_FLAG)%) + endif +endif + +ifndef PROFILING + INTF_OCAMLC := $(OCAMLC) +else + ifndef THREADS + INTF_OCAMLC := $(OCAMLCP) -p $(OCAMLCPFLAGS) + else + # OCaml does not support profiling byte code + # with threads (yet), therefore we force an error. + ifndef REAL_OCAMLC + $(error Profiling of multithreaded byte code not yet supported by OCaml) + endif + INTF_OCAMLC := $(OCAMLC) + endif +endif + +ifndef MSVC + COMMON_LDFLAGS := $(LDFLAGS:%=-ccopt %) $(SOURCE_DIRS:%=-ccopt -L%) \ + $(LIBDIRS:%=-ccopt -L%) $(EXTLIBDIRS:%=-ccopt -L%) \ + $(EXTLIBDIRS:%=-ccopt -Wl $(OCAML_DEFAULT_DIRS:%=-ccopt -L%)) + + ifeq ($(ELF_RPATH),yes) + COMMON_LDFLAGS += $(EXTLIBDIRS:%=-ccopt -Wl,$(RPATH_FLAG)%) + endif +else + COMMON_LDFLAGS := -ccopt "/link -NODEFAULTLIB:LIBC $(LDFLAGS:%=%) $(SOURCE_DIRS:%=-LIBPATH:%) \ + $(LIBDIRS:%=-LIBPATH:%) $(EXTLIBDIRS:%=-LIBPATH:%) \ + $(OCAML_DEFAULT_DIRS:%=-LIBPATH:%) " +endif + +CLIBS_OPTS := $(CLIBS:%=-cclib -l%) $(CFRAMEWORKS:%=-cclib '-framework %') +ifdef MSVC + ifndef STATIC + # MSVC libraries do not have 'lib' prefix + CLIBS_OPTS := $(CLIBS:%=-cclib %.lib) + endif +endif + +ifneq ($(strip $(OBJ_LINK)),) + ifdef CREATE_LIB + OBJS_LIBS := -cclib -l$(CLIB_BASE) $(CLIBS_OPTS) $(MAYBE_IDL) + else + OBJS_LIBS := $(OBJ_LINK) $(CLIBS_OPTS) $(MAYBE_IDL) + endif +else + OBJS_LIBS := $(CLIBS_OPTS) $(MAYBE_IDL) +endif + +ifdef LIB_PACK_NAME + FOR_PACK_NAME := $(shell echo $(LIB_PACK_NAME) | awk '{print toupper(substr($$0,1,1))substr($$0,2)}') +endif + +# If we have to make byte-code +ifndef REAL_OCAMLC + BYTE_OCAML := y + + # EXTRADEPS is added dependencies we have to insert for all + # executable files we generate. Ideally it should be all of the + # libraries we use, but it's hard to find the ones that get searched on + # the path since I don't know the paths built into the compiler, so + # just include the ones with slashes in their names. + EXTRADEPS := $(addsuffix .cma,$(foreach i,$(LIBS),$(if $(findstring /,$(i)),$(i)))) + + + ifndef LIB_PACK_NAME + SPECIAL_OCAMLFLAGS := $(OCAMLBCFLAGS) + else + SPECIAL_OCAMLFLAGS := -for-pack $(FOR_PACK_NAME) $(OCAMLBCFLAGS) + endif + + REAL_OCAMLC := $(INTF_OCAMLC) + + REAL_IMPL := $(IMPL_CMO) + REAL_IMPL_INTF := $(IMPLO_INTF) + IMPL_SUF := .cmo + + DEPFLAGS := + MAKE_DEPS := $(MLDEPS) $(BCDEPIS) + + ifdef CREATE_LIB + override CFLAGS := $(PIC_CFLAGS) $(CFLAGS) + override CPPFLAGS := $(PIC_CPPFLAGS) $(CPPFLAGS) + ifndef STATIC + ifneq ($(strip $(OBJ_LINK)),) + MAKEDLL := $(DLLSONAME) + ALL_LDFLAGS := -dllib $(DLLSONAME) + endif + endif + endif + + ifndef NO_CUSTOM + ifneq "$(strip $(OBJ_LINK) $(THREADS) $(MAYBE_IDL) $(CLIBS) $(CFRAMEWORKS))" "" + ALL_LDFLAGS += -custom + endif + endif + + ALL_LDFLAGS += $(INCFLAGS) $(OCAMLLDFLAGS) $(OCAMLBLDFLAGS) \ + $(COMMON_LDFLAGS) $(LIBS:%=%.cma) + CAMLIDLDLLFLAGS := + + ifdef THREADS + ifdef VMTHREADS + THREAD_FLAG := -vmthread + else + THREAD_FLAG := -thread + endif + ALL_LDFLAGS := $(THREAD_FLAG) $(ALL_LDFLAGS) + ifndef CREATE_LIB + ifndef REAL_OCAMLFIND + ALL_LDFLAGS := unix.cma threads.cma $(ALL_LDFLAGS) + endif + endif + endif + +# we have to make native-code +else + EXTRADEPS := $(addsuffix .cmxa,$(foreach i,$(LIBS),$(if $(findstring /,$(i)),$(i)))) + ifndef PROFILING + SPECIAL_OCAMLFLAGS := $(OCAMLNCFLAGS) + PLDFLAGS := + else + SPECIAL_OCAMLFLAGS := -p $(OCAMLNCFLAGS) + PLDFLAGS := -p + endif + + ifndef LIB_PACK_NAME + SPECIAL_OCAMLFLAGS := $(OCAMLNCFLAGS) + else + SPECIAL_OCAMLFLAGS := -for-pack $(FOR_PACK_NAME) $(OCAMLNCFLAGS) + endif + REAL_IMPL := $(IMPL_CMX) + REAL_IMPL_INTF := $(IMPLX_INTF) + IMPL_SUF := .cmx + + override CPPFLAGS := -DNATIVE_CODE $(CPPFLAGS) + + DEPFLAGS := -native + MAKE_DEPS := $(MLDEPS) $(NCDEPIS) + + ALL_LDFLAGS := $(PLDFLAGS) $(INCFLAGS) $(OCAMLLDFLAGS) \ + $(OCAMLNLDFLAGS) $(COMMON_LDFLAGS) + CAMLIDLDLLFLAGS := -opt + + ifndef CREATE_LIB + ALL_LDFLAGS += $(LIBS:%=%.cmxa) + else + override CFLAGS := $(PIC_CFLAGS) $(CFLAGS) + override CPPFLAGS := $(PIC_CPPFLAGS) $(CPPFLAGS) + endif + + ifdef THREADS + THREAD_FLAG := -thread + ALL_LDFLAGS := $(THREAD_FLAG) $(ALL_LDFLAGS) + ifndef CREATE_LIB + ifndef REAL_OCAMLFIND + ALL_LDFLAGS := unix.cmxa threads.cmxa $(ALL_LDFLAGS) + endif + endif + endif +endif + +export MAKE_DEPS + +ifdef ANNOTATE + ANNOT_FLAG := -annot +else +endif + +ALL_OCAMLCFLAGS := $(THREAD_FLAG) $(ANNOT_FLAG) $(OCAMLFLAGS) \ + $(INCFLAGS) $(SPECIAL_OCAMLFLAGS) + +ifdef make_deps + -include $(MAKE_DEPS) + PRE_TARGETS := +endif + +########################################################################### +# USER RULES + +# Call "OCamlMakefile QUIET=" to get rid of all of the @'s. +QUIET=@ + +# generates byte-code (default) +byte-code: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(BCRESULT) \ + REAL_RESULT="$(BCRESULT)" make_deps=yes +bc: byte-code + +byte-code-nolink: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) nolink \ + REAL_RESULT="$(BCRESULT)" make_deps=yes +bcnl: byte-code-nolink + +top: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(TOPRESULT) \ + REAL_RESULT="$(BCRESULT)" make_deps=yes + +# generates native-code + +native-code: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(NCRESULT) \ + REAL_RESULT="$(NCRESULT)" \ + REAL_OCAMLC="$(OCAMLOPT)" \ + make_deps=yes +nc: native-code + +native-code-nolink: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) nolink \ + REAL_RESULT="$(NCRESULT)" \ + REAL_OCAMLC="$(OCAMLOPT)" \ + make_deps=yes +ncnl: native-code-nolink + +# generates byte-code libraries +byte-code-library: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ + $(RES_CLIB) $(BCRESULT).cma \ + REAL_RESULT="$(BCRESULT)" \ + CREATE_LIB=yes \ + make_deps=yes +bcl: byte-code-library + +# generates native-code libraries +native-code-library: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ + $(RES_CLIB) $(NCRESULT).cmxa \ + REAL_RESULT="$(NCRESULT)" \ + REAL_OCAMLC="$(OCAMLOPT)" \ + CREATE_LIB=yes \ + make_deps=yes +ncl: native-code-library + +ifdef WIN32 +# generates byte-code dll +byte-code-dll: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ + $(RES_CLIB) $(BCRESULT).dll \ + REAL_RESULT="$(BCRESULT)" \ + make_deps=yes +bcd: byte-code-dll + +# generates native-code dll +native-code-dll: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ + $(RES_CLIB) $(NCRESULT).dll \ + REAL_RESULT="$(NCRESULT)" \ + REAL_OCAMLC="$(OCAMLOPT)" \ + make_deps=yes +ncd: native-code-dll +endif + +# generates byte-code with debugging information +debug-code: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(BCRESULT) \ + REAL_RESULT="$(BCRESULT)" make_deps=yes \ + OCAMLFLAGS="-g $(OCAMLFLAGS)" \ + OCAMLLDFLAGS="-g $(OCAMLLDFLAGS)" +dc: debug-code + +debug-code-nolink: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) nolink \ + REAL_RESULT="$(BCRESULT)" make_deps=yes \ + OCAMLFLAGS="-g $(OCAMLFLAGS)" \ + OCAMLLDFLAGS="-g $(OCAMLLDFLAGS)" +dcnl: debug-code-nolink + +# generates byte-code with debugging information (native code) +debug-native-code: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(NCRESULT) \ + REAL_RESULT="$(NCRESULT)" make_deps=yes \ + REAL_OCAMLC="$(OCAMLOPT)" \ + OCAMLFLAGS="-g $(OCAMLFLAGS)" \ + OCAMLLDFLAGS="-g $(OCAMLLDFLAGS)" +dnc: debug-native-code + +debug-native-code-nolink: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) nolink \ + REAL_RESULT="$(NCRESULT)" make_deps=yes \ + REAL_OCAMLC="$(OCAMLOPT)" \ + OCAMLFLAGS="-g $(OCAMLFLAGS)" \ + OCAMLLDFLAGS="-g $(OCAMLLDFLAGS)" +dncnl: debug-native-code-nolink + +# generates byte-code libraries with debugging information +debug-code-library: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ + $(RES_CLIB) $(BCRESULT).cma \ + REAL_RESULT="$(BCRESULT)" make_deps=yes \ + CREATE_LIB=yes \ + OCAMLFLAGS="-g $(OCAMLFLAGS)" \ + OCAMLLDFLAGS="-g $(OCAMLLDFLAGS)" +dcl: debug-code-library + +# generates byte-code libraries with debugging information (native code) +debug-native-code-library: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ + $(RES_CLIB) $(NCRESULT).cmxa \ + REAL_RESULT="$(NCRESULT)" make_deps=yes \ + REAL_OCAMLC="$(OCAMLOPT)" \ + CREATE_LIB=yes \ + OCAMLFLAGS="-g $(OCAMLFLAGS)" \ + OCAMLLDFLAGS="-g $(OCAMLLDFLAGS)" +dncl: debug-native-code-library + +# generates byte-code for profiling +profiling-byte-code: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(BCRESULT) \ + REAL_RESULT="$(BCRESULT)" PROFILING="y" \ + make_deps=yes +pbc: profiling-byte-code + +# generates native-code + +profiling-native-code: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(NCRESULT) \ + REAL_RESULT="$(NCRESULT)" \ + REAL_OCAMLC="$(OCAMLOPT)" \ + PROFILING="y" \ + make_deps=yes +pnc: profiling-native-code + +# generates byte-code libraries +profiling-byte-code-library: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ + $(RES_CLIB) $(BCRESULT).cma \ + REAL_RESULT="$(BCRESULT)" PROFILING="y" \ + CREATE_LIB=yes \ + make_deps=yes +pbcl: profiling-byte-code-library + +# generates native-code libraries +profiling-native-code-library: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ + $(RES_CLIB) $(NCRESULT).cmxa \ + REAL_RESULT="$(NCRESULT)" PROFILING="y" \ + REAL_OCAMLC="$(OCAMLOPT)" \ + CREATE_LIB=yes \ + make_deps=yes +pncl: profiling-native-code-library + +# packs byte-code objects +pack-byte-code: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(BCRESULT).cmo \ + REAL_RESULT="$(BCRESULT)" \ + PACK_LIB=yes make_deps=yes +pabc: pack-byte-code + +# packs native-code objects +pack-native-code: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ + $(NCRESULT).cmx $(NCRESULT).$(EXT_OBJ) \ + REAL_RESULT="$(NCRESULT)" \ + REAL_OCAMLC="$(OCAMLOPT)" \ + PACK_LIB=yes make_deps=yes +panc: pack-native-code + +# generates HTML-documentation +htdoc: $(DOC_DIR)/$(RESULT)/html/index.html + +# generates Latex-documentation +ladoc: $(DOC_DIR)/$(RESULT)/latex/doc.tex + +# generates PostScript-documentation +psdoc: $(DOC_DIR)/$(RESULT)/latex/doc.ps + +# generates PDF-documentation +pdfdoc: $(DOC_DIR)/$(RESULT)/latex/doc.pdf + +# generates all supported forms of documentation +doc: htdoc ladoc psdoc pdfdoc + +########################################################################### +# LOW LEVEL RULES + +$(REAL_RESULT): $(REAL_IMPL_INTF) $(OBJ_LINK) $(EXTRADEPS) $(RESULTDEPS) + $(REAL_OCAMLFIND) $(REAL_OCAMLC) \ + $(OCAML_FIND_PACKAGES) $(OCAML_FIND_LINKPKG) \ + $(OBJS_LIBS) $(ALL_LDFLAGS) -o $@$(EXE) \ + $(REAL_IMPL) + +nolink: $(REAL_IMPL_INTF) $(OBJ_LINK) + +ifdef WIN32 +$(REAL_RESULT).dll: $(REAL_IMPL_INTF) $(OBJ_LINK) + $(CAMLIDLDLL) $(CAMLIDLDLLFLAGS) $(OBJ_LINK) $(CLIBS) \ + -o $@ $(REAL_IMPL) +endif + +%$(TOPSUFFIX): $(REAL_IMPL_INTF) $(OBJ_LINK) $(EXTRADEPS) + $(REAL_OCAMLFIND) $(OCAMLMKTOP) \ + $(OCAML_FIND_PACKAGES) $(OCAML_FIND_LINKPKG) \ + $(OBJS_LIBS) $(ALL_LDFLAGS) -o $@$(EXE) \ + $(REAL_IMPL) + +.SUFFIXES: .mli .ml .cmi .cmo .cmx .cma .cmxa .$(EXT_OBJ) \ + .mly .di .d .$(EXT_LIB) .idl %.oxridl .c .m .$(EXT_CXX) .h .so \ + .rep .zog .glade + +ifndef STATIC +ifdef MINGW +# From OCaml 3.11.0, ocamlmklib is available on windows +OCAMLMLIB_EXISTS = $(shell which $(OCAMLMKLIB)) +ifeq ($(strip $(OCAMLMLIB_EXISTS)),) +$(DLLSONAME): $(OBJ_LINK) + $(CC) $(CFLAGS) $(CFLAGS_WIN32) $(OBJ_LINK) -shared -o $@ \ + $(wildcard $(foreach dir,$(LIBDIRS),$(CLIBS:%=$(dir)/lib%.a))) \ + '$(OCAMLLIBPATH)/ocamlrun.a' \ + -Wl,--whole-archive \ + -Wl,--export-all-symbols \ + -Wl,--allow-multiple-definition \ + -Wl,--enable-auto-import +else +$(DLLSONAME): $(OBJ_LINK) + $(OCAMLMKLIB) $(INCFLAGS) $(CLIBFLAGS) \ + -o $(CLIB_BASE) $(OBJ_LINK) $(CLIBS:%=-l%) \ + $(CFRAMEWORKS:%=-framework %) \ + $(OCAMLMKLIB_FLAGS) +endif +else +ifdef MSVC +$(DLLSONAME): $(OBJ_LINK) + link /NOLOGO /DLL /OUT:$@ $(OBJ_LINK) \ + $(wildcard $(foreach dir,$(LIBDIRS),$(CLIBS:%=$(dir)/%.lib))) \ + '$(OCAMLLIBPATH)/ocamlrun.lib' + +else +$(DLLSONAME): $(OBJ_LINK) + $(OCAMLMKLIB) $(INCFLAGS) $(CLIBFLAGS) \ + -o $(CLIB_BASE) $(OBJ_LINK) $(CLIBS:%=-l%) $(CFRAMEWORKS:%=-framework %) \ + $(OCAMLMKLIB_FLAGS) +endif +endif +endif + +ifndef LIB_PACK_NAME +$(RESULT).cma: $(REAL_IMPL_INTF) $(MAKEDLL) $(EXTRADEPS) $(RESULTDEPS) + $(REAL_OCAMLFIND) $(REAL_OCAMLC) -a $(OBJS_LIBS) $(ALL_LDFLAGS) -o $@ $(REAL_IMPL) + +$(RESULT).cmxa $(RESULT).$(EXT_LIB): $(REAL_IMPL_INTF) $(EXTRADEPS) $(RESULTDEPS) + $(REAL_OCAMLFIND) $(OCAMLOPT) -a $(OBJS_LIBS) $(ALL_LDFLAGS) -o $@ $(REAL_IMPL) +else +# Packing a bytecode library +LIB_PACK_NAME_MLI = $(wildcard $(LIB_PACK_NAME).mli) +ifeq ($(LIB_PACK_NAME_MLI),) +LIB_PACK_NAME_CMI = $(LIB_PACK_NAME).cmi +else +# $(LIB_PACK_NAME).mli exists, it likely depends on other compiled interfaces +LIB_PACK_NAME_CMI = +$(LIB_PACK_NAME).cmi: $(REAL_IMPL_INTF) +endif +ifdef BYTE_OCAML +$(LIB_PACK_NAME_CMI) $(LIB_PACK_NAME).cmo: $(REAL_IMPL_INTF) + $(REAL_OCAMLFIND) $(REAL_OCAMLC) -pack -o $(LIB_PACK_NAME).cmo $(OCAMLLDFLAGS) $(REAL_IMPL) +# Packing into a unit which can be transformed into a library +# Remember the .ml's must have been compiled with -for-pack $(LIB_PACK_NAME) +else +$(LIB_PACK_NAME_CMI) $(LIB_PACK_NAME).cmx: $(REAL_IMPL_INTF) + $(REAL_OCAMLFIND) $(OCAMLOPT) -pack -o $(LIB_PACK_NAME).cmx $(OCAMLLDFLAGS) $(REAL_IMPL) +endif + +$(RESULT).cma: $(LIB_PACK_NAME).cmi $(LIB_PACK_NAME).cmo $(MAKEDLL) $(EXTRADEPS) $(RESULTDEPS) + $(REAL_OCAMLFIND) $(REAL_OCAMLC) -a $(OBJS_LIBS) $(ALL_LDFLAGS) -o $@ $(LIB_PACK_NAME).cmo + +$(RESULT).cmxa $(RESULT).$(EXT_LIB): $(LIB_PACK_NAME).cmi $(LIB_PACK_NAME).cmx $(EXTRADEPS) $(RESULTDEPS) + $(REAL_OCAMLFIND) $(OCAMLOPT) -a $(OBJS_LIBS) $(filter-out -custom, $(ALL_LDFLAGS)) -o $@ $(LIB_PACK_NAME).cmx +endif + +$(RES_CLIB): $(OBJ_LINK) +ifndef MSVC + ifneq ($(strip $(OBJ_LINK)),) + $(AR) rcs $@ $(OBJ_LINK) + endif +else + ifneq ($(strip $(OBJ_LINK)),) + lib -nologo -debugtype:cv -out:$(RES_CLIB) $(OBJ_LINK) + endif +endif + +%.cmi: %.mli $(EXTRADEPS) + $(QUIET)pp=`sed -n -e '/^#/d' -e 's/(\*pp \([^*]*\) \*)/\1/p;q' $<`; \ + if [ -z "$$pp" ]; then \ + $(ECHO) $(REAL_OCAMLFIND) $(INTF_OCAMLC) $(OCAML_FIND_PACKAGES) \ + -c $(THREAD_FLAG) $(ANNOT_FLAG) \ + $(OCAMLFLAGS) $(INCFLAGS) $<; \ + $(REAL_OCAMLFIND) $(INTF_OCAMLC) $(OCAML_FIND_PACKAGES) \ + -c $(THREAD_FLAG) $(ANNOT_FLAG) \ + $(OCAMLFLAGS) $(INCFLAGS) $<; \ + else \ + $(ECHO) $(REAL_OCAMLFIND) $(INTF_OCAMLC) $(OCAML_FIND_PACKAGES) \ + -c -pp \"$$pp $(PPFLAGS)\" $(THREAD_FLAG) $(ANNOT_FLAG) \ + $(OCAMLFLAGS) $(INCFLAGS) $<; \ + $(REAL_OCAMLFIND) $(INTF_OCAMLC) $(OCAML_FIND_PACKAGES) \ + -c -pp "$$pp $(PPFLAGS)" $(THREAD_FLAG) $(ANNOT_FLAG) \ + $(OCAMLFLAGS) $(INCFLAGS) $<; \ + fi + +%.cmi: %$(IMPL_SUF); + +%$(IMPL_SUF) %.$(EXT_OBJ): %.ml $(EXTRADEPS) + $(QUIET)pp=`sed -n -e '/^#/d' -e 's/(\*pp \([^*]*\) \*)/\1/p;q' $<`; \ + if [ -z "$$pp" ]; then \ + $(ECHO) $(REAL_OCAMLFIND) $(REAL_OCAMLC) $(OCAML_FIND_PACKAGES) \ + -c $(ALL_OCAMLCFLAGS) $<; \ + $(REAL_OCAMLFIND) $(REAL_OCAMLC) $(OCAML_FIND_PACKAGES) \ + -c $(ALL_OCAMLCFLAGS) $<; \ + else \ + $(ECHO) $(REAL_OCAMLFIND) $(REAL_OCAMLC) $(OCAML_FIND_PACKAGES) \ + -c -pp \"$$pp $(PPFLAGS)\" $(ALL_OCAMLCFLAGS) $<; \ + $(REAL_OCAMLFIND) $(REAL_OCAMLC) $(OCAML_FIND_PACKAGES) \ + -c -pp "$$pp $(PPFLAGS)" $(ALL_OCAMLCFLAGS) $<; \ + fi + +.PRECIOUS: %.ml +%.ml: %.mll + $(OCAMLLEX) $(LFLAGS) $< + +.PRECIOUS: %.ml %.mli +%.ml %.mli: %.mly + $(OCAMLYACC) $(YFLAGS) $< + $(QUIET)pp=`sed -n -e 's/.*(\*pp \([^*]*\) \*).*/\1/p;q' $<`; \ + if [ ! -z "$$pp" ]; then \ + mv $*.ml $*.ml.temporary; \ + echo "(*pp $$pp $(PPFLAGS)*)" > $*.ml; \ + cat $*.ml.temporary >> $*.ml; \ + rm $*.ml.temporary; \ + mv $*.mli $*.mli.temporary; \ + echo "(*pp $$pp $(PPFLAGS)*)" > $*.mli; \ + cat $*.mli.temporary >> $*.mli; \ + rm $*.mli.temporary; \ + fi + + +.PRECIOUS: %.ml +%.ml: %.rep + $(CAMELEON_REPORT) $(CAMELEON_REPORT_FLAGS) -gen $< + +.PRECIOUS: %.ml +%.ml: %.zog + $(CAMELEON_ZOGGY) $(CAMELEON_ZOGGY_FLAGS) -impl $< > $@ + +.PRECIOUS: %.ml +%.ml: %.glade + $(OCAML_GLADECC) $(OCAML_GLADECC_FLAGS) $< > $@ + +.PRECIOUS: %.ml %.mli +%.ml %.mli: %.oxridl + $(OXRIDL) $< + +.PRECIOUS: %.ml %.mli %_stubs.c %.h +%.ml %.mli %_stubs.c %.h: %.idl + $(CAMLIDL) $(MAYBE_IDL_HEADER) $(IDLFLAGS) \ + $(CAMLIDLFLAGS) $< + $(QUIET)if [ $(NOIDLHEADER) ]; then touch $*.h; fi + +%.$(EXT_OBJ): %.c + $(OCAMLC) -c -cc "$(CC)" -ccopt "$(CFLAGS) \ + $(CPPFLAGS) $(CPPFLAGS_WIN32) \ + $(CFLAGS_WIN32) $(CINCFLAGS) $(CFLAG_O)$@ " $< + +%.$(EXT_OBJ): %.m + $(CC) -c $(CFLAGS) $(CINCFLAGS) $(CPPFLAGS) \ + -I'$(OCAMLLIBPATH)' \ + $< $(CFLAG_O)$@ + +%.$(EXT_OBJ): %.$(EXT_CXX) + $(CXX) -c $(CXXFLAGS) $(CINCFLAGS) $(CPPFLAGS) \ + -I'$(OCAMLLIBPATH)' \ + $< $(CFLAG_O)$@ + +$(MLDEPDIR)/%.d: %.ml + $(QUIET)if [ ! -d $(@D) ]; then mkdir -p $(@D); fi + $(QUIET)pp=`sed -n -e '/^#/d' -e 's/(\*pp \([^*]*\) \*)/\1/p;q' $<`; \ + if [ -z "$$pp" ]; then \ + $(ECHO) $(REAL_OCAMLFIND) $(OCAMLDEP) $(OCAML_DEP_PACKAGES) \ + $(INCFLAGS) $< \> $@; \ + $(REAL_OCAMLFIND) $(OCAMLDEP) $(OCAML_DEP_PACKAGES) \ + $(INCFLAGS) $< > $@; \ + else \ + $(ECHO) $(REAL_OCAMLFIND) $(OCAMLDEP) $(OCAML_DEP_PACKAGES) \ + -pp \"$$pp $(PPFLAGS)\" $(INCFLAGS) $< \> $@; \ + $(REAL_OCAMLFIND) $(OCAMLDEP) $(OCAML_DEP_PACKAGES) \ + -pp "$$pp $(PPFLAGS)" $(INCFLAGS) $< > $@; \ + fi + +$(BCDIDIR)/%.di $(NCDIDIR)/%.di: %.mli + $(QUIET)if [ ! -d $(@D) ]; then mkdir -p $(@D); fi + $(QUIET)pp=`sed -n -e '/^#/d' -e 's/(\*pp \([^*]*\) \*)/\1/p;q' $<`; \ + if [ -z "$$pp" ]; then \ + $(ECHO) $(REAL_OCAMLFIND) $(OCAMLDEP) $(DEPFLAGS) $(INCFLAGS) $< \> $@; \ + $(REAL_OCAMLFIND) $(OCAMLDEP) $(DEPFLAGS) $(INCFLAGS) $< > $@; \ + else \ + $(ECHO) $(REAL_OCAMLFIND) $(OCAMLDEP) $(DEPFLAGS) \ + -pp \"$$pp $(PPFLAGS)\" $(INCFLAGS) $< \> $@; \ + $(REAL_OCAMLFIND) $(OCAMLDEP) $(DEPFLAGS) \ + -pp "$$pp $(PPFLAGS)" $(INCFLAGS) $< > $@; \ + fi + +$(DOC_DIR)/$(RESULT)/html: + mkdir -p $@ + +$(DOC_DIR)/$(RESULT)/html/index.html: $(DOC_DIR)/$(RESULT)/html $(DOC_FILES) + rm -rf $ "" + | Some x -> string_of_int x + +let string_of_precision = function + | None -> "" + | Some x -> "." ^ string_of_int x + +let string_of_sizespec = function + | None -> "" + | Some s-> + match s with + | Sll -> "ll" | Sl -> "l" | SL -> "L" | Sh -> "h" + | Shh -> "hh" | Sj -> "j" | Sz -> "z" | St -> "t" + +let string_of_convop = function + | Ca -> "a" | CA -> "A" | Cc -> "c" | Cd -> "d" + | Ce -> "e" | CE -> "E" | Cf -> "f" | Cg -> "g" + | CG -> "G" | Ci -> "i" | Cn -> "n" | Co -> "o" + | Cp -> "p" | Cs -> "s" | Cu -> "u" | Cx -> "x" + | CX -> "X" | CPercent -> "%" + +let string_of_format f = + "%" ^ + string_of_options f ^ + string_of_minwidth f.minwidth ^ + string_of_precision f.precision ^ + string_of_sizespec f.sizespec ^ + string_of_convop f.convop + +type section = + | Format of pformat + | String of string + +let sec_of_format cs = + (* 1. Read zero or more flags -, +, 0, #, *) + let cs = ref cs in + let lj, pz, sa, sp, va, fin = + ref false, ref false, ref false, ref false, ref false, ref false + in + while not !fin do + match !cs with + | '-'::_ -> set lj; cs := tl !cs + | '+'::_ -> set sa; cs := tl !cs + | '0'::_ -> set pz; cs := tl !cs + | ' '::_ -> set sp; cs := tl !cs + | '#'::_ -> set va; cs := tl !cs + | _ -> set fin + done; + (* 2. Read a possible minimum field width *) + let minwidth = + let fwchars, rest = cleavewhile isdigit !cs in + cs := rest; + if fwchars = [] then None else Some (int_of_string (implode fwchars)) + in + (* 3. Read an optional precision specification *) + let precision = + match !cs with + | '.'::more -> + cs := more; + let pchars, rest = cleavewhile isdigit !cs in + cs := rest; + if pchars = [] then None else Some (int_of_string (implode pchars)) + | _ -> None + in + (* 4. Read an optional size specification *) + let sizespec = + match !cs with + | 'l'::'l'::r -> cs := r; Some Sll + | 'l'::r -> cs := r; Some Sl + | 'L'::r -> cs := r; Some SL + | 'h'::'h'::r -> cs := r; Some Shh + | 'h'::r -> cs := r; Some Sh + | 'j'::r -> cs := r; Some Sj + | 'z'::r -> cs := r; Some Sz + | 't'::r -> cs := r; Some St + | _ -> None + in + (* 5. Read the conversion operation *) + let convop = + match !cs with + | 'a'::r -> cs := r; Ca + | 'A'::r -> cs := r; CA + | 'c'::r -> cs := r; Cc + | 'd'::r -> cs := r; Cd + | 'e'::r -> cs := r; Ce + | 'E'::r -> cs := r; CE + | 'f'::r -> cs := r; Cf + | 'g'::r -> cs := r; Cg + | 'G'::r -> cs := r; CG + | 'i'::r -> cs := r; Ci + | 'n'::r -> cs := r; Cn + | 'o'::r -> cs := r; Co + | 'p'::r -> cs := r; Cp + | 's'::r -> cs := r; Cs + | 'u'::r -> cs := r; Cu + | 'x'::r -> cs := r; Cx + | 'X'::r -> cs := r; CX + | '%'::r -> cs := r; CPercent + | _ -> raise (Failure "sec_of_format") + in + {leftjustify = !lj; + padzero = !pz; + signalways = !sa; + space = !sp; + variant = !va; + minwidth = minwidth; + precision = precision; + sizespec = sizespec; + convop = convop}, + !cs + +let rec sections_of_string_inner secs currstr = function + | '%'::m -> + let sec, rest = sec_of_format m in + if currstr = [] + then sections_of_string_inner (Format sec::secs) currstr rest + else sections_of_string_inner (Format sec::String (implode (rev currstr))::secs) [] rest + | x::xs -> + sections_of_string_inner secs (x::currstr) xs + | [] -> + if currstr = [] then rev secs else rev (String (implode (rev currstr))::secs) + +(* Take a format string, and split it into sections *) +let sections_of_string s = + try + sections_of_string_inner [] [] (explode s) + with + _ -> raise (PrintfFailure "Couldn't parse Printf format") + +(* Substitute an integer into a format, returning the empty string if the format is not suitable. *) + +(* For now, just 'd', 'u', 'i' *) +let sub_int i f = + (*i Printf.printf "Substituting format |%s|\n" (string_of_format f); i*) + let str = string_of_int i + in let padding = if f.padzero then '0' else ' ' in + if f.minwidth <> None && String.length str < unopt f.minwidth then + let padding = many padding (unopt f.minwidth - String.length str) in + if f.leftjustify then str ^ implode padding else implode padding ^ str + else + str + +(* Given a list of integers, substitute into integer formats *) +let rec substitute_inner donesections sections = function + | [] -> rev donesections @ sections + | i::is -> + match sections with + | [] -> rev donesections @ sections + | String s::more -> substitute_inner (String s::donesections) more (i::is) + | Format f::more -> substitute_inner (String (sub_int i f)::donesections) more is + +let substitute x = + try substitute_inner [] x with + _ -> raise (PrintfFailure "Failed to substitute integer") + +(* Flatten a set of sections to a string *) +let string_of_section = function + | String s -> s + | Format f -> string_of_format f + +let string_of_sections sections = + try fold_left ( ^ ) "" (map string_of_section sections) with + _ -> raise (PrintfFailure "Failed to build string from Printf sections") + +type encoding = + | Raw + | UTF8 + | Stripped + +(* Just strip everything which isn't 7 bit ASCII *) +let crude_de_unicode s = + implode (map char_of_int (lose (fun x -> x > 127) (Pdftext.codepoints_of_pdfdocstring s))) + +let encode_output enc s = + match enc with + | Raw -> s + | UTF8 -> Pdftext.utf8_of_pdfdocstring s + | Stripped -> crude_de_unicode s + +(* Get the number of pages in file. Doesn't need decryption. *) +let endpage_io i user_pw owner_pw = + let pdf = Pdfread.pdf_of_input_lazy user_pw owner_pw i in + Pdfpage.endpage pdf + +(* Raised when syntax is ok, but endpage is too low. Caught by validator. +Caught and reraised as normal failure by parse_pagespec. *) +exception PageSpecUnknownPage of int + +(* There would be no pages *) +exception PageSpecWouldBeNoPages + +(* Raised when syntax is wrong. Caught and reraised by parse_pagespec and +validator. *) +exception PageSpecBadSyntax + +(* Parsing range specifications *) +let rec splitat_commas toks = + match cleavewhile (neq (Pdfgenlex.LexName ",")) toks with + | [], [] -> [] + | [], _ -> raise PageSpecBadSyntax + | some, [] -> [some] + | _::_ as before, _::rest -> before::splitat_commas rest + +let rec mk_numbers endpage = function + | [Pdfgenlex.LexInt n] -> [n] + | [Pdfgenlex.LexName "end"] -> [endpage] + | [Pdfgenlex.LexInt n; Pdfgenlex.LexName "-"; Pdfgenlex.LexInt n'] -> + if n > n' then rev (ilist n' n) else ilist n n' + | [Pdfgenlex.LexName "end"; Pdfgenlex.LexName "-"; Pdfgenlex.LexInt n] -> + if n <= endpage + then rev (ilist n endpage) + else raise (PageSpecUnknownPage n) + | [Pdfgenlex.LexInt n; Pdfgenlex.LexName "-"; Pdfgenlex.LexName "end"] -> + if n <= endpage + then ilist n endpage + else raise (PageSpecUnknownPage n) + | [Pdfgenlex.LexName "end"; Pdfgenlex.LexName "-"; Pdfgenlex.LexName "end"] -> + [endpage] + | [Pdfgenlex.LexName "even"] -> + drop_odds (ilist 1 endpage) + | [Pdfgenlex.LexName "odd"] -> + really_drop_evens (ilist 1 endpage) + | [Pdfgenlex.LexName "all"] -> + ilist 1 endpage + | [Pdfgenlex.LexName "reverse"] -> + rev (ilist 1 endpage) + | toks -> + let ranges = splitat_commas toks in + if ranges = [toks] then raise PageSpecBadSyntax else + flatten (map (mk_numbers endpage) ranges) + +(* Space dashes and commas *) +let rec add_spaces = function + | [] -> [] + | ('-' | ',') as h::t -> ' '::h::' '::add_spaces t + | h::t -> h::add_spaces t + +let space_string s = + implode (add_spaces (explode s)) + +let fixup_negatives endpage = function + | Pdfgenlex.LexName s when String.length s > 1 && s.[0] = '~' -> + Pdfgenlex.LexInt (endpage + 1 + ~-(int_of_string (implode (tl (explode s))))) + | x -> x + +let parse_pagespec_inner endpage spec = + let spec = space_string spec in + if endpage < 1 then raise (Pdf.PDFError "This PDF file has no pages and is therefore malformed") else + let numbers = + try + match rev (explode spec) with + | ['n'; 'e'; 'v'; 'e'] -> keep even (ilist 1 endpage) + | ['d'; 'd'; 'o'] -> keep odd (ilist 1 endpage) + | 'n'::'e'::'v'::'e'::more -> keep even (mk_numbers endpage (Pdfgenlex.lex_string (implode (rev more)))) + | 'd'::'d'::'o'::more -> keep odd (mk_numbers endpage (Pdfgenlex.lex_string (implode (rev more)))) + | _ -> mk_numbers endpage (map (fixup_negatives endpage) (Pdfgenlex.lex_string spec)) + with + e -> raise PageSpecBadSyntax + in + if numbers = [] then raise PageSpecWouldBeNoPages else + iter + (fun n -> + if n <= 0 || n > endpage then raise (PageSpecUnknownPage n)) + numbers; + numbers + +let parse_pagespec pdf spec = + try parse_pagespec_inner (Pdfpage.endpage pdf) spec with + | PageSpecUnknownPage n -> + raise (Pdf.PDFError ("Page " ^ string_of_int n ^ " does not exist.")) + | PageSpecWouldBeNoPages -> + raise (Pdf.PDFError ("Page range specifies no pages")) + | e -> + raise + (Pdf.PDFError + ("Bad page specification " ^ spec ^ + ". Raw error was " ^ Printexc.to_string e ^ + ". Last page was " ^ string_of_int (Pdfpage.endpage pdf))) + +(* To validate a pagespec as being syntactically correct without the PDF in +question. This is nasty, since the parser above includes checking based on the +endpage of the PDF (which we don't have). Pass 100 as the endpage, doubling on +page range exception, bailng out above 500000. *) +let rec validate_pagespec_inner n spec = + try + ignore (parse_pagespec_inner n spec); true + with + | PageSpecUnknownPage _ -> if n < 500000 then validate_pagespec_inner (n * 2) spec else false + | PageSpecBadSyntax | _ -> false + +let validate_pagespec spec = + validate_pagespec_inner 100 spec + +(* Convert an integer list representing a set to a page specification, in order. *) +let string_of_pagespec pdf = function + | [] -> "" + | is -> + let iseven len is = + drop_odds (ilist 1 len) = is + in let isodd len is = + really_drop_evens (ilist 1 len) = is + in let isall len is = + ilist 1 len = is + in let is = sort compare is + in let len = Pdfpage.endpage pdf in + let rec mkranges prev = function + | [] -> map extremes (rev (map rev prev)) + | h::t -> + match prev with + | (ph::pht)::pt when h = ph + 1 -> mkranges ((h::ph::pht)::pt) t + | (_::_)::_ -> mkranges ([h]::prev) t + | []::_ -> assert false + | [] -> mkranges [[h]] t + in + if iseven len is && len > 3 then "even" else + if isodd len is && len > 2 then "odd" else + if isall len is then "all" else + let ranges = mkranges [] is in + let rangestrings = + map + (function (s, e) -> + if s = e + then string_of_int s + else string_of_int s ^ "-" ^ string_of_int e) + ranges + in + fold_left ( ^ ) "" (interleave "," rangestrings) + +let string_of_range r = + fold_left (fun a b -> a ^ " " ^ b) "" (map string_of_int r) + +let print_pdf_objs pdf = + Printf.printf "Trailerdict: %s\n" (Pdfwrite.string_of_pdf pdf.Pdf.trailerdict); + Printf.printf "Root: %i\n" pdf.Pdf.root; + begin match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with + | Some catalog -> + Printf.printf "Catalog: %s\n" (Pdfwrite.string_of_pdf catalog); + begin match Pdf.lookup_direct pdf "/Pages" catalog with + | Some pages -> + Printf.printf "Pages: %s\n" (Pdfwrite.string_of_pdf pages) + | None -> + flprint "no catalog\n" + end + | None -> + flprint "No catalog!\n" + end; + Pdf.objiter + (fun n obj -> + Printf.printf "%i 0 obj:\n\n" n; + Printf.printf "%s\n" (Pdfwrite.string_of_pdf obj)) + pdf + +(* Recompress anything which isn't compressed, unless it's metadata. *) +let recompress_stream pdf = function + (* If there is no compression, compress with /FlateDecode *) + | Pdf.Stream {contents = (dict, _)} as stream -> + begin match + Pdf.lookup_direct pdf "/Filter" dict, + Pdf.lookup_direct pdf "/Type" dict + with + | _, Some (Pdf.Name "/Metadata") -> () + | (None | Some (Pdf.Array [])), _ -> + Pdfcodec.encode_pdfstream pdf Pdfcodec.Flate stream + | _ -> () + end + | _ -> assert false + +let recompress_pdf pdf = + if not (Pdfcrypt.is_encrypted pdf) then + Pdf.iter_stream (recompress_stream pdf) pdf; + pdf + +let decompress_pdf pdf = + if not (Pdfcrypt.is_encrypted pdf) then + (Pdf.iter_stream (Pdfcodec.decode_pdfstream_until_unknown pdf) pdf); + pdf + +(* Return page label at pdf page num, or page number in arabic if no label *) +let pagelabel pdf num = + Pdfpagelabels.pagelabeltext_of_pagenumber + num + (Pdfpagelabels.complete (Pdfpagelabels.read pdf)) + + +let rec process_text text m = + match m with + | ([] : (string * string) list) -> Cpdfstrftime.strftime text + | (s, r)::t -> process_text (string_replace_all s r text) t + +let expand_date = function + | "now" -> Cpdfstrftime.strftime "D:%Y%m%d%t%M%S" + | x -> x + +let process_pages f pdf range = + let pages = Pdfpage.pages_of_pagetree pdf in + let pages' = + map2 + (fun n p -> if mem n range then f n p else p) + (ilist 1 (length pages)) + pages + in + Pdfpage.change_pages true pdf pages' + +let iter_pages f pdf range = + let pages = Pdfpage.pages_of_pagetree pdf in + iter2 + (fun n p -> if mem n range then f n p) + (ilist 1 (length pages)) + pages + +let map_pages f pdf range = + let pages = Pdfpage.pages_of_pagetree pdf in + option_map2 + (fun n p -> if mem n range then Some (f n p) else None) + (ilist 1 (length pages)) + pages + +(* Add stack operators to a content stream to ensure it is composeable. FIXME: +This will go away once we're using postpend_content or similar for twoup and do_stamp... *) +let protect_removeme pdf resources content = + let ops = Pdfops.parse_operators pdf resources content in + let qs = length (keep (eq Pdfops.Op_q) ops) + and bigqs = length (keep (eq Pdfops.Op_Q) ops) in + let deficit = if qs > bigqs then qs - bigqs else 0 in + if deficit <> 0 then Printf.eprintf "Q Deficit was nonzero. Fixing. %i\n" deficit; + Pdfops.stream_of_ops ([Pdfops.Op_q] @ ops @ many Pdfops.Op_Q deficit @ [Pdfops.Op_Q]) + +exception SoftError of string + +let error s = raise (SoftError s) + +exception HardError of string + +(* Union two resource dictionaries from the same PDF. *) +let combine_pdf_resources pdf a b = + let a_entries = + match a with + | Pdf.Dictionary entries -> entries + | _ -> [] + in let b_entries = + match b with + | Pdf.Dictionary entries -> entries + | _ -> [] + in + let resource_keys = + ["/Font"; "/ExtGState"; "/ColorSpace"; "/Pattern"; + "/Shading"; "/XObject"; "/Properties"] + in + let combine_entries key = + let a_entries = + match Pdf.lookup_direct pdf key a with + | Some (Pdf.Dictionary d) -> d + | _ -> [] + in let b_entries = + match Pdf.lookup_direct pdf key b with + | Some (Pdf.Dictionary d) -> d + | _ -> [] + in + key, Pdf.Dictionary (a_entries @ b_entries) + in + let unknown_keys_a = + lose (fun (k, _) -> mem k resource_keys) a_entries + in let unknown_keys_b = + lose (fun (k, _) -> mem k resource_keys) b_entries + in let combined_known_entries = + map combine_entries resource_keys + in + Pdf.Dictionary (unknown_keys_a @ unknown_keys_b @ combined_known_entries) + +(* \section{Build PDF Presentations} *) +let change_page_effect t d horizontal inward direction effect_duration page = + let checkname = function + | "Split" | "Blinds" | "Box" | "Wipe" | "Dissolve" | "Glitter" -> () + | _ -> error "Unknown presentation type" + in + let rest = page.Pdfpage.rest in + let transdict = + match t with + | None -> + Pdf.Dictionary [] + | Some name -> + checkname name; + Pdf.Dictionary [("/S", Pdf.Name ("/" ^ name))] + in + let transdict = Pdf.add_dict_entry transdict "/D" (Pdf.Real effect_duration) in + let transdict = + match t with + | Some ("Split" | "Blinds") -> + Pdf.add_dict_entry + transdict "/Dm" (Pdf.Name (if horizontal then "/H" else "/V")) + | _ -> transdict + in + let transdict = + match t with + | Some ("Split" | "Box") -> + Pdf.add_dict_entry + transdict "/M" (Pdf.Name (if inward then "/I" else "/O")) + | _ -> transdict + in + let transdict = + match t with + | Some ("Wipe" | "Glitter") -> + Pdf.add_dict_entry transdict "/Di" (Pdf.Integer direction) + | _ -> transdict + in + let rest = Pdf.add_dict_entry rest "/Trans" transdict in + let rest = + match d with + | None -> Pdf.remove_dict_entry rest "/Dur" + | Some delay -> Pdf.add_dict_entry rest "/Dur" (Pdf.Real delay) + in + {page with Pdfpage.rest = rest} + +let presentation range t d h i dir effect_dur pdf = + let pages = Pdfpage.pages_of_pagetree pdf in + let pages' = + map2 + (fun page num -> + if mem num range + then change_page_effect t d h i dir effect_dur page + else page) + pages + (indx pages) + in + Pdfpage.change_pages true pdf pages' + +(* \section{Attaching files} *) +let make_filestream file = + let data = + let ch = open_in_bin file in + let len = in_channel_length ch in + let stream = mkbytes len in + for x = 0 to bytes_size stream - 1 do + bset stream x (input_byte ch) + done; + close_in ch; + stream + in + Pdf.Stream + (ref (Pdf.Dictionary + [("/Length", Pdf.Integer (bytes_size data)); + ("/Type", Pdf.Name "/EmbeddedFile")], + Pdf.Got data)) + +let attach_file keepversion topage pdf file = + let filestream = make_filestream file in + let filestream_num = Pdf.addobj pdf filestream in + let filespec = + Pdf.Dictionary + [("/EF", Pdf.Dictionary ["/F", Pdf.Indirect filestream_num]); + ("/F", Pdf.String (Filename.basename file)); + ("/Type", Pdf.Name "/F")] + in + match topage with + | None -> + (* Look up /Names and /EmbeddedFiles and /Names. *) + let rootdict = Pdf.lookup_obj pdf pdf.Pdf.root in + let namedict = + match Pdf.lookup_direct pdf "/Names" rootdict with + | None -> Pdf.Dictionary [] + | Some namedict -> namedict + in + let embeddednamedict = + match Pdf.lookup_direct pdf "/EmbeddedFiles" namedict with + | None -> Pdf.Dictionary [] + | Some embeddednamedict -> embeddednamedict + in + let elts = + match Pdf.lookup_direct pdf "/Names" embeddednamedict with + | Some (Pdf.Array elts) -> elts + | _ -> [] + in + let names' = Pdf.Array (elts @ [Pdf.String (Filename.basename file); filespec]) in + let embeddednamedict' = Pdf.add_dict_entry embeddednamedict "/Names" names' in + let namedict' = Pdf.add_dict_entry namedict "/EmbeddedFiles" embeddednamedict' in + let rootdict' = Pdf.add_dict_entry rootdict "/Names" namedict' in + let rootnum = Pdf.addobj pdf rootdict' in + {pdf with + Pdf.minor = if keepversion then pdf.Pdf.minor else max pdf.Pdf.minor 4; + Pdf.root = rootnum; + Pdf.trailerdict = + Pdf.add_dict_entry + pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootnum)} + | Some pagenumber -> + let pages = Pdfpage.pages_of_pagetree pdf in + if pagenumber < 0 || pagenumber > length pages then error "attach_file: Page not found" else + let page = select pagenumber pages in + let annots = + match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with + | Some (Pdf.Array annots) -> annots + | _ -> [] + in + let rect = + let minx, miny, maxx, maxy = Pdf.parse_rectangle page.Pdfpage.mediabox in + Pdf.Array [Pdf.Real 18.; Pdf.Real (maxy -. 45.); Pdf.Real 45.; Pdf.Real (maxy -. 18.)] + in + let annot = + Pdf.Dictionary + [("/FS", filespec); + ("/Subtype", Pdf.Name "/FileAttachment"); + ("/Contents", Pdf.String (Filename.basename file)); + ("/Rect", rect)] + in + let annots' = Pdf.Array (annot::annots) in + let page' = + {page with Pdfpage.rest = Pdf.add_dict_entry page.Pdfpage.rest "/Annots" annots'} + in + let pages' = replace_number pagenumber page' pages in + let pdf = Pdfpage.change_pages false pdf pages' in + {pdf with + Pdf.minor = if keepversion then pdf.Pdf.minor else max pdf.Pdf.minor 4} + +let list_attached_files pdf = + let toplevel = + match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with + | None -> [] + | Some rootdict -> + match Pdf.lookup_direct pdf "/Names" rootdict with + | None -> [] + | Some namedict -> + match Pdf.lookup_direct pdf "/EmbeddedFiles" namedict with + | Some nametree -> + map + (function x -> x, 0) + (option_map + (function (Pdf.String s, _) -> Some s | _ -> None) + (Pdf.contents_of_nametree pdf nametree)) + | _ -> [] + in let pagelevel = + let pages = Pdfpage.pages_of_pagetree pdf in + flatten + (map2 + (fun page pagenumber -> + option_map + (function annot -> + match Pdf.lookup_direct pdf "/Subtype" annot with + | Some (Pdf.Name "/FileAttachment") -> + (match Pdf.lookup_direct pdf "/Contents" annot with + | Some (Pdf.String s) -> Some (s, pagenumber) + | _ -> None) + | _ -> None) + (match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with + | Some (Pdf.Array annots) -> annots + | _ -> [])) + pages + (indx pages)) + in + toplevel @ pagelevel + +(* \section{Remove Attached files} *) +let remove_attached_files_on_pages pdf = + let remove_from_page page = + {page with Pdfpage.rest = + Pdf.add_dict_entry page.Pdfpage.rest "/Annots" + (Pdf.Array + (option_map + (function annot -> + match Pdf.lookup_direct pdf "/Subtype" annot with + | Some (Pdf.Name "/FileAttachment") -> None + | _ -> Some annot) + (match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with + | Some (Pdf.Array annots) -> annots + | _ -> [])))} + in + Pdfpage.change_pages true pdf (map remove_from_page (Pdfpage.pages_of_pagetree pdf)) + +let remove_attached_files pdf = + let pdf = remove_attached_files_on_pages pdf in + match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with + | None -> pdf + | Some rootdict -> + match Pdf.lookup_direct pdf "/Names" rootdict with + | None -> pdf + | Some namedict -> + let namedict' = Pdf.remove_dict_entry namedict "/EmbeddedFiles" in + let rootdict' = Pdf.add_dict_entry rootdict "/Names" namedict' in + let rootdict'num = Pdf.addobj pdf rootdict' in + {pdf with + Pdf.root = + rootdict'num; + Pdf.trailerdict = + Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootdict'num)} + +(* \section{Copy an /ID from one file to another} *) +let copy_id keepversion copyfrom copyto = + match Pdf.lookup_direct copyfrom "/ID" copyfrom.Pdf.trailerdict with + | None -> copyto (* error "Source PDF file has no /ID entry to copy from" *) + | Some id -> + copyto.Pdf.trailerdict <- + Pdf.add_dict_entry copyto.Pdf.trailerdict "/ID" id; + copyto.Pdf.minor <- + if keepversion then copyto.Pdf.minor else max copyto.Pdf.minor 1; + copyto + +(* \section{Remove bookmarks} *) + +(* \section{Add bookmarks} *) +let read_lines input = + let lines = ref [] in + try + while true do + let c = read_line input in + lines =| c + done; [] + with + _ -> rev !lines + +(* Verify a list of bookmarks. Positive jumps of > 1 not allowed, no numbers +smaller than 0. *) +let rec verify_bookmarks pdf lastlevel endpage = function + | [] -> true + | {Pdfmarks.level = level; Pdfmarks.target = target}::more -> + let page = Pdfpage.pagenumber_of_target pdf target in + level < lastlevel + 2 && + level >= 0 && + page <= endpage && + page >= 0 && + verify_bookmarks pdf level endpage more + +(* Parse a line of the bookmarks file. *) + +(* Un-escape things which are escaped. Quotes, newlines and backslashes *) +let rec fixup_characters prev = function + | [] -> rev prev + | '\\'::'\\'::t -> fixup_characters ('\\'::prev) t + | '\\'::'\"'::t -> fixup_characters ('\"'::prev) t + | '\\'::'\n'::t -> fixup_characters ('\n'::prev) t + | h::t -> fixup_characters (h::prev) t + +let parse_bookmark_file verify pdf input = + try + let lines = Pdfio.read_lines input in + (*i Printf.printf "Read %i lines\n" (length lines); + iter (function x -> Printf.printf "%s\n" x) lines; i*) + let currline = ref 0 in + let bookmarks = ref [] in + iter + (function line -> + match + incr currline; + Pdfgenlex.lex_string line + with + | [Pdfgenlex.LexInt i; Pdfgenlex.LexString s; Pdfgenlex.LexInt i'; Pdfgenlex.LexName "open"] -> + bookmarks =| + {Pdfmarks.level = i; + Pdfmarks.text = Pdftext.pdfdocstring_of_utf8 (implode (fixup_characters [] (explode s))); + Pdfmarks.target = Pdfpage.target_of_pagenumber pdf i'; + Pdfmarks.isopen = true} + | [Pdfgenlex.LexInt i; Pdfgenlex.LexString s; Pdfgenlex.LexInt i'; Pdfgenlex.LexName "closed"] + | [Pdfgenlex.LexInt i; Pdfgenlex.LexString s; Pdfgenlex.LexInt i'] -> + bookmarks =| + {Pdfmarks.level = i; + Pdfmarks.text = Pdftext.pdfdocstring_of_utf8 (implode (fixup_characters [] (explode s))); + Pdfmarks.target = Pdfpage.target_of_pagenumber pdf i'; + Pdfmarks.isopen = false} + | [] -> () (* ignore blank lines *) + | _ -> + (*i flprint (Pdfgenlex.string_of_tokens n); i*) + error ("Bad bookmark file, line " ^ (string_of_int !currline))) + lines; + let bookmarks = rev !bookmarks in + if verify then + if verify_bookmarks pdf 0 (Pdfpage.endpage pdf) bookmarks + then bookmarks + else + error + "Bad bookmark file (References non-existant pages or is malformed)" + else + bookmarks + with + _ -> (*i Printf.printf "%s\n" (Printexc.to_string e); i*) error "Bad bookmark file (syntax)" + + +let add_bookmarks verify input pdf = + let parsed = parse_bookmark_file verify pdf input in + (*iter (fun b -> flprint (Pdfmarks.string_of_bookmark b); flprint "\n") parsed;*) + Pdfmarks.add_bookmarks parsed pdf + +(* \section{Set page mode} *) +let set_page_mode pdf s = + match s with + | "UseNone" | "UseOutlines" | "UseThumbs" + | "FullScreen" | "UseOC" | "UseAttachments" -> + begin match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with + | Some catalog -> + let catalog' = + Pdf.add_dict_entry catalog "/PageMode" (Pdf.Name ("/" ^ s)) + in + let catalognum = Pdf.addobj pdf catalog' in + let trailerdict' = + Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum) + in + {pdf with + Pdf.root = catalognum; + Pdf.trailerdict = trailerdict'} + | None -> error "bad root" + end + | _ -> error "Unknown page mode" + +(* \section{Set viewer preferences} *) +let set_viewer_preference (key, value, version) pdf = + match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with + | Some catalog -> + let viewer_preferences = + match Pdf.lookup_direct pdf "/ViewerPreferences" catalog with + | Some d -> d + | None -> Pdf.Dictionary [] + in + let viewer_preferences' = + Pdf.add_dict_entry viewer_preferences key value + in + let catalog' = + Pdf.add_dict_entry catalog "/ViewerPreferences" viewer_preferences' + in + let catalognum = Pdf.addobj pdf catalog' in + let trailerdict' = + Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum) + in + {pdf with + Pdf.minor = max pdf.Pdf.minor version; + Pdf.root = catalognum; + Pdf.trailerdict = trailerdict'} + | None -> error "bad root" + +(* \section{Set an entry in the /Info dictionary} *) +let set_pdf_info (key, value, version) pdf = + let infodict = + match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with + | Some d -> d + | None -> Pdf.Dictionary [] + in + let infodict' = Pdf.add_dict_entry infodict key value in + let objnum = Pdf.addobj pdf infodict' in + pdf.Pdf.trailerdict <- + Pdf.add_dict_entry pdf.Pdf.trailerdict "/Info" (Pdf.Indirect objnum); + pdf.Pdf.minor <- + max pdf.Pdf.minor version; + pdf + +(* \section{Set page layout} *) +let set_page_layout pdf s = + match s with + | "SinglePage" | "OneColumn" | "TwoColumnLeft" + | "TwoColumnRight" | "TwoPageLeft" | "TwoPageRight" -> + begin match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with + | Some catalog -> + let catalog' = + Pdf.add_dict_entry catalog "/PageLayout" (Pdf.Name ("/" ^ s)) + in + let catalognum = Pdf.addobj pdf catalog' in + let trailerdict' = + Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum) + in + {pdf with + Pdf.root = catalognum; + Pdf.trailerdict = trailerdict'} + | None -> error "bad root" + end + | _ -> error "Unknown page layout" + +(* \section{Set or replace metadata} *) +let set_metadata_from_bytes keepversion data pdf = + let metadata_stream = + Pdf.Stream + {contents = + (Pdf.Dictionary + ["/Length", Pdf.Integer (bytes_size data); + "/Type", Pdf.Name "/Metadata"; + "/Subtype", Pdf.Name "/XML"], + Pdf.Got data)} + in + let objnum = Pdf.addobj pdf metadata_stream in + let document_catalog = + match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with + | Some s -> s + | None -> error "Malformed PDF: No root." + in + let document_catalog' = + Pdf.add_dict_entry document_catalog "/Metadata" (Pdf.Indirect objnum) + in + let rootnum = Pdf.addobj pdf document_catalog' in + let trailerdict = + Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootnum) + in + {pdf with + Pdf.trailerdict = trailerdict; + Pdf.root = rootnum; + Pdf.minor = + if keepversion then pdf.Pdf.minor else max 4 pdf.Pdf.minor} + +let set_metadata keepversion filename pdf = + let ch = open_in_bin filename in + let data = mkbytes (in_channel_length ch) in + for x = 0 to bytes_size data - 1 do + bset data x (input_byte ch) + done; + set_metadata_from_bytes keepversion data pdf + +(* \section{Remove metadata} *) +let remove_metadata pdf = + match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with + | None -> error "malformed file" + | Some root -> + let root' = Pdf.remove_dict_entry root "/Metadata" in + let rootnum = Pdf.addobj pdf root' in + {pdf with + Pdf.trailerdict = + Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootnum); + Pdf.root = + rootnum} + +(* List bookmarks *) + +(* List the bookmarks, optionally deunicoding the text, in the given range to the given output *) +let list_bookmarks encoding range pdf output = + let process_string s = + let rec replace c cs = function + | [] -> [] + | h::t when h = c -> cs @ replace c cs t + | h::t -> h::replace c cs t + in + (* Convert to UTF8, raw, or stripped, and escape backslashed and quotation marks *) + let codepoints = Pdftext.codepoints_of_pdfdocstring s in + let escaped = + let bs = int_of_char '\\' + and nl = int_of_char '\n' + and n = int_of_char 'n' + and q = int_of_char '\"' in + replace bs [bs; bs] (replace nl [bs; n] (replace q [bs; q] codepoints)) + in + match encoding with + | UTF8 -> Pdftext.utf8_of_codepoints escaped + | Stripped -> implode (map char_of_int (lose (fun x -> x > 127) escaped)) + | Raw -> s + in + let bookmarks = Pdfmarks.read_bookmarks pdf in + let inrange = + keep + (function x -> + x.Pdfmarks.target = Pdfdest.NullDestination || + mem (Pdfpage.pagenumber_of_target pdf x.Pdfmarks.target) range) bookmarks + in + iter + (function mark -> + output.Pdfio.output_string + (Printf.sprintf "%i \"%s\" %i %s\n" + mark.Pdfmarks.level + (process_string mark.Pdfmarks.text) + (Pdfpage.pagenumber_of_target pdf mark.Pdfmarks.target) + (if mark.Pdfmarks.isopen then "open" else ""))) + inrange + +(* \section{Stamping files} *) +(* o is the stamp, u is the main pdf page *) +let do_stamp fast scale_to_fit isover pdf o u opdf = + (* Scale page stamp o to fit page u *) + let o = + if scale_to_fit then + let sxmin, symin, sxmax, symax = + Pdf.parse_rectangle + (match Pdf.lookup_direct pdf "/CropBox" o.Pdfpage.rest with | Some r -> r | None -> o.Pdfpage.mediabox) + in let txmin, tymin, txmax, tymax = + Pdf.parse_rectangle + (match Pdf.lookup_direct pdf "/CropBox" u.Pdfpage.rest with | Some r -> r | None -> u.Pdfpage.mediabox) + in + let xmag = (txmax -. txmin) /. (sxmax -. sxmin) + in let ymag = (tymax -. tymin) /. (symax -. symin) in + let scale = + if xmag < 0.999 && ymag < 0.999 then + if xmag > ymag then xmag else ymag + else if xmag >= 1.001 && ymag >= 1.001 then + if xmag > ymag then ymag else xmag + else if xmag >= 1.001 then ymag + else xmag + in + let dx = txmin +. ((txmax -. txmin) -. (sxmax -. sxmin) *. scale) /. 2. + in let dy = tymin +. ((tymax -. tymin) -. (symax -. symin) *. scale) /. 2. in + let scale_op = + Pdfops.Op_cm + (Pdftransform.matrix_of_transform + [Pdftransform.Translate (dx, dy); + Pdftransform.Scale ((sxmin, symin), scale, scale)]) + in + Pdfpage.prepend_operators opdf [scale_op] ~fast o + else + o + in + {u with + Pdfpage.content = + (if isover then ( @ ) else ( @@ )) + [protect_removeme pdf u.Pdfpage.resources u.Pdfpage.content] + [protect_removeme pdf o.Pdfpage.resources o.Pdfpage.content]; + Pdfpage.resources = + combine_pdf_resources pdf u.Pdfpage.resources o.Pdfpage.resources} + +let stamp (fast : bool) scale_to_fit isover range over pdf = + let marks = Pdfmarks.read_bookmarks pdf in + let pdf = Pdfmarks.remove_bookmarks pdf in + let over = Pdfmarks.remove_bookmarks over in + let pageseqs = ilist 1 (Pdfpage.endpage pdf) in + let over_firstpage_pdf = + match Pdfpage.pages_of_pagetree over with + | [] -> error "empty PDF" + | h::_ -> Pdfpage.change_pages true over [h] + in + let merged = Pdfmerge.merge_pdfs ~rotations:[Pdfmerge.DNR; Pdfmerge.DNR] false false ["a"; "b"] [pdf; over_firstpage_pdf] [pageseqs; [1]] in + let renamed_pdf = + Pdfpage.change_pages true + merged (Pdfpage.renumber_pages merged (Pdfpage.pages_of_pagetree merged)) + in + let renamed_pages = Pdfpage.pages_of_pagetree renamed_pdf in + let under_pages, over_page = + all_but_last renamed_pages, last renamed_pages + in + let new_pages = + map2 + (fun pageseq under_page -> + do_stamp fast scale_to_fit isover renamed_pdf + (if mem pageseq range then over_page else + Pdfpage.blankpage Pdfpaper.a4) + under_page over) + pageseqs + under_pages + in + Pdfmarks.add_bookmarks marks (Pdfpage.change_pages true renamed_pdf new_pages) + +(* Combine pages from two PDFs. For now, assume equal length. *) + +(* If [over] has more pages than [under], chop the excess. If the converse, pad +[over] to the same length *) +let equalize_pages under over = + let length_under = Pdfpage.endpage under + in let length_over = Pdfpage.endpage over + in + if length_over > length_under then + under, + (Pdfpage.change_pages true over (take (Pdfpage.pages_of_pagetree over) length_under)) + else if length_under > length_over then + under, + Pdfpage.change_pages true + over + (Pdfpage.pages_of_pagetree over @ + (many (Pdfpage.blankpage Pdfpaper.a4) (length_under - length_over))) + else + under, over + +let combine_pages (fast : bool) under over scaletofit swap equalize = + let marks_under = Pdfmarks.read_bookmarks under in + let marks_over = Pdfmarks.read_bookmarks over in + let under, over = if equalize then equalize_pages under over else under, over in + let under_length = Pdfpage.endpage under + in let over_length = Pdfpage.endpage over in + if under_length <> over_length then raise (Pdf.PDFError "combine_pages: not of equal length") else + let pageseqs_under = ilist 1 (Pdfpage.endpage under) + in let pageseqs_over = ilist 1 (Pdfpage.endpage over) in + let merged = + Pdfmerge.merge_pdfs ~rotations: [Pdfmerge.DNR; Pdfmerge.DNR] false false ["a"; "b"] [under; over] [pageseqs_under; pageseqs_over] in + let renamed_pdf = + Pdfpage.change_pages true + merged (Pdfpage.renumber_pages merged (Pdfpage.pages_of_pagetree merged)) + in + let under_pages, over_pages = + cleave (Pdfpage.pages_of_pagetree renamed_pdf) under_length + in + let new_pages = + map2 (fun o u -> do_stamp fast scaletofit (not swap) renamed_pdf o u over) over_pages under_pages + in + Pdfmarks.add_bookmarks (marks_under @ marks_over) (Pdfpage.change_pages true renamed_pdf new_pages) + +(* \section{Split at bookmarks} *) + +(* Returns empty string on failure. Should only be used in conjunction with +split at bookmarks code, so should never fail, by definiton. *) +let remove_unsafe_characters s = + let chars = + lose + (function x -> + match x with + '/' | '?' | '<' | '>' | '\\' | ':' | '*' | '|' | '\"' | '^' | '+' | '=' -> true + | x when int_of_char x < 32 || int_of_char x > 126 -> true + | _ -> false) + (explode s) + in + match chars with + | '.'::more -> implode more + | chars -> implode chars + +let get_bookmark_name pdf marks splitlevel n _ = + match keep (function m -> n = Pdfpage.pagenumber_of_target pdf m.Pdfmarks.target && m.Pdfmarks.level <= splitlevel) marks with + | {Pdfmarks.text = title}::_ -> remove_unsafe_characters title + | _ -> "" + +(* @F means filename without extension *) +(* @N means sequence number with no padding *) +(* @S means start page of this section *) +(* @E means end page of this section *) +(* @B means bookmark name at start page *) +let process_others marks pdf splitlevel filename sequence startpage endpage s = + let rec procss prev = function + | [] -> rev prev + | '@'::'F'::t -> procss (rev (explode filename) @ prev) t + | '@'::'N'::t -> procss (rev (explode (string_of_int sequence)) @ prev) t + | '@'::'S'::t -> procss (rev (explode (string_of_int startpage)) @ prev) t + | '@'::'E'::t -> procss (rev (explode (string_of_int endpage)) @ prev) t + | '@'::'B'::t -> procss (rev (explode (get_bookmark_name pdf marks splitlevel startpage pdf)) @ prev) t + | h::t -> procss (h::prev) t + in + implode (procss [] (explode s)) + +let name_of_spec printf marks (pdf : Pdf.t) splitlevel spec n filename startpage endpage = + if printf then + let spec = + string_of_sections (substitute (sections_of_string spec) [n]) + in + process_others marks pdf splitlevel filename n startpage endpage spec + else + let fill l n = + let chars = explode (string_of_int n) in + if length chars > l + then implode (drop chars (length chars - l)) + else implode ((many '0' (l - length chars)) @ chars) + in + let chars = explode spec in + let before, including = cleavewhile (neq '%') chars in + let percents, after = cleavewhile (eq '%') including in + if percents = [] + then + process_others marks pdf splitlevel filename n startpage endpage spec + else + process_others marks pdf splitlevel filename n startpage endpage + (implode before ^ fill (length percents) n ^ implode after) + +(* Find the stem of a filename *) +let stem s = + implode (rev (tail_no_fail (dropwhile (neq '.') (rev (explode (Filename.basename s)))))) + +let fast_write_split_pdfs enc printf splitlevel original_filename linearize nobble spec main_pdf pagenums pdf_pages = + let marks = Pdfmarks.read_bookmarks main_pdf in + iter2 + (fun number pagenums -> + let pdf = nobble (Pdfpage.pdf_of_pages main_pdf pagenums) in + let startpage, endpage = extremes pagenums in + let name = name_of_spec printf marks main_pdf splitlevel spec number (stem original_filename) startpage endpage in + Pdf.remove_unreferenced pdf; + Pdfwrite.pdf_to_file_options linearize enc (not (enc = None)) pdf name) + (indx pagenums) + pagenums + +let split_pdf enc printf original_filename chunksize linearize nobble spec pdf = + let pdf_pages = Pdfpage.pages_of_pagetree pdf in + fast_write_split_pdfs enc printf 0 original_filename linearize nobble spec pdf (splitinto chunksize (indx pdf_pages)) pdf_pages + +(* Return list, in order, a *set* of page numbers of bookmarks at a given level *) +let bookmark_pages level pdf = + setify_preserving_order + (option_map + (function l when l.Pdfmarks.level = level -> Some (Pdfpage.pagenumber_of_target pdf l.Pdfmarks.target) | _ -> None) + (Pdfmarks.read_bookmarks pdf)) + +let split_at_bookmarks original_filename linearize nobble level spec pdf = + (*flprint "split_at_bookmarks\n";*) + let pdf_pages = Pdfpage.pages_of_pagetree pdf + in let points = bookmark_pages level pdf in + let points = + lose (fun x -> x <= 0 || x > Pdfpage.endpage pdf) (map pred points) (* FIXME: What actually causes these problems? *) + in + (*flprint "Points: "; + iter (Printf.printf "%i ,") points; + flprint "\n";*) + let pts = splitat points (indx pdf_pages) in + (*flprint "Calling fast_write_split_pdfs\n";*) + fast_write_split_pdfs None false level + original_filename linearize nobble spec pdf pts pdf_pages + +(* Called from cpdflib.ml - different from above *) +let split_on_bookmarks pdf level = + let points = lose (eq 0) (map pred (bookmark_pages level pdf)) + in let pdf_pages = Pdfpage.pages_of_pagetree pdf in + let ranges = splitat points (indx pdf_pages) in + map (fun rs -> Pdfpage.pdf_of_pages pdf rs) ranges + +(* Output information for each page *) +let output_page_info pdf = + let pages = Pdfpage.pages_of_pagetree pdf + in let getbox page box = + if box = "/MediaBox" then + match page.Pdfpage.mediabox with + | Pdf.Array [a; b; c; d] -> + Printf.sprintf "%f %f %f %f" + (Pdf.getnum a) (Pdf.getnum b) (Pdf.getnum c) (Pdf.getnum d) + | _ -> "" + else + match Pdf.lookup_direct pdf box page.Pdfpage.rest with + | Some (Pdf.Array [a; b; c; d]) -> + Printf.sprintf "%f %f %f %f" + (Pdf.getnum a) (Pdf.getnum b) (Pdf.getnum c) (Pdf.getnum d) + | _ -> "" + in let rotation page = + Pdfpage.int_of_rotation page.Pdfpage.rotate + in + for pnum = 1 to Pdfpage.endpage pdf do + let page = select pnum pages in + Printf.printf "Page %i:\n" pnum; + Printf.printf "MediaBox: %s\n" (getbox page "/MediaBox"); + Printf.printf "CropBox: %s\n" (getbox page "/CropBox"); + Printf.printf "BleedBox: %s\n" (getbox page "/BleedBox"); + Printf.printf "TrimBox: %s\n" (getbox page "/TrimBox"); + Printf.printf "ArtBox: %s\n" (getbox page "/ArtBox"); + Printf.printf "Rotation: %i\n" (rotation page) + done + +(* Does the page have a defined box e.g "/CropBox" *) +let hasbox pdf page boxname = + let pages = Pdfpage.pages_of_pagetree pdf in + if page > length pages || page < 1 then raise (Failure "hasbox: bad page") else + let p = select page pages in + match Pdf.lookup_direct pdf boxname p.Pdfpage.rest with + | Some _ -> true + | _ -> false + +(* \section{Print metadata} *) +let get_metadata pdf = + match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with + | None -> error "malformed file" + | Some root -> + match Pdf.lookup_direct pdf "/Metadata" root with + | Some ((Pdf.Stream _) as s) -> + Pdf.getstream s; + begin match s with + | Pdf.Stream {contents = (_, Pdf.Got data)} -> data + | _ -> assert false + end + | _ -> mkbytes 0 + +let print_metadata pdf = + let data = get_metadata pdf in + for x = 0 to bytes_size data - 1 do + Printf.printf "%c" (char_of_int (bget data x)) + done + +(* \section{Print font data} *) +let list_font pdf page (name, dict) = + let subtype = + match Pdf.lookup_direct pdf "/Subtype" dict with + | Some (Pdf.Name n) -> n + | _ -> "" + in let basefont = + match Pdf.lookup_direct pdf "/BaseFont" dict with + | Some (Pdf.Name n) -> n + | _ -> "" + in let encoding = + match Pdf.lookup_direct pdf "/Encoding" dict with + | Some (Pdf.Name n) -> n + | _ -> "" + in + (*i Printf.printf + "%i %s %s %s %s\n" i*) + page, name, subtype, basefont, encoding + +let list_fonts pdf = + let pages = Pdfpage.pages_of_pagetree pdf in + flatten + (map + (fun (num, page) -> + match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with + | Some (Pdf.Dictionary fontdict) -> + map (list_font pdf num) fontdict + | _ -> []) + (combine (ilist 1 (length pages)) pages)) + +let string_of_font (p, n, s, b, e) = + Printf.sprintf "%i %s %s %s %s\n" p n s b e + +let print_fonts pdf = + flprint + (fold_left ( ^ ) "" (map string_of_font (list_fonts pdf))) + +(* \section{Nobbling for Demo Version} *) +let nobble_page pdf _ page = + let minx, miny, maxx, maxy = + (* Use cropbox if available *) + Pdf.parse_rectangle + (match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with + | Some r -> r + | None -> page.Pdfpage.mediabox) + in + let fontdict = + match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with + | None -> Pdf.Dictionary [] + | Some d -> d + in + let fontname = Pdf.unique_key "F" fontdict in + let width = maxx -. minx in let height = maxy -. miny in + let scalex = + (width *. 1000.) /. float (Pdfstandard14.textwidth false Pdftext.Helvetica "DEMO") + in + let page' = + let font = + Pdf.Dictionary + [("/Type", Pdf.Name "/Font"); + ("/Subtype", Pdf.Name "/Type1"); + ("/BaseFont", Pdf.Name "/Helvetica")] + in let ops = + [Pdfops.Op_BMC "/CPDFSTAMP"; + Pdfops.Op_cm + (Pdftransform.matrix_of_transform + [Pdftransform.Translate (minx, miny +. height /. 2.)]); + Pdfops.Op_gs "/gs0"; + Pdfops.Op_BT; + Pdfops.Op_Tf (fontname, scalex); + Pdfops.Op_Tj "DEMO"; + Pdfops.Op_ET; + Pdfops.Op_EMC] + in + {(Pdfpage.blankpage Pdfpaper.a4) with + Pdfpage.mediabox = page.Pdfpage.mediabox; + Pdfpage.content = [Pdfops.stream_of_ops ops]; + Pdfpage.resources = + Pdf.Dictionary + [("/Font", Pdf.Dictionary [(fontname, font)]); + ("/ExtGState", Pdf.Dictionary + ["/gs0", + Pdf.Dictionary["/Type", Pdf.Name "/ExtGState"; "/ca", Pdf.Real 0.2]]); + ] + } + in + do_stamp false false true pdf page' page (Pdf.empty ()) + +(* \section{Superimpose text, page numbers etc.} *) + +type position = + | PosCentre of float * float + | PosLeft of float * float + | PosRight of float * float + | Top of float + | TopLeft of float + | TopRight of float + | Left of float + | BottomLeft of float + | Bottom of float + | BottomRight of float + | Right of float + | Diagonal + | ReverseDiagonal + +let string_of_position = function + | PosCentre (a, b) -> Printf.sprintf "PosCentre %f %f" a b + | PosLeft (a, b) -> Printf.sprintf "PosLeft %f %f" a b + | PosRight (a, b) -> Printf.sprintf "PosRight %f %f" a b + | Top a -> Printf.sprintf "Top %f" a + | TopLeft a -> Printf.sprintf "TopLeft %f" a + | TopRight a -> Printf.sprintf "TopRight %f" a + | Left a -> Printf.sprintf "Left %f" a + | BottomLeft a -> Printf.sprintf "BottomLeft %f" a + | Bottom a -> Printf.sprintf "Bottom %f" a + | BottomRight a -> Printf.sprintf "BottomRight %f" a + | Right a -> Printf.sprintf "Right %f" a + | Diagonal -> "Diagonal" + | ReverseDiagonal -> "Reverse Diagonal" + +type orientation = + | Horizontal + | Vertical + | VerticalDown + +type justification = LeftJustify | CentreJustify | RightJustify + +(* Given the mediabox, calculate an absolute position for the text. *) +let calculate_position ignore_d w (xmin, ymin, xmax, ymax) orientation pos = + (*i Printf.printf "calculate_position %b %f %f %f %f %f %b\n" ignore_d w xmin ymin xmax ymax shorterside; i*) + let rot = if orientation = VerticalDown then rad_of_deg 270. else 0. in + match pos with + | Diagonal -> + let angle = atan ((ymax -. ymin) /. (xmax -. xmin)) + in let cx, cy = (xmax +. xmin) /. 2., (ymax +. ymin) /. 2. in + let dl = w /. 2. in + let dx = dl *. cos angle + in let dy = dl *. sin angle in + (*i Printf.printf "Diagonal: angle = %f, cx = %f, cy = %f, dx = %f, dy = %f\n" angle cx cy dx dy; i*) + cx -. dx, cy -. dy, angle + | ReverseDiagonal -> + (*flprint "REVERSE DIAGONAL IN CALCULATE POSITION\n";*) + let angle = atan ((ymax -. ymin) /. (xmax -. xmin)) + in let cx, cy = (xmax +. xmin) /. 2., (ymax +. ymin) /. 2. in + let dl = w /. 2. in + let dx = dl *. cos angle + in let dy = dl *. sin angle in + (*Printf.printf "Diagonal: angle = %f\n" (deg_of_rad angle);*) + cx -. dx, (ymax +. ymin) -. (cy -. dy), angle -. ((2. *. pi) -. ((pi -. (2. *. angle)) *. 2.) /. 2.) +. pi + | PosLeft (x, y) -> xmin +. x, ymin +. y, rot + | PosCentre (x, y) -> xmin +. x -. (w /. 2.), ymin +. y, rot + | PosRight (x, y) -> xmin +. x -. w, ymin +. y, rot + | Top d -> + let d = if ignore_d then 0. else d in + (xmin +. xmax) /. 2. -. w /. 2., ymax -. d, rot + | TopLeft d -> + let d = if ignore_d then 0. else d in + xmin +. d, ymax -. d, rot + | TopRight d -> + let d = if ignore_d then 0. else d in + xmax -. d -. w, ymax -. d, rot + | Left d -> + let d = if ignore_d then 0. else d in + xmin +. d, (ymax +. ymin) /. 2., rot + | BottomLeft d -> + let d = if ignore_d then 0. else d in + xmin +. d, ymin +. d, rot + | Bottom d -> + let d = if ignore_d then 0. else d in + (xmin +. xmax) /. 2. -. w /. 2., ymin +. d, rot + | BottomRight d -> + let d = if ignore_d then 0. else d in + xmax -. d -. w, ymin +. d, rot + | Right d -> + let d = if ignore_d then 0. else d in + xmax -. d -. w, (ymax +. ymin) /. 2., rot + +(* Process UTF8 text to /WinAnsiEncoding string. *) +let winansi_of_utf8 s = + (*flprint "winansi_of_utf8:"; + iter (Printf.printf "%C ") (explode s); + flprint "\n";*) + let extractor = Pdftext.charcode_extractor_of_encoding Pdftext.WinAnsiEncoding + and codepoints = Pdftext.codepoints_of_utf8 s in + (*flprint "codepoints after Pdftext.codepoints_of_utf8\n"; + iter (Printf.printf "%i ") codepoints; + flprint "\ndone\n";*) + implode (map char_of_int (option_map extractor codepoints)) + +(* Process codepoints back to UTF8, assuming it came from UTF8 to start with *) +let utf8_of_winansi s = + let text_extractor = + Pdftext.text_extractor_of_font + (Pdf.empty ()) + (Pdf.Dictionary + [("/BaseFont", Pdf.Name "/TimesRoman"); + ("/Subtype", Pdf.Name "/Type1"); + ("/Encoding", Pdf.Name "/WinAnsiEncoding")]) + in + let codepoints = Pdftext.codepoints_of_text text_extractor s in + Pdftext.utf8_of_codepoints codepoints + +(* Get the width of some text in the given font *) +let width_of_text font text = + match font with + | Pdftext.SimpleFont {Pdftext.fontmetrics = Some fontmetrics} -> + begin try + fold_left ( +. ) 0. (map (fun c -> fontmetrics.(int_of_char c)) (explode text)) + with + _ -> 0. + end + | _ -> 0. + +type ops_metrics = + {metrics_text : string; + metrics_x : float; + metrics_y : float; + metrics_rot : float} + +let ops_metrics : ops_metrics list ref = ref [] + +let ops_baseline_adjustment = ref 0. + +let metrics_howmany () = length !ops_metrics + +let metrics_text n = + utf8_of_winansi (select n !ops_metrics).metrics_text + +let metrics_x n = + (select n !ops_metrics).metrics_x + +let metrics_y n = + (select n !ops_metrics).metrics_y + +let metrics_rot n = + (select n !ops_metrics).metrics_rot + +let metrics_baseline_adjustment () = !ops_baseline_adjustment + +let ops longest_w metrics x y rotate hoffset voffset outline linewidth unique_fontname unique_extgstatename colour fontsize text = + if metrics then + ops_metrics := + {metrics_text = text; metrics_x = x -. hoffset; metrics_y = y -. voffset; metrics_rot = rotate} + ::!ops_metrics; + [Pdfops.Op_q; + Pdfops.Op_BMC "/CPDFSTAMP"; + Pdfops.Op_cm + (Pdftransform.matrix_of_transform + [Pdftransform.Translate (x -. hoffset, y -. voffset); + Pdftransform.Rotate ((0., 0.), rotate)]); + Pdfops.Op_BT; + ] @ + (if outline then [Pdfops.Op_w linewidth; Pdfops.Op_Tr 1] else [Pdfops.Op_Tr 0]) @ + [ + (match colour with (r, g, b) -> Pdfops.Op_rg (r, g, b)); + (match colour with (r, g, b) -> Pdfops.Op_RG (r, g, b))] + @ + (match unique_extgstatename with None -> [] | Some n -> [Pdfops.Op_gs n]) + @ + [Pdfops.Op_Tf (unique_fontname, fontsize); + Pdfops.Op_Tj text; + Pdfops.Op_ET; + Pdfops.Op_EMC; + Pdfops.Op_Q] + +(* Find the h-offset for justification based on the longest width, the current +width, the justification and the position. *) +let find_justification_offsets longest_w w position = function + | LeftJustify -> + begin match position with + | TopLeft _ | Left _ | PosLeft _ | BottomLeft _ -> 0. + | Top _ | PosCentre _ | Bottom _ -> (longest_w -. w) /. 2. + | TopRight _ | BottomRight _ | PosRight _ | Right _ -> longest_w -. w + | Diagonal -> 0. + | ReverseDiagonal -> 0. + end + | RightJustify -> + begin match position with + | TopLeft _ | Left _ | PosLeft _ | BottomLeft _ -> ~-.(longest_w -. w) + | Top _ | PosCentre _ | Bottom _ -> ~-.((longest_w -. w) /. 2.) + | TopRight _ | BottomRight _ | PosRight _ | Right _ -> 0. + | Diagonal -> 0. + | ReverseDiagonal -> 0. + end + | CentreJustify -> + begin match position with + | TopLeft _ | Left _ | PosLeft _ | BottomLeft _ -> ~-.((longest_w -. w) /. 2.) + | Top _ | PosCentre _ | Bottom _ -> 0. + | TopRight _ | BottomRight _ | PosRight _ | Right _ -> (longest_w -. w) /. 2. + | Diagonal -> 0. + | ReverseDiagonal -> 0. + end + +let addtext + metrics lines linewidth outline fast colour fontname bates fontsize font + underneath position hoffset voffset text pages orientation cropbox opacity + justification filename pdf += + let endpage = Pdfpage.endpage pdf in + let replace_pairs pdf filename bates num = + ["%Page", string_of_int num; + "%Roman", roman_upper num; + "%roman", roman_lower num; + "%filename", filename; + "%Label", pagelabel pdf num; + "%EndPage", string_of_int endpage; + "%EndLabel", pagelabel pdf endpage; + "%Bates", string_of_int (bates + num - 1)] in + let addtext_page num page = + let resources', unique_extgstatename = + if opacity < 1.0 then + let dict = + match Pdf.lookup_direct pdf "/ExtGState" page.Pdfpage.resources with + | Some d -> d + | None -> Pdf.Dictionary [] + in + let unique_extgstatename = Pdf.unique_key "gs" dict in + let dict' = + Pdf.add_dict_entry dict unique_extgstatename + (Pdf.Dictionary [("/ca", Pdf.Real opacity); ("/CA", Pdf.Real opacity)]) + in + Pdf.add_dict_entry page.Pdfpage.resources "/ExtGState" dict', Some unique_extgstatename + else + page.Pdfpage.resources, None + in + let fontdict = + match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with + | None -> Pdf.Dictionary [] + | Some d -> d + in + let unique_fontname = Pdf.unique_key "F" fontdict in + let ops = + let text = process_text text (replace_pairs pdf filename bates num) in + let calc_textwidth text = + match font with + | Some f -> + (* FIXME This is a bit wrong in the prescence of special + characters due to standard encoding not win encoding being + used in textwidth. When we have new AFM parsing up and + running, can improve. *) + let rawwidth = Pdfstandard14.textwidth false f text in + (float rawwidth *. fontsize) /. 1000. + | None -> + let font = + match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with + | Some fontdict -> + begin match Pdf.lookup_direct pdf fontname fontdict with + | Some font -> font + | _ -> failwith "addtext: bad font" + end + | _ -> failwith "addtext: bad font" + in + let rawwidth = width_of_text (Pdftext.read_font pdf font) text in + (rawwidth *. fontsize) /. 1000. + in + let expanded_lines = + map (function text -> process_text text (replace_pairs pdf filename bates num)) lines + in + let textwidth = calc_textwidth text + and allwidths = map calc_textwidth expanded_lines in + let longest_w = last (sort compare allwidths) in + let joffset = find_justification_offsets longest_w textwidth position justification in + let mediabox = + if cropbox then + match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with + | Some pdfobject -> Pdf.parse_rectangle (Pdf.direct pdf pdfobject) + | None -> Pdf.parse_rectangle page.Pdfpage.mediabox + else + Pdf.parse_rectangle page.Pdfpage.mediabox + in + let x, y, rotate = calculate_position false textwidth mediabox orientation position in + let hoffset, voffset = + if position = Diagonal || position = ReverseDiagonal + then -. (cos ((pi /. 2.) -. rotate) *. voffset), sin ((pi /. 2.) -. rotate) *. voffset + else hoffset, voffset + in + match font with + | Some f -> + ops longest_w metrics x y rotate (hoffset +. joffset) voffset outline linewidth + unique_fontname unique_extgstatename colour fontsize text + | None -> + ops longest_w metrics x y rotate (hoffset +. joffset) voffset outline linewidth + fontname None colour fontsize text + in + let newresources = + match font with + | Some _ -> + let thefont = + Pdf.Dictionary + [("/Type", Pdf.Name "/Font"); + ("/Encoding", Pdf.Name "/WinAnsiEncoding"); + ("/Subtype", Pdf.Name "/Type1"); + ("/BaseFont", Pdf.Name ("/" ^ fontname))] + in + let newfontdict = Pdf.add_dict_entry fontdict unique_fontname thefont in + Pdf.add_dict_entry resources' "/Font" newfontdict + | None -> page.Pdfpage.resources + in + let page = {page with Pdfpage.resources = newresources} in + if underneath + then Pdfpage.prepend_operators pdf ops ~fast:fast page + else Pdfpage.postpend_operators pdf ops ~fast:fast page + in + if metrics then + (ignore (iter_pages (fun a b -> ignore (addtext_page a b)) pdf pages); pdf) + else + process_pages addtext_page pdf pages + +(* Prev is a list of lists of characters *) +let split_at_newline t = + let rec split_at_newline_inner prev = function + | [] -> rev (map implode (map rev prev)) + | '\\'::'n'::t -> split_at_newline_inner ([]::prev) t + | h::t -> split_at_newline_inner ((h::hd prev)::tl prev) t + in + split_at_newline_inner [[]] (explode t) + +let rec unescape_chars prev = function + | [] -> rev prev + | '\\'::('0'..'9' as a)::('0'..'9' as b)::('0'..'9' as c)::t -> + let chr = char_of_int (int_of_string ("0o" ^ implode [a;b;c])) in + unescape_chars (chr::prev) t + | '\\'::c::t when c <> 'n' -> unescape_chars (c::prev) t + | h::t -> unescape_chars (h::prev) t + +let unescape_string s = + implode (unescape_chars [] (explode s)) + +let + addtexts metrics linewidth outline fast fontname font bates colour position linespacing + fontsize underneath text pages orientation cropbox opacity justification midline filename pdf += + (*flprint "addtexts:\n"; + iter (Printf.printf "%C ") (explode text); + flprint "\n"; + Printf.printf "\nCpdf.addtexts: metrics = %b" metrics; + flprint "\n";*) + (*Printf.printf "linewidth = %f\n" linewidth; + Printf.printf "outline = %b\n" outline; + Printf.printf "fast = %b\n" fast; + Printf.printf "fontname = %s\n" fontname; + Printf.printf "winansi text = %s\n" text; + Printf.printf "position = %s\n" (string_of_position position); + Printf.printf "bates = %i\n" bates; + Printf.printf "linespacing = %f\n" linespacing; + Printf.printf "fontsize = %f\n" fontsize; + Printf.printf "underneath = %b\n" underneath; + Printf.printf "font = %s\n" begin match font with None -> "None" | Some x -> Pdftext.string_of_standard_font x end; + Printf.printf "justification = %s\n" + begin match justification with LeftJustify -> "left" | RightJustify -> "right" | CentreJustify -> "centre" end; + Printf.printf "midline = %b\n" midline; + begin match colour with r, g, b -> Printf.printf "%f, %f, %f\n" r g b end; + Printf.printf "opacity = %f\n" opacity; + flprint "\n"; + Printf.printf "relative-to-cropbox = %b" cropbox; + flprint "\n";*) + ops_metrics := []; + let text = winansi_of_utf8 text in + let text = unescape_string text in + let lines = split_at_newline text + and pdf = ref pdf in + let voffset = + match position with + | Bottom _ | BottomLeft _ | BottomRight _ -> + ref (0. -. (linespacing *. fontsize *. (float (length lines) -. 1.))) + | Left _ | Right _ -> + (* Vertically align *) + ref (0. -. (linespacing *. ((fontsize *. (float (length lines) -. 1.)) /. 2.))) + | Diagonal | ReverseDiagonal -> + (* Change so that the whole paragraph sits on the centre... *) + ref (0. -. ((linespacing *. fontsize *. (float (length lines) -. 1.)) /. 2.)) + | _ -> ref 0. + in + if midline then + begin match font with + | Some font -> + let baseline_adjustment = + (fontsize *. float (Pdfstandard14.baseline_adjustment font)) /. 1000. + in + ops_baseline_adjustment := baseline_adjustment; + voffset := !voffset +. baseline_adjustment + | _ -> + ops_baseline_adjustment := 0. + end + else + ops_baseline_adjustment := 0.; + iter + (fun line -> + let voff, hoff = + if orientation = Vertical then 0., -.(!voffset) else !voffset, 0. + in + pdf := + addtext metrics lines linewidth outline fast colour fontname bates fontsize font + underneath position hoff voff line pages orientation cropbox opacity justification filename !pdf; + voffset := !voffset +. (linespacing *. fontsize)) + lines; + ops_metrics := rev !ops_metrics; + !pdf + +let removetext range pdf = + (* Could fail on nesting, or other marked content inside our marked content.*) + let rec remove_until_last_EMC level = function + | [] -> [] + | Pdfops.Op_BMC "/CPDFSTAMP"::more -> + remove_until_last_EMC (level + 1) more + | Pdfops.Op_EMC::more -> + if level = 1 + then more + else remove_until_last_EMC (level - 1) more + | _::more -> + remove_until_last_EMC level more + in + let rec remove_stamps prev = function + | [] -> rev prev + | Pdfops.Op_BMC "/CPDFSTAMP"::more -> + let rest = remove_until_last_EMC 1 more in + remove_stamps prev rest + | h::t -> remove_stamps (h::prev) t + in + let removetext_page _ page = + {page with + Pdfpage.content = + let ops = Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content in + [Pdfops.stream_of_ops (remove_stamps [] ops)]} + in + process_pages removetext_page pdf range + +(* \section{Padding with blank pages.} *) +let insert_after pos page pages = + let before, after = cleave pages pos in + before @ [page] @ after + +(* Insert many. *) +let rec insert_after_many pages = function + | [] -> pages + | (pos, page)::more -> + let pages' = insert_after pos page pages in + insert_after_many pages' (map (fun (p, pa) -> p + 1, pa) more) + +let pad range pdf i = + let pages = Pdfpage.pages_of_pagetree pdf in + let blankpages = + map + (fun n -> + {Pdfpage.content = []; + Pdfpage.mediabox = (select (n + i) pages).Pdfpage.mediabox; + Pdfpage.resources = Pdf.Dictionary []; + Pdfpage.rotate = (select (n + i) pages).Pdfpage.rotate; + Pdfpage.rest = (select (n + i) pages).Pdfpage.rest}) + range + in + let pages' = insert_after_many pages (combine range blankpages) in + Pdfpage.change_pages true pdf pages' + +let padafter range pdf = + let isinpdf n = mem n (ilist 1 (Pdfpage.endpage pdf)) in + if not (fold_left ( && ) true (map isinpdf range)) then + raise (Failure "padafter: range contains pages not present in pdf"); + pad range pdf 0 + +let padbefore range pdf = + let isinpdf n = mem n (ilist 1 (Pdfpage.endpage pdf)) in + if not (fold_left ( && ) true (map isinpdf range)) then + raise (Failure "padbefore: range contains pages not present in pdf"); + pad (map pred range) pdf 1 + +let padmultiple n pdf = + let pages = Pdfpage.pages_of_pagetree pdf in + let len = length pages in + let pages_to_add = if len / n * n = len then 0 else n - (len mod n) in + if pages_to_add > 0 then + let blankpages = + many + {Pdfpage.content = []; + Pdfpage.mediabox = (select len pages).Pdfpage.mediabox; + Pdfpage.resources = Pdf.Dictionary []; + Pdfpage.rotate = (select len pages).Pdfpage.rotate; + Pdfpage.rest = (select len pages).Pdfpage.rest} + pages_to_add + in + Pdfpage.change_pages true pdf (pages @ blankpages) + else + pdf + +(* \section{Shift page data} *) +let make_mediabox (xmin, ymin, xmax, ymax) = + Pdf.Array + [Pdf.Real xmin; Pdf.Real ymin; Pdf.Real xmax; Pdf.Real ymax] + +(* Change the media box and other known boxes by the function [f] which takes +xmin, xmax, ymin, ymax as input. *) +let change_boxes f pdf page = + let names = ["/TrimBox"; "/ArtBox"; "/CropBox"; "/BleedBox"] + in let getbox n = + Pdf.lookup_direct pdf n page.Pdfpage.rest + in + let boxes = combine names (map getbox names) in + let toreplace = lose (function (_, None) -> true | _ -> false) boxes in + let toreplace = + map + (function (name, Some value) -> (name, value) | _ -> assert false) + toreplace + in + let rest' = + fold_left + (fun e (k, v) -> + let v = + make_mediabox (f (Pdf.parse_rectangle v)) + in + Pdf.replace_dict_entry e k v) + page.Pdfpage.rest + toreplace + in + {page with + Pdfpage.mediabox = + make_mediabox (f (Pdf.parse_rectangle page.Pdfpage.mediabox)); + Pdfpage.rest = rest'} + +(* The content is flipped by altering any use of [Op_cm]. But we must also +alter any /Matrix entries in pattern dictionaries for tiled and shading +patterns. In addition, shadings used by Op_sh in the main page content and in +xobjects must be altered. *) +(*let rec change_shadings pdf tr resources = + let transform_shading s = + s + in + try + let resources = + begin match Pdf.lookup_direct pdf "/Shading" resources with + | Some (Pdf.Dictionary shadings) -> + let names, nums = + split + (map + (fun (name, shading) -> + Printf.printf "Fixing up shading %s\n" name; + name, Pdf.addobj pdf (transform_shading shading)) + shadings) + in + let entries = + map2 (fun name num -> name, Pdf.Indirect num) names nums + in + Pdf.add_dict_entry resources "/Shading" (Pdf.Dictionary entries) + | _ -> resources + end + in + let process_xobject xobject = + change_shadings pdf tr xobject + in + begin match Pdf.lookup_direct pdf "/XObject" resources with + | Some (Pdf.Dictionary xobjects) -> + let names, nums = + split + (map + (fun (name, xobject) -> + Printf.printf "Looking for shadings in xobject %s\n" name; + name, Pdf.addobj pdf (process_xobject xobject)) + xobjects) + in + let entries = + map2 (fun name num -> name, Pdf.Indirect num) names nums + in + Pdf.add_dict_entry resources "/XObject" (Pdf.Dictionary entries) + | _ -> resources + end + with + _ -> resources*) + +let change_pattern_matrices pdf tr resources = + try + (*let resources = change_shadings pdf tr resources in*) + begin match Pdf.lookup_direct pdf "/Pattern" resources with + | Some (Pdf.Dictionary patterns) -> + let names, nums = + split + (map + (fun (name, p) -> + let old_pattern = Pdf.direct pdf p in + let new_pattern = + let existing_tr = Pdf.parse_matrix pdf "/Matrix" old_pattern in + let new_tr = Pdftransform.matrix_compose (Pdftransform.matrix_invert tr) existing_tr in + Pdf.add_dict_entry old_pattern "/Matrix" (Pdf.make_matrix new_tr) + in + name, Pdf.addobj pdf new_pattern) + patterns) + in + let entries = + map2 (fun name num -> name, Pdf.Indirect num) names nums + in + Pdf.add_dict_entry resources "/Pattern" (Pdf.Dictionary entries) + | _ -> resources + end + with + Pdftransform.NonInvertable -> resources + +let shift_page ?(fast=false) dx dy pdf _ page = + let transform_op = + Pdfops.Op_cm (Pdftransform.matrix_of_op (Pdftransform.Translate (dx, dy))) + in + let resources' = + change_pattern_matrices pdf (Pdftransform.mktranslate ~-.dx ~-.dy) page.Pdfpage.resources + in + Pdfpage.prepend_operators pdf [transform_op] ~fast {page with Pdfpage.resources = resources'} + +let shift_pdf ?(fast=false) dx dy pdf range = + process_pages (shift_page ~fast dx dy pdf) pdf range + +(* Change a page's media box so its minimum x and y are 0, making other +operations simpler to think about. Any shift that is done is reflected in +other boxes (clip etc.) *) +let rectify_boxes ?(fast=false) pdf page = + let minx, miny, _, _ = + Pdf.parse_rectangle page.Pdfpage.mediabox + in + let f (iminx, iminy, imaxx, imaxy) = + iminx -. minx, iminy -. miny, imaxx -. minx, imaxy -. miny + in + let page = change_boxes f pdf page in + if minx <> 0. || miny <> 0. + then shift_page ~fast (-.minx) (-.miny) pdf 0 page + else page + +(* \section{Flip pages} *) +let flip_page ?(fast=false) transform_op pdf _ page = + let minx, miny, maxx, maxy = + Pdf.parse_rectangle page.Pdfpage.mediabox + in + let tr = transform_op minx miny maxx maxy in + let resources = + change_pattern_matrices pdf tr page.Pdfpage.resources + in + Pdfpage.prepend_operators pdf [Pdfops.Op_cm tr] ~fast {page with Pdfpage.resources = resources} + +let vflip_pdf ?(fast=false) pdf range = + let transform_op _ miny _ maxy = + Pdftransform.matrix_of_op + (Pdftransform.Scale ((0., ((miny +. maxy) /. 2.)), 1., -.1.)) + in + process_pages (flip_page ~fast transform_op pdf) pdf range + +let hflip_pdf ?(fast=false) pdf range = + let transform_op minx _ maxx _ = + Pdftransform.matrix_of_op + (Pdftransform.Scale (((minx +. maxx) /. 2., 0.), -.1., 1.)) + in + process_pages (flip_page ~fast transform_op pdf) pdf range + +(* \section{Set media box} *) +let set_mediabox x y w h pdf range = + let crop_page _ page = + {page with + Pdfpage.mediabox = + (Pdf.Array + [Pdf.Real x; Pdf.Real y; + Pdf.Real (x +. w); Pdf.Real (y +. h)])} + in + process_pages crop_page pdf range + +let setBox box minx maxx miny maxy pdf range = + let set_box_page _ page = + {page with + Pdfpage.rest = + Pdf.add_dict_entry + page.Pdfpage.rest box + (Pdf.Array [Pdf.Real minx; Pdf.Real miny; Pdf.Real maxx; Pdf.Real maxy])} + in + process_pages set_box_page pdf range + +(* \section{Cropping} *) +let crop_pdf x y w h pdf range = + let crop_page _ page = + {page with + Pdfpage.rest = + (Pdf.add_dict_entry + page.Pdfpage.rest + "/CropBox" + (Pdf.Array + [Pdf.Real x; Pdf.Real y; + Pdf.Real (x +. w); Pdf.Real (y +. h)]))} + in + process_pages crop_page pdf range + +let remove_cropping_pdf pdf range = + let remove_cropping_page _ page = + {page with + Pdfpage.rest = + (Pdf.remove_dict_entry page.Pdfpage.rest "/CropBox")} + in + process_pages remove_cropping_page pdf range + +let remove_trim_pdf pdf range = + let remove_trim_page _ page = + {page with + Pdfpage.rest = + (Pdf.remove_dict_entry page.Pdfpage.rest "/TrimBox")} + in + process_pages remove_trim_page pdf range + +let remove_art_pdf pdf range = + let remove_art_page _ page = + {page with + Pdfpage.rest = + (Pdf.remove_dict_entry page.Pdfpage.rest "/ArtBox")} + in + process_pages remove_art_page pdf range + +let remove_bleed_pdf pdf range = + let remove_bleed_page _ page = + {page with + Pdfpage.rest = + (Pdf.remove_dict_entry page.Pdfpage.rest "/BleedBox")} + in + process_pages remove_bleed_page pdf range + +(* \section{Rotating pages} *) +let rotate_pdf r pdf range = + let rotate_page _ page = + {page with Pdfpage.rotate = + Pdfpage.rotation_of_int r} + in + process_pages rotate_page pdf range + +let rotate_pdf_by r pdf range = + let rotate_page_by _ page = + {page with Pdfpage.rotate = + Pdfpage.rotation_of_int ((Pdfpage.int_of_rotation page.Pdfpage.rotate + r) mod 360)} + in + process_pages rotate_page_by pdf range + +let rotate_page_contents ~fast rotpoint r pdf _ page = + let rotation_point = + match rotpoint with + | None -> + let minx, miny, maxx, maxy = Pdf.parse_rectangle page.Pdfpage.mediabox in + (minx +. maxx) /. 2., (miny +. maxy) /. 2. + | Some point -> point + in + let tr = + Pdftransform.matrix_of_op + (Pdftransform.Rotate (rotation_point, -.(rad_of_deg r))) + in let tr2 = + Pdftransform.matrix_of_op + (Pdftransform.Rotate (rotation_point, rad_of_deg r)) + in + let transform_op = Pdfops.Op_cm tr in + let resources' = change_pattern_matrices pdf tr2 page.Pdfpage.resources in + Pdfpage.prepend_operators pdf [transform_op] ~fast {page with Pdfpage.resources = resources'} + +let rotate_contents ?(fast=false) r pdf range = + process_pages (rotate_page_contents ~fast None r pdf) pdf range + +(* Return the pages from the pdf in the range, unordered. *) +let select_pages range pdf = + let pages = Pdfpage.pages_of_pagetree pdf in + option_map (function n -> try Some (select n pages) with _ -> None) range + + +(* Upright functionality *) + +(* If all pages are already upright, do nothing to save time. *) +let allupright range pdf = + let page_is_upright page = + page.Pdfpage.rotate = Pdfpage.Rotate0 + in + not (mem false (map page_is_upright (select_pages range pdf))) + +let upright_transform page = + let rotate = + Pdfpage.int_of_rotation page.Pdfpage.rotate + and cx, cy = + let minx, miny, maxx, maxy = Pdf.parse_rectangle page.Pdfpage.mediabox in + (minx +. maxx) /. 2., (miny +. maxy) /. 2. + in + Pdftransform.mkrotate (cx, cy) (rad_of_deg (~-.(float rotate))) + +let transform_boxes tr pdf page = + let f (minx, miny, maxx, maxy) = + let minx, miny = Pdftransform.transform_matrix tr (minx, miny) + and maxx, maxy = Pdftransform.transform_matrix tr (maxx, maxy) in + (minx, miny, maxx, maxy) + in + change_boxes f pdf page + +let transform_contents ?(fast=false) tr pdf page = + let transform_op = Pdfops.Op_cm tr in + let resources' = change_pattern_matrices pdf (Pdftransform.matrix_invert tr) page.Pdfpage.resources in + Pdfpage.prepend_operators pdf [transform_op] ~fast {page with Pdfpage.resources = resources'} + +let upright ?(fast=false) range pdf = + if allupright range pdf then pdf else + let upright_page _ _ page = + let tr = upright_transform page in + let page = transform_boxes tr pdf page in + let page = transform_contents ~fast tr pdf page in + rectify_boxes ~fast pdf {page with Pdfpage.rotate = Pdfpage.Rotate0} + in + process_pages (upright_page pdf) pdf range + +(* \section{Scale page data} *) +let scale_pdf ?(fast=false) sx sy pdf range = + let scale_page _ page = + let f (xmin, ymin, xmax, ymax) = + xmin *. sx, ymin *. sy, xmax *. sx, ymax *. sy + in + let page = change_boxes f pdf page + and matrix = Pdftransform.matrix_of_op (Pdftransform.Scale ((0., 0.), sx, sy)) in + let transform_op = + Pdfops.Op_cm matrix + and resources' = + change_pattern_matrices pdf (Pdftransform.matrix_invert matrix) page.Pdfpage.resources + in + Pdfpage.prepend_operators pdf ~fast [transform_op] {page with Pdfpage.resources = resources'} + in + process_pages scale_page pdf range + +(* Scale to fit page of size x * y *) +(* FIXME: Can we do this in terms of scale_contents - and then just fix up the boxes? For 1.8 *) +let scale_to_fit_pdf ?(fast=false) input_scale x y op pdf range = + let scale_page_to_fit _ page = + let matrix = + let (minx, miny, maxx, maxy) = + (* Use cropbox if available *) + Pdf.parse_rectangle + (match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with + | Some r -> r + | None -> page.Pdfpage.mediabox) + in + if maxx <= 0. || maxy <= 0. then failwith "Zero-sized pages are invalid" else + let fx = x /. maxx in let fy = y /. maxy in + let scale = fmin fx fy *. input_scale in + let trans_x = (x -. (maxx *. scale)) /. 2. + in let trans_y = (y -. (maxy *. scale)) /. 2. in + (Pdftransform.matrix_of_transform + [Pdftransform.Translate (trans_x, trans_y); + Pdftransform.Scale ((0., 0.), scale, scale)]) + in + let page = + change_boxes + (function (minx, miny, maxx, maxy) -> 0., 0., x, y (* FIXME: scale boxes properly *)) + pdf page + in + Pdfpage.prepend_operators pdf [Pdfops.Op_cm matrix] ~fast + {page with Pdfpage.resources = change_pattern_matrices pdf (Pdftransform.matrix_invert matrix) page.Pdfpage.resources} + in + process_pages scale_page_to_fit pdf range + +(* Scale contents *) +let scale_page_contents ?(fast=false) scale position pdf _ page = + let (minx, miny, maxx, maxy) as box = + (* Use cropbox if available *) + Pdf.parse_rectangle + (match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with + | Some r -> r + | None -> page.Pdfpage.mediabox) + in + let sx, sy, _ = calculate_position true 0. box Horizontal position in + let tx, ty = + match position with + | Top t -> 0., -.t + | TopLeft t -> t, -.t + | TopRight t -> 0., -.t + | Left t -> t, 0. + | BottomLeft t -> t, t + | Bottom t -> 0., t + | BottomRight t -> -.t, -.t + | Right t -> -.t, 0. + | _ -> 0., 0. (* centre it... FIXME: We will add a center position, eventually, for text and this... *) + in + let transform = + Pdftransform.matrix_of_transform + [Pdftransform.Translate (tx, ty); + Pdftransform.Scale ((sx, sy), scale, scale)] + in + let transform_op = Pdfops.Op_cm transform in + let resources' = change_pattern_matrices pdf transform page.Pdfpage.resources in + Pdfpage.prepend_operators pdf [transform_op] ~fast {page with Pdfpage.resources = resources'} + +let scale_contents ?(fast=false) position scale pdf range = + process_pages (scale_page_contents ~fast scale position pdf) pdf range + +(* \section{List annotations} *) +let get_annotation_string encoding pdf annot = + match Pdf.lookup_direct pdf "/Contents" annot with + | Some (Pdf.String s) -> encode_output encoding s + | _ -> "" + +let print_annotation encoding pdf s = + let s = get_annotation_string encoding pdf s in + flprint "------------------------------------------------------------------------\n"; + flprint s; + flprint "\n" + +let list_page_annotations encoding pdf page = + match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with + | Some (Pdf.Array annots) -> + iter (print_annotation encoding pdf) (map (Pdf.direct pdf) annots) + | _ -> () + +let list_annotations encoding pdf = + let pages = Pdfpage.pages_of_pagetree pdf in + iter (list_page_annotations encoding pdf) pages + +let get_annotations encoding pdf = + let pages = Pdfpage.pages_of_pagetree pdf in + flatten + (map2 + (fun page pagenumber -> + match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with + | Some (Pdf.Array annots) -> + let strings = + map (get_annotation_string encoding pdf) (map (Pdf.direct pdf) annots) + in + combine (many pagenumber (length strings)) strings + | _ -> []) + pages + (ilist 1 (length pages))) + +let list_annotations_more pdf = + let pages = Pdfpage.pages_of_pagetree pdf in + iter2 + (fun page pagenumber -> + iter + (function annot -> + let print_annot annot = + let annot_type_string = + match annot.Pdfannot.subtype with + | Pdfannot.Stamp -> "Stamp" + | Pdfannot.Text -> "Text" + | Pdfannot.Link -> "Link" + | Pdfannot.FreeText -> "FreeText" + | Pdfannot.Line -> "Line" + | Pdfannot.Square -> "Square" + | Pdfannot.Circle -> "Circle" + | Pdfannot.Polygon -> "Polygon" + | Pdfannot.PolyLine -> "PolyLine" + | Pdfannot.Highlight -> "Highlight" + | Pdfannot.Underline -> "Underline" + | Pdfannot.Squiggly -> "Squiggly" + | Pdfannot.StrikeOut -> "StrikeOut" + | Pdfannot.Caret -> "Caret" + | Pdfannot.Ink -> "Ink" + | Pdfannot.Popup _ -> "Popup" + | Pdfannot.FileAttachment -> "FileAttachment" + | Pdfannot.Sound -> "Sound" + | Pdfannot.Movie -> "Movie" + | Pdfannot.Widget -> "Widget" + | Pdfannot.Screen -> "Screen" + | Pdfannot.PrinterMark -> "PrinterMark" + | Pdfannot.TrapNet -> "TrapNet" + | Pdfannot.Watermark -> "Watermark" + | Pdfannot.ThreeDee -> "ThreeDee" + | Pdfannot.Unknown -> "Unknown" + in let subject = + match annot.Pdfannot.subject with + | Some s -> s + | None -> "" + in let contents = + match annot.Pdfannot.annot_contents with + | Some s -> s + | None -> "" + in + Printf.printf "Page: %i\n" pagenumber; + Printf.printf "Subtype: %s\n" annot_type_string; + Printf.printf "Subject: %s\n" subject; + Printf.printf "Contents: %s\n" contents; + in + match annot.Pdfannot.subtype with + | Pdfannot.Popup annot -> print_annot annot + | _ -> print_annot annot + ) + (Pdfannot.annotations_of_page pdf page)) + pages + (ilist 1 (length pages)); + flprint "" (* flush *) + +(* Equalise the page lengths of two PDFs by chopping or extending the first one. +*) +let equalise_lengths a b = + let a' = + if Pdfpage.endpage a < Pdfpage.endpage b then + Pdfpage.change_pages true a + (Pdfpage.pages_of_pagetree a @ + many (Pdfpage.blankpage Pdfpaper.a4) (Pdfpage.endpage b - Pdfpage.endpage a)) + else if Pdfpage.endpage a > Pdfpage.endpage b then + Pdfpage.change_pages true a + (take (Pdfpage.pages_of_pagetree a) (Pdfpage.endpage b)) + else a + in + a', b + +(* \section{Copy annotations} *) +let copy_annotations range frompdf topdf = + let frompdf, topdf = equalise_lengths frompdf topdf in + let copy_annotations_page topdf frompdf frompage topage = + match Pdf.lookup_direct frompdf "/Annots" frompage.Pdfpage.rest with + | Some ((Pdf.Array frompage_annots) as annots) -> + let objects_to_copy = Pdf.objects_referenced [] [] frompdf annots in + iter + (fun n -> + ignore (Pdf.addobj_given_num topdf (n, Pdf.lookup_obj frompdf n))) + objects_to_copy; + let topage_annots = + match Pdf.lookup_direct frompdf "/Annots" topage.Pdfpage.rest with + | Some (Pdf.Array annots) -> annots + | _ -> [] + in + let merged_dict = Pdf.Array (frompage_annots @ topage_annots) in + let topage' = + {topage with Pdfpage.rest = + Pdf.add_dict_entry topage.Pdfpage.rest "/Annots" merged_dict} + in + topdf, topage' + | Some x -> topdf, topage + | None -> topdf, topage + in + match Pdf.renumber_pdfs [frompdf; topdf] with + | [frompdf; topdf] -> + let frompdf_pages = Pdfpage.pages_of_pagetree frompdf + in let topdf_pages = Pdfpage.pages_of_pagetree topdf in + let pdf = ref topdf + and pages = ref [] + and pnum = ref 1 + and frompdf_pages = ref frompdf_pages + and topdf_pages = ref topdf_pages in + (* Go through, updating pdf and collecting new pages. *) + while not (isnull !frompdf_pages) do + let frompdf_page = hd !frompdf_pages + and topdf_page = hd !topdf_pages in + let pdf', page = + if mem !pnum range + then copy_annotations_page !pdf frompdf frompdf_page topdf_page + else !pdf, topdf_page + in + pdf := pdf'; + pages =| page; + incr pnum; + frompdf_pages := tl !frompdf_pages; + topdf_pages := tl !topdf_pages + done; + Pdfpage.change_pages true !pdf (rev !pages) + | _ -> assert false + +(* \section{N-up} *) + +(* Given a number to fit and a mediabox, return a list of transforms for the +2 pages. FIXME: Assumes mediabox (0, 0)-based. Check this for all operations for 1.8. *) +let twoup_transforms mediabox = + let width, height = + match Pdf.parse_rectangle mediabox with + xmin, ymin, xmax, ymax -> xmax -. xmin, ymax -. ymin + in + let width_exceeds_height = width > height in + let rotate = Pdftransform.Rotate ((0., 0.), rad_of_deg 90.) + in let sc = + if width_exceeds_height + then fmin (height /. width) ((width /. 2.) /. height) + else fmin (width /. height) ((height /. 2.) /. width) + in + let scale = Pdftransform.Scale ((0., 0.), sc, sc) in + let tr0, tr1 = + if width_exceeds_height then + Pdftransform.Translate (height *. sc, 0.), + Pdftransform.Translate (height *. sc *. 2., 0.) + else + Pdftransform.Translate (height *. sc, 0.), + Pdftransform.Translate (height *. sc, width *. sc) + in + let t0 = Pdftransform.matrix_of_transform [tr0; rotate; scale] + in let t1 = Pdftransform.matrix_of_transform [tr1; rotate; scale] in + [t0; t1] + +(* Combine two pages into one throughout the document. The pages have already +had their objects renumbered so as not to clash.*) +let twoup_pages pdf = function + | [] -> assert false + | (h::_) as pages -> + let resources' = + pair_reduce + (combine_pdf_resources pdf) + (map (fun p -> p.Pdfpage.resources) pages) + in + let content' = + let transform_stream contents transform = + let ops = Pdfops.parse_operators pdf resources' contents in + (* Need protect_removeme here? especially new, Q-adding protect? *) + Pdfops.stream_of_ops + ([Pdfops.Op_q] @ [Pdfops.Op_cm transform] @ ops @ [Pdfops.Op_Q]) + in + map2 + (fun p -> transform_stream p.Pdfpage.content) + pages + (take (twoup_transforms h.Pdfpage.mediabox) (length pages)) + in + {Pdfpage.mediabox = h.Pdfpage.mediabox; + Pdfpage.rotate = h.Pdfpage.rotate; + Pdfpage.content = content'; + Pdfpage.resources = resources'; + Pdfpage.rest = h.Pdfpage.rest} + +(* Main function *) +let twoup pdf = + let pdf = upright (ilist 1 (Pdfpage.endpage pdf)) pdf in + let pages = Pdfpage.pages_of_pagetree pdf in + let pagesets = splitinto 2 pages in + let renumbered = map (Pdfpage.renumber_pages pdf) pagesets in + let pages' = map (twoup_pages pdf) renumbered in + Pdfpage.change_pages true pdf pages' + +let twoup_stack_transforms mediabox = + let width, height = + match Pdf.parse_rectangle mediabox with + xmin, ymin, xmax, ymax -> xmax -. xmin, ymax -. ymin + in + let rotate = Pdftransform.Rotate ((0., 0.), rad_of_deg 90.) + in let tr0 = Pdftransform.Translate (height, 0.) + in let tr1 = Pdftransform.Translate (height, width) in + let t0 = Pdftransform.matrix_of_transform [tr0; rotate] + in let t1 = Pdftransform.matrix_of_transform [tr1; rotate] in + [t0; t1] + +let twoup_pages_stack pdf = function + | [] -> assert false + | (h::_) as pages -> + let resources = + pair_reduce + (combine_pdf_resources pdf) + (map (fun p -> p.Pdfpage.resources) pages) + in + (* Remove any CropBox *) + let rest = + Pdf.remove_dict_entry h.Pdfpage.rest "/CropBox" + in + let content' = + let transform_stream contents transform = + let ops = Pdfops.parse_operators pdf resources contents in + Pdfops.stream_of_ops + ([Pdfops.Op_q] @ [Pdfops.Op_cm transform] @ ops @ [Pdfops.Op_Q]) + in + map2 + (fun p -> transform_stream p.Pdfpage.content) + pages + (take (twoup_stack_transforms h.Pdfpage.mediabox) (length pages)) + in + {Pdfpage.mediabox = + (let width, height = + match Pdf.parse_rectangle h.Pdfpage.mediabox with + xmin, ymin, xmax, ymax -> xmax -. xmin, ymax -. ymin + in + Pdf.Array [Pdf.Real 0.; Pdf.Real 0.; Pdf.Real height; Pdf.Real (width *. 2.)]); + Pdfpage.rotate = h.Pdfpage.rotate; + Pdfpage.content = content'; + Pdfpage.resources = resources; + Pdfpage.rest = rest} + +let twoup_stack pdf = + let pdf = upright (ilist 1 (Pdfpage.endpage pdf)) pdf in + let pages = Pdfpage.pages_of_pagetree pdf in + let pagesets = splitinto 2 pages in + let renumbered = map (Pdfpage.renumber_pages pdf) pagesets in + let pages' = map (twoup_pages_stack pdf) renumbered in + Pdfpage.change_pages true pdf pages' + +(* \section{Output info} *) +let get_info raw pdf = + let infodict = + match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with + | Some infodict -> infodict + | _ -> Pdf.Dictionary [] + in + let getstring name = + match Pdf.lookup_direct pdf name infodict with + | Some (Pdf.String s) -> + if raw then s else crude_de_unicode s + | _ -> "" + in + getstring + +let get_info_utf8 pdf = + let infodict = + match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with + | Some infodict -> infodict + | _ -> Pdf.Dictionary [] + in + (function name -> + match Pdf.lookup_direct pdf name infodict with + | Some (Pdf.String s) -> Pdftext.utf8_of_pdfdocstring s + | _ -> "") + +let output_info encoding pdf = + let getstring = + match encoding with + | Raw -> get_info true pdf + | Stripped -> get_info false pdf + | UTF8 -> get_info_utf8 pdf + in + Printf.printf "Version: %i.%i\n" pdf.Pdf.major pdf.Pdf.minor; + Printf.printf "Pages: %i\n" (Pdfpage.endpage pdf); + Printf.printf "Title: %s\n" (getstring "/Title"); + Printf.printf "Author: %s\n" (getstring "/Author"); + Printf.printf "Subject: %s\n" (getstring "/Subject"); + Printf.printf "Keywords: %s\n" (getstring "/Keywords"); + Printf.printf "Creator: %s\n" (getstring "/Creator"); + Printf.printf "Producer: %s\n" (getstring "/Producer"); + Printf.printf "Created: %s\n" (getstring "/CreationDate"); + Printf.printf "Modified: %s\n" (getstring "/ModDate") + +(* \section{Blacken text} *) + +(* + \begin{verbatim} + Algorithm: Change + BT + + ET + + ...to... + + BT + Op_g 0. + + ET + + \end{verbatim} +*) +let blacktext_ops pdf resources content = + let not_text = function + | Pdfops.Op_Tj _ | Pdfops.Op_TJ _ + | Pdfops.Op_' _ | Pdfops.Op_'' (_, _, _) + | Pdfops.Op_Td (_, _) | Pdfops.Op_TD (_, _) + | Pdfops.Op_Tm _ | Pdfops.Op_T' + | Pdfops.Op_Tc _ + | Pdfops.Op_Tw _ + | Pdfops.Op_Tz _ + | Pdfops.Op_TL _ + | Pdfops.Op_Tf (_, _) + | Pdfops.Op_Tr _ + | Pdfops.Op_Ts _ -> false + | _ -> true + in let textlevel = ref 0 + in let removed = ref [] + in let operators = + Pdfops.parse_operators pdf resources content + in + let rec remove_colourops prev = function + | [] -> rev prev + | Pdfops.Op_BT::more -> + incr textlevel; + remove_colourops + (Pdfops.Op_g 0.::Pdfops.Op_BT::prev) + more + | Pdfops.Op_ET::more -> + decr textlevel; + let prev' = !removed @ Pdfops.Op_ET::prev in + removed := []; + remove_colourops prev' more + | (Pdfops.Op_G _ + | Pdfops.Op_g _ + | Pdfops.Op_RG (_, _, _) + | Pdfops.Op_rg (_, _, _) + | Pdfops.Op_k (_, _, _, _) + | Pdfops.Op_K (_, _, _, _) + | Pdfops.Op_SCN _ + | Pdfops.Op_SC _ + | Pdfops.Op_scn _ + | Pdfops.Op_sc _ + | Pdfops.Op_SCNName (_, _) + | Pdfops.Op_scnName (_, _) + | Pdfops.Op_CS _ + | Pdfops.Op_cs _ + | Pdfops.Op_sh _ + | Pdfops.Op_gs _) + as op::more -> + if !textlevel > 0 + then + begin + removed =| op; + remove_colourops prev more + end + else remove_colourops (op::prev) more + | op::more -> + if !textlevel > 0 && not_text op then removed =| op; + remove_colourops (op::prev) more + in + let operators' = remove_colourops [] operators in + [Pdfops.stream_of_ops operators'] + +(* Blacken a form xobject, writing it to the same object. *) +let process_xobject f pdf resources i = + let xobj = Pdf.lookup_obj pdf i in + match Pdf.lookup_direct pdf "/Subtype" xobj with + | None -> raise (Pdf.PDFError "No /Subtype in Xobject") + | Some (Pdf.Name "/Form") -> + Pdf.getstream xobj; + begin match xobj with + | Pdf.Stream ({contents = Pdf.Dictionary dict, Pdf.Got bytes} as rf) -> + begin match f pdf resources [Pdf.Stream rf] with + | [Pdf.Stream {contents = (Pdf.Dictionary dict', data)}] -> + let dict' = + Pdf.remove_dict_entry + (Pdf.Dictionary (mergedict dict dict')) + "/Filter" + in + rf := (dict', data) + | _ -> assert false + end + | _ -> assert false (* getstream would have complained already *) + end + | Some _ -> () + + +let process_xobjects pdf page f = + match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with + | Some (Pdf.Dictionary elts) -> + iter + (fun (k, v) -> + match v with + | Pdf.Indirect i -> process_xobject f pdf page.Pdfpage.resources i + | _ -> raise (Pdf.PDFError "blacktext")) + elts + | _ -> () + +let blacktext range pdf = + let blacktext_page _ page = + let content' = + blacktext_ops pdf page.Pdfpage.resources page.Pdfpage.content + in + process_xobjects pdf page blacktext_ops; + {page with Pdfpage.content = content'} + in + process_pages blacktext_page pdf range + +(* \section{Blacken lines} *) +let blacklines_ops pdf resources content = + let rec blacken_strokeops prev = function + | [] -> rev prev + | Pdfops.Op_CS _::t -> + blacken_strokeops (Pdfops.Op_CS "/DeviceGray"::prev) t + | (Pdfops.Op_SC _ | Pdfops.Op_SCN _ | Pdfops.Op_SCNName _ | Pdfops.Op_G _ + | Pdfops.Op_RG _ | Pdfops.Op_K _)::t -> + blacken_strokeops (Pdfops.Op_G 0.::prev) t + | h::t -> blacken_strokeops (h::prev) t + and operators = + Pdfops.parse_operators pdf resources content + in + let operators' = blacken_strokeops [] operators in + [Pdfops.stream_of_ops operators'] + +let blacklines range pdf = + let blacklines_page _ page = + let content' = + blacklines_ops pdf page.Pdfpage.resources page.Pdfpage.content + in + process_xobjects pdf page blacklines_ops; + {page with Pdfpage.content = content'} + in + process_pages blacklines_page pdf range + +(* \section{Blacken Fills} *) +let blackfills_ops pdf resources content = + let rec blacken_fillops prev = function + | [] -> rev prev + | Pdfops.Op_cs _::t -> + blacken_fillops (Pdfops.Op_cs "/DeviceGray"::prev) t + | (Pdfops.Op_sc _ | Pdfops.Op_scn _ | Pdfops.Op_scnName _ | Pdfops.Op_g _ + | Pdfops.Op_rg _ | Pdfops.Op_k _)::t -> + blacken_fillops (Pdfops.Op_g 0.::prev) t + | h::t -> blacken_fillops (h::prev) t + and operators = + Pdfops.parse_operators pdf resources content + in + let operators' = blacken_fillops [] operators in + [Pdfops.stream_of_ops operators'] + +let blackfills range pdf = + let blackfills_page _ page = + let content' = + blackfills_ops pdf page.Pdfpage.resources page.Pdfpage.content + in + process_xobjects pdf page blackfills_ops; + {page with Pdfpage.content = content'} + in + process_pages blackfills_page pdf range + +(* \section{Set a minimum line width to avoid dropout} *) +let thinlines range width pdf = + let thinpage _ page = + let operators = + Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content + in + let ctmstack = ref [ref Pdftransform.i_matrix] in + let scaleof_ctm () = + try + match Pdftransform.decompose (!(hd !ctmstack)) with + (scale, _, _, _, _, _) -> + scale + with + Failure "hd" -> 1. + in + let rec replace_operators prev = function + | [] -> rev prev + | (Pdfops.Op_w w)::more -> + (* Alter width. *) + let width' = width /. scaleof_ctm () in + let w' = + if w >= width' then Pdfops.Op_w w else Pdfops.Op_w width' + in + replace_operators (w'::prev) more + | (Pdfops.Op_cm m)::more -> + (* Update CTM *) + begin try + let top = hd !ctmstack in + top := Pdftransform.matrix_compose !top m + with + Failure "hd" -> error "Malformed file." + end; + replace_operators ((Pdfops.Op_cm m)::prev) more + | Pdfops.Op_q::more -> + (* Push stack *) + begin try + ctmstack =| ref (!(hd !ctmstack)) + with + Failure "hd" -> error "Malformed file" + end; + replace_operators (Pdfops.Op_q::prev) more + | Pdfops.Op_Q::more -> + (* Pop stack *) + begin try + ctmstack := tl !ctmstack + with + Failure "tl" -> error "Malformed file" + end; + replace_operators (Pdfops.Op_Q::prev) more + | (Pdfops.Op_gs gsname)::more -> + (* Perhaps insert [Op_w]. *) + let opw = + match Pdf.lookup_direct pdf "/ExtGState" page.Pdfpage.resources with + | None -> [] + | Some ext_state_dict -> + match Pdf.lookup_direct pdf gsname ext_state_dict with + | None -> [] + | Some gdict -> + match Pdf.lookup_direct pdf "/LW" gdict with + | Some s -> (try [Pdfops.Op_w (Pdf.getnum s)] with _ -> []) + | None -> [] + in + replace_operators (opw @ ((Pdfops.Op_gs gsname)::prev)) more + | x::more -> replace_operators (x::prev) more + in + let operators = replace_operators [] operators in + (* 2. Add an initial 'w' if width more than default width *) + let operators = + if width > 1. then (Pdfops.Op_w width)::operators else operators + in + let content' = [Pdfops.stream_of_ops operators] in + {page with Pdfpage.content = content'} + in + process_pages thinpage pdf range + +(* \section{Remove annotations} *) +let remove_annotations range pdf = + let remove_annotations_page pagenum page = + if mem pagenum range then + let rest' = + Pdf.remove_dict_entry page.Pdfpage.rest "/Annots" + in + {page with Pdfpage.rest = rest'} + else + page + in + process_pages remove_annotations_page pdf range + +(* \section{Making draft documents} *) + +(* Predicate on an xobject: true if an image xobject. *) +let isimage pdf (_, xobj) = + match Pdf.lookup_direct pdf "/Subtype" xobj with + | Some (Pdf.Name "/Image") -> true + | _ -> false + +(* Given a set of resources for a page, and the name of a resource, determine if +that name refers to an image xobject. *) +let xobject_isimage pdf resources name = + match resources with + | Pdf.Dictionary _ -> + begin match Pdf.lookup_direct pdf "/XObject" resources with + | Some xobjects -> + isimage pdf ("", Pdf.lookup_fail "xobject not there" pdf name xobjects) + | _ -> false + end + | _ -> failwith "bad resources" + +(* The subsitute for an image. *) +let substitute boxes = + if boxes then + rev + [Pdfops.Op_q; + Pdfops.Op_w 0.; + Pdfops.Op_G 0.; + Pdfops.Op_re (0., 0., 1., 1.); + Pdfops.Op_m (0., 0.); + Pdfops.Op_l (1., 1.); + Pdfops.Op_m (0., 1.); + Pdfops.Op_l (1., 0.); + Pdfops.Op_S; + Pdfops.Op_Q] + else + [] + +(* Remove references to images from a graphics stream. *) +let rec remove_images_stream boxes pdf resources prev = function + | [] -> rev prev + | (Pdfops.Op_Do name) as h::t -> + if xobject_isimage pdf resources name + then remove_images_stream boxes pdf resources (substitute boxes @ prev) t + else remove_images_stream boxes pdf resources (h::prev) t + | Pdfops.InlineImage _::t -> + remove_images_stream boxes pdf resources (substitute boxes @ prev) t + | h::t -> + remove_images_stream boxes pdf resources (h::prev) t + +let rec process_form_xobject boxes pdf form = + let form = Pdf.direct pdf form in + let page = + {Pdfpage.content = [form]; + Pdfpage.mediabox = Pdf.Null; + Pdfpage.resources = + begin match Pdf.lookup_direct pdf "/Resources" form with + | Some r -> r + | None -> Pdf.Dictionary [] + end; + Pdfpage.rotate = Pdfpage.Rotate0; + Pdfpage.rest = Pdf.Dictionary []} + in + let page', pdf = + remove_images_page boxes pdf page + in + let form' = + match form with + | Pdf.Stream {contents = (dict, _)} -> + begin match + Pdfops.stream_of_ops + (Pdfops.parse_operators pdf (Pdf.Dictionary []) page'.Pdfpage.content) + with + | Pdf.Stream {contents = (_, Pdf.Got data)} -> + let dict' = + Pdf.add_dict_entry dict "/Length" (Pdf.Integer (bytes_size data)) + in + Pdf.Stream {contents = (dict', Pdf.Got data)} + | _ -> assert false + end + | _ -> raise (Pdf.PDFError "not a stream") + in + form', pdf + +(* Remove images from a page. *) +and remove_images_page boxes pdf page = + let isform pdf xobj = + match Pdf.lookup_direct pdf "/Subtype" xobj with Some (Pdf.Name "/Form") -> true | _ -> false + in + (* Remove image xobjects and look into form ones *) + let form_xobjects = + match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with + | Some (Pdf.Dictionary elts) -> + keep (function (_, p) -> isform pdf p) elts + | _ -> [] + in + let resources', pdf = + let names, pointers = split form_xobjects in + let form_xobjects', pdf = + let pdf = ref pdf + in let outputs = ref [] in + iter + (fun p -> + let p', pdf' = process_form_xobject boxes !pdf p in + pdf := pdf'; + outputs =| p') + pointers; + rev !outputs, !pdf + in + let nums = ref [] in + iter + (fun xobj -> + let objnum = Pdf.addobj pdf xobj in + nums =| objnum) + form_xobjects'; + let newdict = + Pdf.Dictionary (combine names (map (fun x -> Pdf.Indirect x) (rev !nums))) + in + Pdf.add_dict_entry page.Pdfpage.resources "/XObject" newdict, pdf + in + let content' = + remove_images_stream boxes pdf page.Pdfpage.resources [] + (Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content) + in + {page with + Pdfpage.content = + (let stream = Pdfops.stream_of_ops content' in + Pdfcodec.encode_pdfstream pdf Pdfcodec.Flate stream; + [stream]); + Pdfpage.resources = resources'}, pdf + +(* Remove images from all pages in a document. *) +let draft boxes range pdf = + let pages = Pdfpage.pages_of_pagetree pdf in + let pagenums = indx pages in + let pdf = ref pdf + in let pages' = ref [] in + iter2 + (fun p pagenum -> + let p', pdf' = + if mem pagenum range + then remove_images_page boxes !pdf p + else p, !pdf + in + pdf := pdf'; + pages' =| p') + pages + pagenums; + Pdfpage.change_pages true !pdf (rev !pages') + +let set_version v pdf = + pdf.Pdf.minor <- v + +(* Custom Code: CSP1 - four up duplication. Alter media box and crop-box. 4-up the data. *) +let custom_csp1_page pdf _ page = + let minx, miny, maxx, maxy = + match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with + | Some r -> Pdf.parse_rectangle r + | None -> Pdf.parse_rectangle page.Pdfpage.mediabox + in + let mx0 = -.minx + in let my0 = -.miny + in let dx = maxx -. minx + in let dy = maxy -. miny in + let content = + let ops = + Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content + in + [Pdfops.Op_q; + Pdfops.Op_cm (Pdftransform.matrix_of_transform [Pdftransform.Translate (mx0, my0)])] @ ops @ [Pdfops.Op_Q] @ + [Pdfops.Op_q; + Pdfops.Op_cm (Pdftransform.matrix_of_transform [Pdftransform.Translate (mx0 +. dx, my0 +. dy)])] @ ops @ [Pdfops.Op_Q] @ + [Pdfops.Op_q; + Pdfops.Op_cm (Pdftransform.matrix_of_transform [Pdftransform.Translate (mx0, my0 +. dy)])] @ ops @ [Pdfops.Op_Q] @ + [Pdfops.Op_q; + Pdfops.Op_cm (Pdftransform.matrix_of_transform [Pdftransform.Translate (mx0 +. dx, my0)])] @ ops @ [Pdfops.Op_Q] + in + let new_mediabox = + Pdf.Array + [Pdf.Real 0.; + Pdf.Real 0.; + Pdf.Real ((maxx -. minx) *. 2.); + Pdf.Real ((maxy -. miny) *. 2.)] + in + {page with + Pdfpage.content = [Pdfops.stream_of_ops content]; + Pdfpage.mediabox = new_mediabox; + Pdfpage.rest = Pdf.add_dict_entry page.Pdfpage.rest "/CropBox" new_mediabox} + +let custom_csp1 pdf = + process_pages (custom_csp1_page pdf) pdf (ilist 1 (Pdfpage.endpage pdf)) + +let custom_csp2 f pdf = + let page = hd (Pdfpage.pages_of_pagetree pdf) in + let m_minx, m_miny, m_maxx, m_maxy = + match page.Pdfpage.mediabox with + | Pdf.Array [a; b; c; d] -> + Pdf.getnum a, Pdf.getnum b, Pdf.getnum c, Pdf.getnum d + | _ -> 0., 0., 0., 0. + in + let c_minx, c_miny, c_maxx, c_maxy = + match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with + | Some (Pdf.Array [a; b; c; d]) -> + Pdf.getnum a, Pdf.getnum b, Pdf.getnum c, Pdf.getnum d + | _ -> m_minx, m_miny, m_maxx, m_maxy + in + let x = (c_minx +. c_maxx) /. 2. + in let y = (c_miny +. c_maxy) /. 2. in + scale_contents (PosCentre (x, y)) (f /. 100.) pdf (ilist 1 (Pdfpage.endpage pdf)) + +let blank_document width height pages = + let pdf_pages = + map (fun () -> Pdfpage.blankpage (Pdfpaper.make Pdfunits.PdfPoint width height)) (many () pages) + in + let pdf, pageroot = Pdfpage.add_pagetree pdf_pages (Pdf.empty ()) in + Pdfpage.add_root pageroot [] pdf + +let blank_document_paper papersize pages = + let pdf_pages = + map (fun () -> Pdfpage.blankpage papersize) (many () pages) + in + let pdf, pageroot = Pdfpage.add_pagetree pdf_pages (Pdf.empty ()) in + Pdfpage.add_root pageroot [] pdf + diff --git a/cpdf.mli b/cpdf.mli new file mode 100644 index 0000000..b31b989 --- /dev/null +++ b/cpdf.mli @@ -0,0 +1,305 @@ +(** Coherent PDF Tools Core Routines *) +open Pdfutil + +type encoding = Raw | UTF8 | Stripped + +val parse_pagespec : Pdf.t -> string -> int list + +val string_of_pagespec : Pdf.t -> int list -> string + +val validate_pagespec : string -> bool + +val name_of_spec : bool -> Pdfmarks.t list -> Pdf.t -> int -> string -> int -> string -> int -> int -> string + +(** Debug: Print out a PDF in readable form to the terminal *) +val print_pdf_objs : Pdf.t -> unit + +(** Same, but from an input and possible password - does the minimal work to find the number of pages. *) +val endpage_io : Pdfio.input -> string option -> string option -> int + +(** Compresses all streams in the PDF document which are uncompressed, using +/FlateDecode, leaving out metadata. If the PDF is encrypted, does nothing. *) +val recompress_pdf : Pdf.t -> Pdf.t + +val decompress_pdf : Pdf.t -> Pdf.t + +(* [set_pdf_info (key, value, version)] sets the entry [key] in the /Info directory, updating +the PDF minor version to [version].*) +val set_pdf_info : (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t + +(* [set_pdf_info (key, value, version)] sets the entry [key] in the +/ViewerPreferences directory, updating the PDF minor version to [version].*) +val set_viewer_preference : (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t + +(* Set the page layout to the given name (sans slash) e.g SinglePage *) +val set_page_layout : Pdf.t -> string -> Pdf.t + +(* Set the page layout to the given name (sans slash) e.g SinglePage *) +val set_page_mode : Pdf.t -> string -> Pdf.t + +(* Expand the string "now" to a PDF date string, ignoring any other string *) +val expand_date : string -> string + +(* Given a function from page number and page to page, a document, and a list +of page numbers to apply it to, apply the function to all those pages. *) +val process_pages : (int -> Pdfpage.t -> Pdfpage.t) -> Pdf.t -> int list -> Pdf.t + +(* Same, but just iterate *) +val iter_pages : (int -> Pdfpage.t -> unit) -> Pdf.t -> int list -> unit + +(* Same, but map *) +val map_pages : (int -> Pdfpage.t -> 'a) -> Pdf.t -> int list -> 'a list + +(* Two possible error classes. *) +exception SoftError of string +exception HardError of string + +(* [presentation range t d h i dir effect_dur pdf] *) +val presentation : int list -> string option -> + float option -> bool -> bool -> int -> float -> Pdf.t -> Pdf.t + +(* [attach_file keep-version topage pdf filename] *) +val attach_file : bool -> int option -> Pdf.t -> string -> Pdf.t + +(* Remove attached files. *) +val remove_attached_files : Pdf.t -> Pdf.t + +(* List attached files. Attachment name and page number. Page 0 is document level. *) +val list_attached_files : Pdf.t -> (string * int) list + +(* [parse_bookmark_file verify pdf input] *) +val parse_bookmark_file : bool -> Pdf.t -> Pdfio.input -> Pdfmarks.t list + +(* [add_bookmarks verify input pdf] *) +val add_bookmarks : bool -> Pdfio.input -> Pdf.t -> Pdf.t + +(* [copy_id keepversion copyfrom copyto] *) +val copy_id : bool -> Pdf.t -> Pdf.t -> Pdf.t + +(* [set_metadata keepversion filename pdf] *) +val set_metadata : bool -> string -> Pdf.t -> Pdf.t + +val set_metadata_from_bytes : bool -> Pdfio.bytes -> Pdf.t -> Pdf.t + +(* Remove the metadata from a file *) +val remove_metadata : Pdf.t -> Pdf.t + +(* [combine_pages fast under over scaletofit swap equalize] *) +val combine_pages : bool -> Pdf.t -> Pdf.t -> bool -> bool -> bool -> Pdf.t + +(* [stamp scale_to_fit fast isover range over pdf] stamps the first page of [over] over each page of the PDF. *) +val stamp : bool -> bool -> bool -> int list -> Pdf.t -> Pdf.t -> Pdf.t + +(* [list_bookmarks deunicode range page_offset pdf output] *) +val list_bookmarks : encoding -> int list -> Pdf.t -> Pdfio.output -> unit + +(* Custom CSP1 *) +val custom_csp1 : Pdf.t -> Pdf.t + +(* Custom CSP2 *) +val custom_csp2 : float -> Pdf.t -> Pdf.t + +(* FIXME: Horrid - should return PDFs, write to file separtely. *) +(* [split_at_bookmarks linearize nobble level spec pdf] *) +val split_at_bookmarks : string -> bool -> (Pdf.t -> Pdf.t) -> int -> string -> Pdf.t -> unit + +(* The new one *) +val split_on_bookmarks : Pdf.t -> int -> Pdf.t list + +(* [split_pdf printf chunksize linearize nobble spec pdf] *) +val split_pdf : Pdfwrite.encryption option -> bool -> string -> int -> bool -> (Pdf.t -> Pdf.t) -> string -> Pdf.t -> unit + +(* Nobble a page, given pdf, pagenumber and page *) +val nobble_page : Pdf.t -> 'a -> Pdfpage.t -> Pdfpage.t + +(* Print page info (Mediabox etc) *) +val output_page_info : Pdf.t -> unit + +val hasbox : Pdf.t -> int -> string -> bool + +(* Print metadate to stdout *) +val get_metadata : Pdf.t -> Pdfio.bytes + +val print_metadata : Pdf.t -> unit + +(* Print font list to stdout *) +val print_fonts : Pdf.t -> unit + +val list_fonts : Pdf.t -> (int * string * string * string * string) list + +(* Possible positions for adding text and other uses. *) +type position = + | PosCentre of float * float + | PosLeft of float * float + | PosRight of float * float + | Top of float + | TopLeft of float + | TopRight of float + | Left of float + | BottomLeft of float + | Bottom of float + | BottomRight of float + | Right of float + | Diagonal + | ReverseDiagonal + +val string_of_position : position -> string + +type orientation = + | Horizontal + | Vertical + | VerticalDown + +type justification = + | LeftJustify + | CentreJustify + | RightJustify + +(* [calculate ignore_d w (xmin, ymin, xmax, ymax) shorterside pos] *) +val calculate_position : + bool -> + float -> + float * float * float * float -> + orientation -> position -> float * float * float + +(* Returns what the added text string would be *) +(*val addtext_returntext : Pdf.t -> string -> int -> string -> int -> string*) + +val metrics_howmany : unit -> int +val metrics_text : int -> string +val metrics_x : int -> float +val metrics_y : int -> float +val metrics_rot : int -> float +val metrics_baseline_adjustment : unit -> float + +(* [add_texts fontname font bates colour positino linespacing fontsize shorterside text pages pdf] *) +val addtexts : + bool -> (*metrics*) + float -> (*linewidth*) + bool -> (*outline*) + bool -> (*fast*) + string -> (*fontname*) + Pdftext.standard_font option -> (*font*) + int -> (*bates number *) + float * float * float -> (*colour*) + position -> (*position*) + float -> (*linespacing*) + float -> (*fontsize*) + bool -> (*underneath*) + string ->(*text*) + int list ->(*page range*) + orientation ->(*orientation*) + bool ->(*relative to cropbox?*) + float ->(*opacity*) + justification ->(*justification*) + bool ->(*midline adjust?*) + string ->(*filename*) + Pdf.t ->(*pdf*) + Pdf.t + +(* Remove text from the given pages. *) +val removetext : int list -> Pdf.t -> Pdf.t + +(* Modify the rotation of the page and its contents to leave the rotation at 0 with the page effectively unaltered. *) +val upright : ?fast:bool -> int list -> Pdf.t -> Pdf.t + +(* [crop_pdf x y w h pdf range] *) +val crop_pdf : float -> float -> float -> float -> Pdf.t -> int list -> Pdf.t + +(* [set_mediabox x y w h pdf range] *) +val set_mediabox : float -> float -> float -> float -> Pdf.t -> int list -> Pdf.t + +val setBox : string -> float -> float -> float -> float -> Pdf.t -> int list -> Pdf.t + +(* Remove any cropping from the given pages. *) +val remove_cropping_pdf : Pdf.t -> int list -> Pdf.t +val remove_trim_pdf : Pdf.t -> int list -> Pdf.t +val remove_bleed_pdf : Pdf.t -> int list -> Pdf.t +val remove_art_pdf : Pdf.t -> int list -> Pdf.t + +(* Change rotation to a given value 0, 90, 180, 270 on given pages. *) +val rotate_pdf : int -> Pdf.t -> int list -> Pdf.t + +(* Rotate clockwise by 0, 90, 180, 270 on given pages. *) +val rotate_pdf_by : int -> Pdf.t -> int list -> Pdf.t + +(* Rotate the contents by the given angle on the given pages. *) +val rotate_contents : ?fast:bool -> float -> Pdf.t -> int list -> Pdf.t + +(* Flip the given pages vertically *) +val vflip_pdf : ?fast:bool -> Pdf.t -> int list -> Pdf.t + +(* Flip the given pages horizontally *) +val hflip_pdf : ?fast:bool -> Pdf.t -> int list -> Pdf.t + +(* Shift a PDF in x and y (in pts) in the given pages. *) +val shift_pdf : ?fast:bool -> float -> float -> Pdf.t -> int list -> Pdf.t + +(* Scale a PDF in sx, sy in the given pages. *) +val scale_pdf : ?fast:bool -> float -> float -> Pdf.t -> int list -> Pdf.t + +(* FIXME: See .ml *) +(* [scale_to_fit_pdf input_scale x y op pdf range] *) +val scale_to_fit_pdf : ?fast:bool -> float -> float -> float -> 'a -> Pdf.t -> int list -> Pdf.t + +(* Scale the contents of a page by a given factor centred around a given point in a given range. *) +val scale_contents : ?fast:bool -> position -> float -> Pdf.t -> int list -> Pdf.t + +(* Put blank pages before the given page numbers *) +val padbefore : int list -> Pdf.t -> Pdf.t + +(* Put blank pages after the given page numbers *) +val padafter : int list -> Pdf.t -> Pdf.t + +(* Pad to a multiple of n pages *) +val padmultiple : int -> Pdf.t -> Pdf.t + +(* List the annotations to standard output *) +val list_annotations : encoding -> Pdf.t -> unit + +val list_annotations_more : Pdf.t -> unit + +val get_annotations : encoding -> Pdf.t -> (int * string) list + +(* Copy the annotations on a given set of pages from a to b yielding c. *) +val copy_annotations : int list -> Pdf.t -> Pdf.t -> Pdf.t + +(* Remove the annotations on given pages. *) +val remove_annotations : int list -> Pdf.t -> Pdf.t + +(* Two-up a PDF. *) +val twoup : Pdf.t -> Pdf.t + +(* Stack Two-up a PDF. *) +val twoup_stack : Pdf.t -> Pdf.t + +(* Output to standard output general information about a PDF. *) +(*val get_info : bool -> Pdf.t -> string -> string*) + +val get_info_utf8 : Pdf.t -> string -> string + +val output_info : encoding -> Pdf.t -> unit + +(* Make all lines in the PDF at least a certain thickness. *) +val thinlines : int list -> float -> Pdf.t -> Pdf.t + +(* Make all text on certain pages black. *) +val blacktext : int list -> Pdf.t -> Pdf.t + +(* Make all lines on certain pages black. *) +val blacklines : int list -> Pdf.t -> Pdf.t + +(* Make all fills on certain pages black. *) +val blackfills : int list -> Pdf.t -> Pdf.t + +(* Remove images from a PDF, optionally adding crossed boxes. *) +val draft : bool -> int list -> Pdf.t -> Pdf.t + +val set_version : int -> Pdf.t -> unit + +(*i val pdf_of_pages : Pdfmarks.bookmark list option -> int list -> Pdfdoc.page list -> Pdf.t -> Pdf.t i*) + +val blank_document : float -> float -> int -> Pdf.t + +val blank_document_paper : Pdfpaper.t -> int -> Pdf.t + diff --git a/cpdfcommand.ml b/cpdfcommand.ml new file mode 100644 index 0000000..26f4d50 --- /dev/null +++ b/cpdfcommand.ml @@ -0,0 +1,3400 @@ +(* cpdf command line tools} *) +let demo = false +and major_version = 1 +and minor_version = 7 +and version_date = "(7th August 2013)" + +open Pdfutil +open Pdfio + +(* Wrap up the file reading functions to exit with code 1 when an encryption +problem occurs. This happens when object streams are in an encrypted document +and so it can't be read without the right password... The existing error +handling only dealt with the case where the document couldn't be decrypted once +it had been loaded. *) +let pdfread_pdf_of_input a b c = + try Pdfread.pdf_of_input a b c with + Pdf.PDFError s when String.length s >=10 && String.sub s 0 10 = "Encryption" -> + raise (Cpdf.SoftError "Bad owner or user password when reading document") + +let pdfread_pdf_of_channel_lazy ?source b c d = + try Pdfread.pdf_of_channel_lazy ?source b c d with + Pdf.PDFError s when String.length s >=10 && String.sub s 0 10 = "Encryption" -> + raise (Cpdf.SoftError "Bad owner or user password when reading document") + +let pdfread_pdf_of_file a b c = + try Pdfread.pdf_of_file a b c with + Pdf.PDFError s when String.length s >=10 && String.sub s 0 10 = "Encryption" -> + raise (Cpdf.SoftError "Bad owner or user password when reading document") + +let optstring = function + | "" -> None + | x -> Some x + +(* To prevent problems when piping on Windows *) +let _ = + set_binary_mode_in stdin true; + set_binary_mode_out stdout true + +(* Fatal error reporting. *) +let error s = + prerr_string (s ^ "\nUse -help for help.\n"); + flush stderr; + exit 2 + +let soft_error s = + Printf.eprintf "%s\n" s; + flush stderr; + exit 1 + +let parse_pagespec pdf spec = + try Cpdf.parse_pagespec pdf spec with + Failure x -> error x + +(* Operations. *) +type op = + | CopyFont of string + | CSP1 + | CSP2 of float + | CSP3 + | CountPages + | Version + | Encrypt + | Decrypt + | StampOn of string + | StampUnder of string + | CombinePages of string + | TwoUp + | TwoUpStack + | RemoveBookmarks + | AddBookmarks of string + | AddText of string + | AddRectangle + | RemoveText + | Draft + | PadBefore + | PadAfter + | PadEvery of int + | PadMultiple of int + | Shift + | Scale + | ScaleToFit + | ScaleContents of float + | AttachFile of string list + | RemoveAttachedFiles + | ListAttachedFiles + | DumpAttachedFiles + | RemoveAnnotations + | ListAnnotations + | ListAnnotationsMore + | CopyAnnotations of string + | Merge + | Split + | SplitOnBookmarks of int + | Clean + | Info + | PageInfo + | Metadata + | SetMetadata of string + | RemoveMetadata + | Fonts + | RemoveFonts + | Compress + | Decompress + | Crop + | RemoveCrop + | CopyCropBoxToMediaBox + | CopyBox + | MediaBox + | Rotate of int + | Rotateby of int + | RotateContents of float + | Upright + | VFlip + | HFlip + | ThinLines of float + | SetAuthor of string + | SetTitle of string + | SetSubject of string + | SetKeywords of string + | SetCreate of string + | SetModify of string + | SetCreator of string + | SetProducer of string + | SetTrapped + | SetUntrapped + | SetVersion of int + | ListBookmarks + | SetPageLayout of string + | SetPageMode of string + | HideToolbar of bool + | HideMenubar of bool + | HideWindowUI of bool + | FitWindow of bool + | CenterWindow of bool + | DisplayDocTitle of bool + | Presentation + | ChangeId + | RemoveId + | CopyId of string + | BlackText + | BlackLines + | BlackFills + | ExtractImages + | ImageResolution of float + | MissingFonts + | DumpData + | UpdateInfo of string + | RemoveUnusedResources + | ExtractFontFile + | ExtractText + | PrintLinearization + +(* Inputs: filename, pagespec. *) +type input_kind = + | AlreadyInMemory of Pdf.t + | InFile of string + | StdIn + +let string_of_input_kind = function + | AlreadyInMemory _ -> "AlreadyInMemory" + | InFile s -> s + | StdIn -> "Stdin" + +type input = + input_kind * string * Pdfmerge.rotation * string * string (* input kind, range, rotation, user_pw, owner_pw *) + +type output_method = + | NoOutputSpecified + | Stdout + | File of string + +(* A list of PDFs to be output, if no output method was specified. *) +let output_pdfs : Pdf.t list ref = ref [] + +type font = + | StandardFont of Pdftext.standard_font + | OtherFont of string + +type args = + {mutable op : op option; + mutable preserve_objstm : bool; + mutable create_objstm : bool; + mutable out : output_method; + mutable inputs : input list; + mutable chunksize : int; + mutable linearize : bool; + mutable rectangle : float * float * float * float; + mutable coord : float * float; + mutable duration : float option; + mutable transition : string option; + mutable horizontal : bool; + mutable inward : bool; + mutable direction : int; + mutable effect_duration : float; + mutable font : font; + mutable fontname : string; + mutable fontsize : float; + mutable color : float * float * float; + mutable opacity : float; + mutable position : Cpdf.position; + mutable underneath : bool; + mutable linespacing : float; + mutable midline : bool; + mutable justification : Cpdf.justification; + mutable bates : int; + mutable prerotate : bool; + mutable orientation : Cpdf.orientation; + mutable relative_to_cropbox : bool; + mutable keepversion : bool; + mutable bycolumns : bool; + mutable pagerotation : int; + mutable crypt_method : string; + mutable owner : string; + mutable user : string; + mutable no_edit : bool; + mutable no_print : bool; + mutable no_copy : bool; + mutable no_annot : bool; + mutable no_forms : bool; + mutable no_extract : bool; + mutable no_assemble : bool; + mutable no_hq_print : bool; + mutable debug : bool; + mutable boxes : bool; + mutable encrypt_metadata : bool; + mutable retain_numbering : bool; + mutable remove_duplicate_fonts : bool; + mutable remove_duplicate_streams : bool; + mutable encoding : Cpdf.encoding; + mutable scale : float; + mutable copyfontpage : int; + mutable copyfontname : string option; + mutable fast : bool; + mutable dashrange : string; + mutable outline : bool; + mutable linewidth : float; + mutable path_to_ghostscript : string; + mutable frombox : string option; + mutable tobox : string option; + mutable mediabox_if_missing : bool; + mutable topage : string option; + mutable printf_format : bool; + mutable scale_stamp_to_fit : bool; + mutable keep_this_id : string option; + mutable do_ask : bool; + mutable verbose : bool; + mutable dont_overwrite_inputs : bool; + mutable dont_overwrite_existing_files : bool; + mutable makenewid : bool; + mutable ismulti : bool; + mutable uprightstamp : bool} + +(* List of all filenames in any AND stage - this is used to check that we don't +overwrite any input file when -dont-overwrite-existing-files is used. *) + +let all_inputs : string list ref = ref [] + +let args = + {op = None; + preserve_objstm = true; + create_objstm = false; + out = NoOutputSpecified; + inputs = []; + chunksize = 1; + linearize = false; + rectangle = 0., 0., 0., 0.; + coord = 0., 0.; + duration = None; + transition = None; + horizontal = true; + inward = true; + direction = 0; + effect_duration = 1.; + font = StandardFont Pdftext.TimesRoman; + fontname = "Times-Roman"; + fontsize = 12.; + color = 0., 0., 0.; + opacity = 1.; + position = Cpdf.TopLeft 100.; + underneath = false; + linespacing = 1.; + midline = false; + justification = Cpdf.LeftJustify; + bates = 0; + prerotate = false; + orientation = Cpdf.Horizontal; + relative_to_cropbox = false; + keepversion = false; + bycolumns = false; + pagerotation = 0; + crypt_method = ""; + owner = ""; + user = ""; + no_edit = false; + no_print = false; + no_copy = false; + no_annot = false; + no_forms = false; + no_extract = false; + no_assemble = false; + no_hq_print = false; + debug = false; + boxes = false; + encrypt_metadata = true; + retain_numbering = false; + remove_duplicate_fonts = false; + remove_duplicate_streams = false; + encoding = Cpdf.Stripped; + scale = 1.; + copyfontpage = 1; + copyfontname = None; + fast = false; + dashrange = "all"; + outline = false; + linewidth = 1.0; + path_to_ghostscript = ""; + frombox = None; + tobox = None; + mediabox_if_missing = false; + topage = None; + printf_format = false; + scale_stamp_to_fit = false; + keep_this_id = None; + do_ask = false; + verbose = false; + dont_overwrite_inputs = false; + dont_overwrite_existing_files = false; + makenewid = false; + ismulti = false; + uprightstamp = false} + +let reset_arguments () = + args.op <- None; + args.preserve_objstm <- true; + args.create_objstm <- false; + args.out <- NoOutputSpecified; + args.inputs <- []; + args.chunksize <- 1; + args.linearize <- false; + args.rectangle <- 0., 0., 0., 0.; + args.coord <- 0., 0.; + args.duration <- None; + args.transition <- None; + args.horizontal <- true; + args.inward <- true; + args.direction <- 0; + args.effect_duration <- 1.; + args.font <- StandardFont Pdftext.TimesRoman; + args.fontname <- "Times-Roman"; + args.fontsize <- 12.; + args.color <- 0., 0., 0.; + args.opacity <- 1.; + args.position <- Cpdf.TopLeft 100.; + args.underneath <- false; + args.linespacing <- 1.; + args.midline <- false; + args.justification <- Cpdf.LeftJustify; + args.bates <- 0; + args.prerotate <- false; + args.orientation <- Cpdf.Horizontal; + args.relative_to_cropbox <- false; + args.keepversion <- false; + args.bycolumns <- false; + args.pagerotation <- 0; + args.crypt_method <- ""; + args.owner <- ""; + args.user <- ""; + args.no_edit <- false; + args.no_print <- false; + args.no_copy <- false; + args.no_annot <- false; + args.no_forms <- false; + args.no_extract <- false; + args.no_assemble <- false; + args.no_hq_print <- false; + args.debug <- false; + args.boxes <- false; + args.encrypt_metadata <- true; + args.retain_numbering <- false; + args.remove_duplicate_fonts <- false; + args.remove_duplicate_streams <- false; + args.encoding <- Cpdf.Stripped; + args.scale <- 1.; + args.copyfontpage <- 1; + args.copyfontname <- None; + args.fast <- false; + args.dashrange <- "all"; + args.outline <- false; + args.linewidth <- 1.0; + args.path_to_ghostscript <- ""; + args.frombox <- None; + args.tobox <- None; + args.mediabox_if_missing <- false; + args.topage <- None; + args.printf_format <- false; + args.scale_stamp_to_fit <- false; + args.keep_this_id <- None; + args.makenewid <- false; + args.ismulti <- false; + args.uprightstamp <- false + (* We don't reset args.do_ask and args.verbose, because they operate on all parts of the AND-ed command line sent from cpdftk. *) + +let banlist_of_args () = + let l = ref [] in + if args.no_edit then l =| Pdfcrypt.NoEdit; + if args.no_print then l =| Pdfcrypt.NoPrint; + if args.no_copy then l =| Pdfcrypt.NoCopy; + if args.no_annot then l =| Pdfcrypt.NoAnnot; + if args.no_forms then l =| Pdfcrypt.NoForms; + if args.no_extract then l =| Pdfcrypt.NoExtract; + if args.no_assemble then l =| Pdfcrypt.NoAssemble; + if args.no_hq_print then l =| Pdfcrypt.NoHqPrint; + !l + +(* If a file is encrypted, decrypt it using the owner password or, if not +present, the user password. If the user password is used, the operation to be +performed is checked to see if it's allowable under the permissions regime. *) + +(* The bans. Each function has a list of bans. If any of these is present in the +bans list in the input file, the operation cannot proceed. Other operations +cannot proceed at all without owner password. *) +let banned banlist = function + | Fonts | Info | Metadata | PageInfo | CountPages -> false (* Always allowed *) + | Decrypt | Encrypt -> true (* Never allowed *) + | _ -> mem Pdfcrypt.NoEdit banlist + +let operation_allowed banlist = function + | None -> true (* Merge *) (* changed to allow it *) + | Some op -> not (banned banlist op) + +let rec decrypt_if_necessary (a, b, c, user_pw, owner_pw) op pdf = + if not (Pdfcrypt.is_encrypted pdf) then pdf else + match Pdfcrypt.decrypt_pdf_owner owner_pw pdf with + | Some pdf -> pdf + | _ -> + match Pdfcrypt.decrypt_pdf user_pw pdf with + | Some pdf, permissions -> + if operation_allowed permissions op + then pdf + else if args.do_ask + then decrypt_if_necessary_ask (a, b, c, user_pw, owner_pw) op pdf + else soft_error "User password cannot give permission for this operation" + | _ -> + if args.do_ask + then decrypt_if_necessary_ask (a, b, c, user_pw, owner_pw) op pdf + else soft_error "Failed to decrypt file: wrong password?" + +and decrypt_if_necessary_ask (a, b, c, user_pw, owner_pw) op pdf = + let name = match a with InFile x -> x | StdIn -> "Standard input" | AlreadyInMemory _ -> "PDF" in + flprint "The password supplied for input PDF:\n"; + flprint (" " ^ name); + flprint "\n did not work. The PDF is encrypted, so you must supply the\n"; + flprint " owner password to open it. To quit, enter a blank password\n"; + flprint "Please enter the password to use on the input PDF:\n"; + flprint (" " ^ name ^ ".\n"); + match Pervasives.read_line () with + | "" -> soft_error "Failed to decrypt file: wrong password?" + | x -> decrypt_if_necessary (a, b, c, user_pw, x) op pdf + +let nobble pdf = + if not demo then pdf else + Cpdf.process_pages (Cpdf.nobble_page pdf) pdf (ilist 1 (Pdfpage.endpage pdf)) + +(* Output Page Count *) +let output_page_count pdf = + Printf.printf "%i\n" (Pdfpage.endpage pdf) + +let setop op () = + args.op <- Some op + +let setout name = + args.out <- File name + +let setchunk c = + if c > 0 + then args.chunksize <- c + else error "invalid chunk size" + +let setlinearize () = + args.linearize <- true + +let fixdashes s = + let bufferdashes chars = + let buf = ref [] in + iter + (function '-' -> buf =@ [' '; '-'; ' '] | x -> buf =| x) + chars; + rev !buf + in + let chars = explode s in + implode (bufferdashes chars) + +let encrypt_to_collect = ref 0 + +let setmethod s = + if args.op = None then args.op <- Some Encrypt; (* Could be additional to -split *) + match s with + | "40bit" | "128bit" | "AES" | "AES256" | "AES256ISO" -> args.crypt_method <- s + | _ -> error "Unsupported encryption method" + +let setowner s = + match s with + | "PROMPT" -> + flprint "Enter owner password to use on the output PDF.\n"; + args.owner <- Pervasives.read_line () + | s -> + args.owner <- s + +let setuser s = + match s with + | "PROMPT" -> + flprint "Enter user password to use on the output PDF.\n"; + args.user <- Pervasives.read_line () + | s -> + args.user <- s + +let anon_fun s = + try + match !encrypt_to_collect with + | 3 -> setmethod s; decr encrypt_to_collect + | 2 -> setowner s; decr encrypt_to_collect + | 1 -> setuser s; decr encrypt_to_collect + | 0 -> + let before, after = cleavewhile (neq '=') (explode s) in + begin match implode before with + | "user" -> + begin match args.inputs with + | [] -> () + | (a, b, c, _, e)::more -> + args.inputs <- (a, b, c, implode (tl after), e)::more + end + | "owner" -> + begin match args.inputs with + | [] -> () + | (a, b, c, d, _)::more -> + args.inputs <- (a, b, c, d, implode (tl after))::more + end + | _ -> raise Not_found + end + | _ -> assert false + with + Not_found -> + try + ignore (String.index s '.'); + args.inputs <- (InFile s, "all", Pdfmerge.DNR, "", "")::args.inputs; + all_inputs := s::!all_inputs + with + Not_found -> + match args.inputs with + | [] -> () + | (a, _, r, d, e)::t -> + match rev (explode s) with + | 'N'::more -> + args.inputs <- (a, fixdashes (implode (rev more)), Pdfmerge.N, d, e)::t + | 'S'::more -> + args.inputs <- (a, fixdashes (implode (rev more)), Pdfmerge.S, d, e)::t + | 'E'::more -> + args.inputs <- (a, fixdashes (implode (rev more)), Pdfmerge.E, d, e)::t + | 'W'::more -> + args.inputs <- (a, fixdashes (implode (rev more)), Pdfmerge.W, d, e)::t + | 'L'::more -> + args.inputs <- (a, fixdashes (implode (rev more)), Pdfmerge.L, d, e)::t + | 'R'::more -> + args.inputs <- (a, fixdashes (implode (rev more)), Pdfmerge.R, d, e)::t + | 'D'::more -> + args.inputs <- (a, fixdashes (implode (rev more)), Pdfmerge.D, d, e)::t + | _ -> + args.inputs <- (a, fixdashes s, r, d, e)::t + +(* Unit conversions to points. *) +let mm x = ((x /. 10.) /. 2.54) *. 72. + +let cm x = (x /. 2.54) *. 72. + +let inch x = x *. 72. + +let points_of_papersize p = + let unit = Pdfpaper.unit p + and w = Pdfpaper.width p + and h = Pdfpaper.height p in + let c = Pdfunits.convert 0. unit Pdfunits.PdfPoint in + c w, c h + +let rec parse_units_again numbers papersize more = + let w, h = points_of_papersize papersize in + parse_units (h::w::numbers) more + +and parse_units numbers = function + | Pdfgenlex.LexName "a10portrait"::more -> + parse_units_again numbers Pdfpaper.a10 more + | Pdfgenlex.LexName "a9portrait"::more -> + parse_units_again numbers Pdfpaper.a9 more + | Pdfgenlex.LexName "a8portrait"::more -> + parse_units_again numbers Pdfpaper.a8 more + | Pdfgenlex.LexName "a7portrait"::more -> + parse_units_again numbers Pdfpaper.a7 more + | Pdfgenlex.LexName "a6portrait"::more -> + parse_units_again numbers Pdfpaper.a6 more + | Pdfgenlex.LexName "a5portrait"::more -> + parse_units_again numbers Pdfpaper.a5 more + | Pdfgenlex.LexName "a4portrait"::more -> + parse_units_again numbers Pdfpaper.a4 more + | Pdfgenlex.LexName "a3portrait"::more -> + parse_units_again numbers Pdfpaper.a3 more + | Pdfgenlex.LexName "a2portrait"::more -> + parse_units_again numbers Pdfpaper.a2 more + | Pdfgenlex.LexName "a1portrait"::more -> + parse_units_again numbers Pdfpaper.a1 more + | Pdfgenlex.LexName "a0portrait"::more -> + parse_units_again numbers Pdfpaper.a0 more + | Pdfgenlex.LexName "a10landscape"::more -> + parse_units_again numbers (Pdfpaper.landscape Pdfpaper.a10) more + | Pdfgenlex.LexName "a9landscape"::more -> + parse_units_again numbers (Pdfpaper.landscape Pdfpaper.a9) more + | Pdfgenlex.LexName "a8landscape"::more -> + parse_units_again numbers (Pdfpaper.landscape Pdfpaper.a8) more + | Pdfgenlex.LexName "a7landscape"::more -> + parse_units_again numbers (Pdfpaper.landscape Pdfpaper.a7) more + | Pdfgenlex.LexName "a6landscape"::more -> + parse_units_again numbers (Pdfpaper.landscape Pdfpaper.a6) more + | Pdfgenlex.LexName "a5landscape"::more -> + parse_units_again numbers (Pdfpaper.landscape Pdfpaper.a5) more + | Pdfgenlex.LexName "a4landscape"::more -> + parse_units_again numbers (Pdfpaper.landscape Pdfpaper.a4) more + | Pdfgenlex.LexName "a3landscape"::more -> + parse_units_again numbers (Pdfpaper.landscape Pdfpaper.a3) more + | Pdfgenlex.LexName "a2landscape"::more -> + parse_units_again numbers (Pdfpaper.landscape Pdfpaper.a2) more + | Pdfgenlex.LexName "a1landscape"::more -> + parse_units_again numbers (Pdfpaper.landscape Pdfpaper.a1) more + | Pdfgenlex.LexName "a0landscape"::more -> + parse_units_again numbers (Pdfpaper.landscape Pdfpaper.a0) more + | Pdfgenlex.LexName "uslegalportrait"::more -> + parse_units_again numbers Pdfpaper.uslegal more + | Pdfgenlex.LexName "usletterportrait"::more -> + parse_units_again numbers Pdfpaper.usletter more + | Pdfgenlex.LexName "uslegallandscape"::more -> + parse_units_again numbers (Pdfpaper.landscape Pdfpaper.uslegal) more + | Pdfgenlex.LexName "usletterlandscape"::more -> + parse_units_again numbers (Pdfpaper.landscape Pdfpaper.usletter) more + | Pdfgenlex.LexInt x::Pdfgenlex.LexName "mm"::more -> + parse_units ((mm <| float_of_int x)::numbers) more + | Pdfgenlex.LexReal x::Pdfgenlex.LexName "mm"::more -> + parse_units (mm x::numbers) more + | Pdfgenlex.LexInt x::Pdfgenlex.LexName "cm"::more -> + parse_units ((cm <| float_of_int x)::numbers) more + | Pdfgenlex.LexReal x::Pdfgenlex.LexName "cm"::more -> + parse_units (cm x::numbers) more + | Pdfgenlex.LexInt x::Pdfgenlex.LexName "in"::more -> + parse_units ((inch <| float_of_int x)::numbers) more + | Pdfgenlex.LexReal x::Pdfgenlex.LexName "in"::more -> + parse_units (inch x::numbers) more + | Pdfgenlex.LexInt x::more -> + parse_units (float_of_int x::numbers) more + | Pdfgenlex.LexReal x::more -> + parse_units (x::numbers) more + | Pdfgenlex.LexName "pt"::more -> + parse_units numbers more + | _ -> rev numbers + +let rec space_units_inner = function + | [] -> [] + | 'm'::'m'::t -> ' '::'m'::'m'::' '::space_units_inner t + | 'c'::'m'::t -> ' '::'c'::'m'::' '::space_units_inner t + | 'i'::'n'::t -> ' '::'i'::'n'::' '::space_units_inner t + | 'p'::'t'::t -> ' '::'p'::'t'::' '::space_units_inner t + | h::t -> h::space_units_inner t + +let space_units s = + implode (space_units_inner (explode s)) + +let parse_units_string s = + parse_units [] (Pdfgenlex.lex_string <| space_units s) + +let parse_rectangle s = + try + match parse_units_string s with + | [x; y; w; h] -> x, y, w, h + | _ -> error "Bad rectangle specification" + with + _ -> error "Bad rectangle specification" + +let parse_coordinate s = + try + match parse_units_string s with + | [dx; dy] -> dx, dy + | _ -> error "Bad coordinate specification" + with + _ -> error "Bad coordinate specification" + +let parse_single_number s = + try + match parse_units_string s with + | [x] -> x + | _ -> error "Bad number Argument" + with + _ -> error "Bad number argument" + +(* Setting operations *) +let setcrop s = + setop Crop (); + args.rectangle <- parse_rectangle s + +let setmediabox s = + setop MediaBox (); + args.rectangle <- parse_rectangle s + +let setrectangle s = + setop AddRectangle (); + args.coord <- parse_coordinate s + +let setrotate i = + if i = 0 || i = 90 || i = 180 || i = 270 + then setop (Rotate i) () + else error "bad rotation" + +let setrotateby i = + if i = 0 || i = 90 || i = 180 || i = 270 + then setop (Rotateby i) () + else error "bad rotation" + +let setrotatecontents f = + setop (RotateContents f) () + +let setauthor s = setop (SetAuthor s) () +let settitle s = setop (SetTitle s) () +let setsubject s = setop (SetSubject s) () +let setkeywords s = setop (SetKeywords s) () +let setcreate s = setop (SetCreate s) () +let setmodify s = setop (SetModify s) () +let setcreator s = setop (SetCreator s) () +let setproducer s = setop (SetProducer s) () +let setmetadata s = setop (SetMetadata s) () +let setversion i = setop (SetVersion i) () +let setpagelayout s = setop (SetPageLayout s) () +let setpagemode s = setop (SetPageMode s) () + +let hidetoolbar b = + try setop (HideToolbar (bool_of_string b)) () with + _ -> failwith "HideToolBar: must use true or false" + +let hidemenubar b = + try setop (HideMenubar (bool_of_string b)) () with + _ -> failwith "HideMenuBar: must use true or false" + +let hidewindowui b = + try setop (HideWindowUI (bool_of_string b)) () with + _ -> failwith "HideWindowUI: must use true or false" + +let fitwindow b = + try setop (FitWindow (bool_of_string b)) () with + _ -> failwith "FitWindow: must use true or false" + +let centerwindow b = + try setop (CenterWindow (bool_of_string b)) () with + _ -> failwith "CenterWindow: must use true or false" + +let displaydoctitle b = + try setop (DisplayDocTitle (bool_of_string b)) () with + _ -> failwith "DisplayDocTitle: must use true or false" + +let setsplitbookmarks i = setop (SplitOnBookmarks i) () +let setstdout () = args.out <- Stdout +let setstdin () = args.inputs <- [StdIn, "all", Pdfmerge.DNR, "", ""] +let settrans s = args.transition <- Some s +let setduration f = args.duration <- Some f +let setvertical () = args.horizontal <- false +let setoutward () = args.inward <- false +let setdirection i = + args.direction <- + match i with + | 0 | 90 | 180 | 270 | 315 -> i + | _ -> error "Bad direction" +let seteffectduration f = args.effect_duration <- f +let setcopyid s = setop (CopyId s) () +let setthinlines s = setop (ThinLines (parse_single_number s)) () + +let setcopyannotations s = setop (CopyAnnotations s) () + +let setshift s = + setop Shift (); + args.coord <- parse_coordinate s + +let setscale s = + setop Scale (); + args.coord <- parse_coordinate s + +let setscaletofit s = + setop ScaleToFit (); + args.coord <- parse_coordinate s + +let setattachfile s = + match args.op with + | Some (AttachFile t) -> + args.op <- Some (AttachFile (s::t)) + | _ -> + setop (AttachFile [s]) () + +let setfont f = + args.font <- + begin match Pdftext.standard_font_of_name ("/" ^ f) with + | Some x -> StandardFont x + | None -> OtherFont f + end; + args.fontname <- f + +let setfontsize f = + if f > 0. + then args.fontsize <-f + else error "Negative font size specified" + +let setaddtext s = + setop (AddText s) () + +let setcolor s = + let r, g, b = + match String.lowercase s with + | "white" -> 1., 1., 1. + | "black" -> 0., 0., 0. + | "red" -> 1., 0., 0. + | "green" -> 0., 1., 0. + | "blue" -> 0., 0., 1. + | _ -> + let getnum = function + | Pdfgenlex.LexInt i -> float i + | Pdfgenlex.LexReal f -> f + | _ -> error "Bad color" + in + match Pdfgenlex.lex_string s with + | [a;b;c] -> getnum a, getnum b, getnum c + | _ -> error "Bad color" + in + args.color <- r, g, b + +let setopacity o = + args.opacity <- o + +let setaddbookmarks s = + setop (AddBookmarks s) () + +let setstampon f = + setop (StampOn f) () + +let setstampunder f = + setop (StampUnder f) () + +let setstamponmulti f = + setop (StampOn f) (); + args.ismulti <- true + +let setstampundermulti f = + setop (StampUnder f) (); + args.ismulti <- true + +let setcombinepages f = + setop (CombinePages f) () + +let setposcenter s = + let x, y = parse_coordinate s in + args.position <- Cpdf.PosCentre (x, y) + +let setposleft s = + let x, y = parse_coordinate s in + args.position <- Cpdf.PosLeft (x, y) + +let setposright s = + let x, y = parse_coordinate s in + args.position <- Cpdf.PosRight (x, y) + +let settop n = + args.position <- Cpdf.Top (parse_single_number n); + args.justification <- Cpdf.CentreJustify + +let settopleft n = + args.position <- Cpdf.TopLeft (parse_single_number n); + args.justification <- Cpdf.LeftJustify + +let settopright n = + args.position <- Cpdf.TopRight (parse_single_number n); + args.justification <- Cpdf.RightJustify + +let setleft n = + args.position <- Cpdf.Left (parse_single_number n); + args.justification <- Cpdf.LeftJustify + +let setbottomleft n = + args.position <- Cpdf.BottomLeft (parse_single_number n); + args.justification <- Cpdf.LeftJustify + +let setbottom n = + args.position <- Cpdf.Bottom (parse_single_number n); + args.justification <- Cpdf.CentreJustify + +let setbottomright n = + args.position <- Cpdf.BottomRight (parse_single_number n); + args.justification <- Cpdf.RightJustify + +let setright n = + args.position <- Cpdf.Right (parse_single_number n); + args.justification <- Cpdf.RightJustify + +let setdiagonal n = + args.position <- Cpdf.Diagonal; + args.justification <- Cpdf.CentreJustify + +let setreversediagonal n = + args.position <- Cpdf.ReverseDiagonal; + args.justification <- Cpdf.CentreJustify + +(* FIXME: We will add a center option to text positioning, which can be used for this too *) +let setcenter n = + args.position <- Cpdf.Diagonal; + args.justification <- Cpdf.CentreJustify + +let setbates n = + args.bates <- n + +let setkeepversion () = + args.keepversion <- true + +let setbycolumns () = + args.bycolumns <- true + +let setpagerotation r = + match r with + | 90 | 270 -> args.pagerotation <- r + | _ -> error "Bad Page rotation. Try 90 or 270." + +let set_no_edit () = + args.no_edit <- true + +let set_no_print () = + args.no_print <- true + +let set_no_copy () = + args.no_copy <- true + +let set_no_annot () = + args.no_annot <- true + +let set_no_forms () = + args.no_forms <- true + +let set_no_extract () = + args.no_extract <- true + +let set_no_assemble () = + args.no_assemble <- true + +let set_no_hq_print () = + args.no_hq_print <- true + +let set_input s = + args.inputs <- (InFile s, "all", Pdfmerge.DNR, "", "")::args.inputs; + all_inputs := s::!all_inputs + +let set_input_dir s = + let names = sort compare (leafnames_of_dir s) in + (* We don't need to set all_inputs here, since pdftk doesn't used -idir *) + args.inputs <- + (rev (map (fun n -> (InFile (s ^ slash ^ n), "all", Pdfmerge.DNR, "", "")) names)) @ args.inputs + +let setdebug () = + set Pdfread.read_debug; + set Pdfwrite.write_debug; + set Pdfcrypt.crypt_debug; + args.debug <- true + +let setboxes () = + args.boxes <- true + +let set_no_encrypt_metadata () = + args.encrypt_metadata <- false + +let set_retain_numbering () = + args.retain_numbering <- true + +let set_remove_duplicate_fonts () = + args.remove_duplicate_fonts <- true + +let setencoding enc () = + args.encoding <- enc + +let setlinespacing f = + args.linespacing <- f + +let setmidline () = + args.midline <- true + +let setscaletofitscale f = + args.scale <- f + +let setscalecontents f = + args.op <- Some (ScaleContents f); + args.position <- Cpdf.Diagonal (* Will be center *) + +(* Parsing the control file *) +let rec getuntilendquote prev = function + | [] -> implode (rev prev), [] + | '"'::t -> implode (rev prev), t + | '\\'::'"'::t -> getuntilendquote ('"'::prev) t + | h::t -> getuntilendquote (h::prev) t + +let rec getarg prev = function + | [] -> implode (rev prev), [] + | h::t -> + if Pdf.is_whitespace h + then implode (rev prev), t + else getarg (h::prev) t + +let rec parse_chars args = function + | [] -> rev args + | h::more when Pdf.is_whitespace h -> + parse_chars args more + | '"'::more -> + let this, rest = getuntilendquote [] more in + parse_chars (this::args) rest + | h::t -> + let this, rest = getarg [] (h::t) in + parse_chars (this::args) rest + +let control_args = ref [] + +let parse_control_file name = + (parse_chars [] + (charlist_of_bytes (Pdfio.bytes_of_input_channel (open_in_bin name)))) + +let setencryptcollect () = + encrypt_to_collect := 3 + +let setcopyfont s = + args.op <- Some (CopyFont s) + +let setfontpage i = + args.copyfontpage <- i + +let setcopyfontname s = + args.copyfontname <- Some s + +let setpadevery i = + args.op <- Some (PadEvery i) + +let setpadmultiple i = + args.op <- Some (PadMultiple i) + +let setfast () = + args.fast <- true + +let setcsp2 f = + args.op <- Some (CSP2 f) + +let setextractimages () = + args.op <- Some ExtractImages + +(* Explicitly add a range. Parse it and replace the top input file with the range. *) +let setrange spec = + args.dashrange <- spec + +let setoutline () = + args.outline <- true + +let setlinewidth l = + args.linewidth <- l + +let setunderneath () = + args.underneath <- true + +let setimageresolution f = + args.op <- Some (ImageResolution f) + +let setgspath p = + args.path_to_ghostscript <- p + +let setvertical () = + args.orientation <- Cpdf.Vertical + +let setverticaldown () = + args.orientation <- Cpdf.VerticalDown + +let setfrombox s = + args.op <- Some CopyBox; + args.frombox <- Some s + +let settobox s = + args.tobox <- Some s + +let setmediaboxifmissing () = + args.mediabox_if_missing <- true + +let setrelativetocropbox () = + args.relative_to_cropbox <- true + +let setprerotate () = + args.prerotate <- true + +let setflatkids () = + Pdfpage.flat_pagetrees := true + +let settopage s = + args.topage <- Some s + +let setprintfformat () = + args.printf_format <- true + +let setscalestamptofit () = + args.scale_stamp_to_fit <- true + +let setkeepthisid () = + match args.inputs with + | (InFile s, _, _, _, _)::_ -> args.keep_this_id <- Some s + | _ -> () + +let setupdateinfo s = + args.op <- Some (UpdateInfo s) + +let setdoask () = + args.do_ask <- true + +let setverbose () = + args.verbose <- true + +let promptinputs () = + flprint "Please enter a filename for an input PDF:\n"; + set_input (Pervasives.read_line ()) + +let promptinputpasswords () = + flprint "Please enter the open password to use on the input PDF:\n "; + match args.inputs with + | (InFile s, b, c, d, _)::more -> + flprint s; + flprint ".\n It can be empty, or have a maximum of 32 characters:\n"; + let pw = Pervasives.read_line () in + args.inputs <- (InFile s, b, c, d, pw)::more + | _ -> () + +let promptoutput () = + flprint "Please enter a name for the output:\n"; + args.out <- File (Pervasives.read_line ()) + +let setdontoverwriteexistingfiles () = + args.dont_overwrite_existing_files <- true + +let setdontoverwriteinputs () = + args.dont_overwrite_inputs <- true + +let setmakenewid () = + args.makenewid <- true + +let setuprightstamp () = + args.uprightstamp <- true + +let setjustifyleft () = + args.justification <- Cpdf.LeftJustify + +let setjustifyright () = + args.justification <- Cpdf.RightJustify + +let setjustifycenter () = + args.justification <- Cpdf.CentreJustify + +let setremoveduplicatestreams () = + args.remove_duplicate_streams <- true + +let setnopreserveobjstm () = + args.preserve_objstm <- false + +let setcreateobjstm () = + args.create_objstm <- true + +let setstdinuser u = + match args.inputs with + | (StdIn, x, y, _, o)::t -> args.inputs <- (StdIn, x, y, u, o)::t + | _ -> error "-stdin-user: must follow -stdin" + +let setstdinowner o = + match args.inputs with + | (StdIn, x, y, u, _)::t -> args.inputs <- (StdIn, x, y, u, o)::t + | _ -> error "-stdin-user: must follow -stdin" + +(* Parse a control file, make an argv, and then make Arg parse it. *) +let rec make_control_argv_and_parse filename = + control_args := !control_args @ parse_control_file filename + +and specs = + [("-version", + Arg.Unit (setop Version), + " Print the cpdf version number"); + ("-o", + Arg.String setout, + " Set the output file, if appropriate"); + ("-i", + Arg.String set_input, + " Add an input file"); + ("-idir", + Arg.String set_input_dir, + " Add a directory of files"); + ("-stdin", + Arg.Unit setstdin, + " Read input from standard input"); + ("-stdin-owner", + Arg.String setstdinowner, + " Owner password for -stdin"); + ("-stdin-user", + Arg.String setstdinuser, + " User password for -stdin"); + ("-stdout", + Arg.Unit setstdout, + " Send result to standard output"); + ("-range", + Arg.String setrange, + " Explicitly add a range"); + ("-change-id", + Arg.Unit (setop ChangeId), + " Change the file's /ID tag"); + ("-no-preserve-objstm", + Arg.Unit setnopreserveobjstm, + " Don't preserve object streams"); + ("-create-objstm", + Arg.Unit setcreateobjstm, + " Create object streams anew"); + ("-keep-version", + Arg.Unit setkeepversion, + " Don't change the version number"); + ("-l", + Arg.Unit setlinearize, + " Linearize output files where possible"); + ("-raw", + Arg.Unit (setencoding Cpdf.Raw), + " Do not process text"); + ("-stripped", + Arg.Unit (setencoding Cpdf.Stripped), + " Process text by simple stripping to ASCII"); + ("-utf8", + Arg.Unit (setencoding Cpdf.UTF8), + " Process text by conversion to UTF8 Unicode"); + ("-fast", + Arg.Unit setfast, + " Speed over correctness with malformed documents"); + ("-control", + Arg.String make_control_argv_and_parse, + " Use a control file"); + ("-merge", + Arg.Unit (setop Merge), + " Merge a number of files into one"); + ("-retain-numbering", + Arg.Unit set_retain_numbering, + " Don't renumber pages when merging"); + ("-remove-duplicate-fonts", + Arg.Unit set_remove_duplicate_fonts, + " Remove duplicate fonts when merging"); + ("-split", + Arg.Unit (setop Split), + " Split a file into individual pages"); + ("-chunk", + Arg.Int setchunk, + " Set chunk size for -split (default 1)"); + ("-split-bookmarks", + Arg.Int setsplitbookmarks, + " Split a file at bookmarks at a given level"); + ("-scale-page", + Arg.String setscale, + " -scale-page \"sx sy\" scales by (sx, sy)"); + ("-scale-to-fit", + Arg.String setscaletofit, + " -scale-to-fit \"x y\" scales to page size (x, y)"); + ("-scale-contents", + Arg.Float setscalecontents, + " Scale Contents by the given factor"); + ("-center", + Arg.Float setcenter, + " Scale contents around center"); + ("-scale-to-fit-scale", + Arg.Float setscaletofitscale, + " -scale-to-fit-scale (1.0 = 100%)"); + ("-shift", + Arg.String setshift, + " -shift \"dx dy\" shifts the chosen pages"); + ("-rotate", + Arg.Int setrotate, + " Set rotation of pages to 0, 90, 180, 270"); + ("-rotateby", + Arg.Int setrotateby, + " Rotate pages by 90, 180 or 270 degrees"); + ("-rotate-contents", + Arg.Float setrotatecontents, + " Rotate contents of pages"); + ("-upright", + Arg.Unit (setop Upright), + " Make pages upright"); + ("-hflip", + Arg.Unit (setop HFlip), + " Flip pages horizontally"); + ("-vflip", + Arg.Unit (setop VFlip), + " Flip pages vertically"); + ("-crop", + Arg.String setcrop, + " Crop specified pages"); + ("-remove-crop", + Arg.Unit (setop RemoveCrop), + " Remove cropping on specified pages"); + ("-copy-cropbox-to-mediabox", + Arg.Unit (setop CopyCropBoxToMediaBox), + ""); (* Undocumented now, since /frombox, /tobox now used *) + ("-frombox", Arg.String setfrombox, " Set box to copy from"); + ("-tobox", Arg.String settobox, " Set box to copy to"); + ("-mediabox-if-missing", Arg.Unit setmediaboxifmissing, " If copy from box missing, substitute media box"); + ("-mediabox", + Arg.String setmediabox, + " Set media box on specified pages"); + ("-encrypt", + Arg.Unit setencryptcollect, + " Encrypt a document"); + ("-decrypt", + Arg.Unit (setop Decrypt), + " Decrypt a file"); + ("-no-edit", Arg.Unit set_no_edit, " No edits"); + ("-no-print", Arg.Unit set_no_print, " No printing"); + ("-no-copy", Arg.Unit set_no_copy, " No copying"); + ("-no-annot", Arg.Unit set_no_annot, " No annotations"); + ("-no-forms", Arg.Unit set_no_forms, " No forms"); + ("-no-extract", Arg.Unit set_no_extract, " No extracting"); + ("-no-assemble", Arg.Unit set_no_assemble, " No assembling"); + ("-no-hq-print", Arg.Unit set_no_hq_print, " No high quality printing"); + ("-no-encrypt-metadata", + Arg.Unit set_no_encrypt_metadata, + " Don't encrypt metadata (AES only)"); + ("-decompress", + Arg.Unit (setop Decompress), + " Decompress"); + ("-compress", + Arg.Unit (setop Compress), + " Compress streams, leaving metadata alone"); + ("-remove-duplicate-streams", + Arg.Unit setremoveduplicatestreams, + ""); + ("-list-bookmarks", + Arg.Unit (setop ListBookmarks), + " List Bookmarks"); + ("-remove-bookmarks", + Arg.Unit (setop RemoveBookmarks), + " Remove bookmarks from a file"); + ("-add-bookmarks", + Arg.String setaddbookmarks, + " Add bookmarks from the given file"); + ("-presentation", + Arg.Unit (setop Presentation), + " Make a presentation"); + ("-trans", + Arg.String settrans, + " Set the transition method for -presentation"); + ("-duration", + Arg.Float setduration, + " Set the display duration for -presentation"); + ("-vertical", + Arg.Unit setvertical, + " Set dimension for Split and Blinds styles"); + ("-outward", + Arg.Unit setoutward, + " Set direction for Split and Box styles"); + ("-direction", + Arg.Int setdirection, + " Set direction for Wipe and Glitter styles"); + ("-effect-duration", + Arg.Float seteffectduration, + " Set the effect duration in seconds"); + ("-stamp-on", + Arg.String setstampon, + " Stamp a file on some pages of another"); + ("-stamp-under", + Arg.String setstampunder, + " Stamp a file under some pages of another"); + ("-combine-pages", + Arg.String setcombinepages, + " Combine two files by merging individual pages"); + ("-add-text", + Arg.String setaddtext, + " Superimpose text on the given range of pages"); + ("-remove-text", + Arg.Unit (setop RemoveText), + " Remove text previously added by cpdf"); + ("-add-rectangle", + Arg.String setrectangle, + ""); + ("-bates", + Arg.Int setbates, + " Set the base bates number"); + ("-font", + Arg.String setfont, + " Set the font"); + ("-font-size", + Arg.Float setfontsize, + " Set the font size"); + ("-color", + Arg.String setcolor, + " Set the color"); + ("-opacity", + Arg.Float setopacity, + " Set the text opacity"); + ("-outline", + Arg.Unit setoutline, + " Use outline mode for text"); + ("-linewidth", + Arg.Float setlinewidth, + " Set line width for outline text"); + ("-pos-center", + Arg.String setposcenter, + " Set position relative to center of baseline"); + ("-pos-left", + Arg.String setposleft, + " Set position relative to left of baseline"); + ("-pos-right", + Arg.String setposright, + " Set position relative to right of baseline"); + ("-top", + Arg.String settop, + " Set position relative to center top of page"); + ("-topleft", + Arg.String settopleft, + " Set position relative to top left of page"); + ("-topright", + Arg.String settopright, + " Set position relative to top right of page"); + ("-left", + Arg.String setleft, + " Set position relative to center left of page"); + ("-bottomleft", + Arg.String setbottomleft, + " Set position relative to bottom left of page"); + ("-bottom", + Arg.String setbottom, + " Set position relative to center bottom of page"); + ("-bottomright", + Arg.String setbottomright, + " Set position relative to bottom right of page"); + ("-right", + Arg.String setright, + " Set position relative to center right of page"); + ("-diagonal", + Arg.Unit setdiagonal, + " Place text diagonally across page"); + ("-reverse-diagonal", + Arg.Unit setreversediagonal, + " Place text diagonally across page from top left"); + ("-justify-left", + Arg.Unit setjustifyleft, + " Justify multiline text left"); + ("-justify-right", + Arg.Unit setjustifyright, + " Justify multiline text right"); + ("-justify-center", + Arg.Unit setjustifycenter, + " Justify multiline text centre"); + ("-underneath", + Arg.Unit setunderneath, + " Text stamp is underneath content"); + ("-line-spacing", + Arg.Float setlinespacing, + " Line spacing (1 is normal)"); + ("-midline", + Arg.Unit setmidline, + " Adjust text to midline rather than baseline"); + ("-relative-to-cropbox", + Arg.Unit setrelativetocropbox, + " Add text relative to Crop Box not Media Box"); + ("-prerotate", + Arg.Unit setprerotate, + " Calls -upright on pages before adding text"); + ("-twoup", + Arg.Unit (setop TwoUp), + " Put 2 pages onto one"); + ("-twoup-stack", + Arg.Unit (setop TwoUpStack), + " Stack 2 pages onto one twice the size"); + ("-pad-before", + Arg.Unit (setop PadBefore), + " Add a blank page before the given pages"); + ("-pad-after", + Arg.Unit (setop PadAfter), + " Add a blank page after the given pages"); + ("-pad-every", + Arg.Int setpadevery, + " Add a blank page after every n pages"); + ("-pad-multiple", + Arg.Int setpadmultiple, + " Pad the document to a multiple of n pages"); + ("-list-annotations", + Arg.Unit (setop ListAnnotations), + " List annotations"); + ("-copy-annotations", + Arg.String setcopyannotations, + " Copy annotations from given file"); + ("-remove-annotations", + Arg.Unit (setop RemoveAnnotations), + " Remove annotations"); + ("-list-fonts", + Arg.Unit (setop Fonts), + " Output font list"); + ("-info", + Arg.Unit (setop Info), + " Output file information"); + ("-page-info", + Arg.Unit (setop PageInfo), + " Output file information"); + ("-set-author", + Arg.String setauthor, + " Set Author"); + ("-set-title", + Arg.String settitle, + " Set Title"); + ("-set-subject", + Arg.String setsubject, + " Set Subject"); + ("-set-keywords", + Arg.String setkeywords, + " Set Keywords"); + ("-set-create", + Arg.String setcreate, + " Set Creation date"); + ("-set-modify", + Arg.String setmodify, + " Set Modification date"); + ("-set-creator", + Arg.String setcreator, + " Set Creator"); + ("-set-producer", + Arg.String setproducer, + " Set Producer"); + ("-set-trapped", + Arg.Unit (setop SetTrapped), + " Mark as trapped"); + ("-set-untrapped", + Arg.Unit (setop SetUntrapped), + " Mark as not trapped"); + ("-set-page-layout", + Arg.String setpagelayout, + " Set page layout upon document opening"); + ("-set-page-mode", + Arg.String setpagemode, + " Set page mode upon document opening"); + ("-set-metadata", + Arg.String setmetadata, + " Set metadata to the contents of a file"); + ("-print-metadata", + Arg.Unit (setop Metadata), + " Output metadata information"); + ("-remove-metadata", + Arg.Unit (setop RemoveMetadata), + " Remove document metadata"); + ("-hide-toolbar", + Arg.String hidetoolbar, + " Hide the viewer's toolbar"); + ("-hide-menubar", + Arg.String hidemenubar, + " Hide the viewer's menubar"); + ("-hide-window-ui", + Arg.String hidewindowui, + " Hide the viewer's scroll bars etc."); + ("-fit-window", + Arg.String fitwindow, + " Resize document's window to fit size of page"); + ("-center-window", + Arg.String centerwindow, + " Position window in the center of screen"); + ("-display-doc-title", + Arg.String displaydoctitle, + " Display document's title in the title bar"); + ("-pages", + Arg.Unit (setop CountPages), + " Count pages"); + ("-list-attached-files", + Arg.Unit (setop ListAttachedFiles), + " List attached files"); + ("-dump-attachments", + Arg.Unit (setop DumpAttachedFiles), + ""); + ("-attach-file", + Arg.String setattachfile, + " Attach a file"); + ("-to-page", + Arg.String settopage, + " Attach file to given page instead of document"); + ("-remove-files", + Arg.Unit (setop RemoveAttachedFiles), + " Remove embedded attached document-level files"); + ("-image-resolution", + Arg.Float setimageresolution, + " List images under a given dpi"); + ("-copy-font", + Arg.String setcopyfont, + " Copy a named font"); + ("-copy-font-page", + Arg.Int setfontpage, + " Set the page a copied font is drawn from"); + ("-remove-fonts", + Arg.Unit (setop RemoveFonts), + " Remove embedded fonts"); + ("-copy-font-name", + Arg.String setcopyfontname, + " Set the name of the font to copy"); + ("-missing-fonts", + Arg.Unit (setop MissingFonts), + " List missing fonts"); + ("-remove-id", + Arg.Unit (setop RemoveId), + " Remove the file's /ID tag"); + ("-draft", + Arg.Unit (setop Draft), + " Remove images from the file"); + ("-boxes", + Arg.Unit setboxes, + " Add crossed boxes to -draft option"); + ("-blacktext", + Arg.Unit (setop BlackText), + " Blacken document text"); + ("-blacklines", + Arg.Unit (setop BlackLines), + " Blacken lines in document"); + ("-blackfills", + Arg.Unit (setop BlackFills), + " Blacken fills in document"); + ("-thinlines", + Arg.String setthinlines, + " Set minimum line thickness to the given width"); + ("-clean", + Arg.Unit (setop Clean), + " Garbage-collect a file"); + ("-set-version", + Arg.Int setversion, + " Set PDF version number"); + ("-copy-id-from", + Arg.String setcopyid, + " Copy one file's ID tag to another"); + (* These items are for cpdftk *) + ("-update-info", Arg.String setupdateinfo, ""); + ("-printf-format", Arg.Unit setprintfformat, ""); + ("-scale-stamp-to-fit", Arg.Unit setscalestamptofit, ""); + ("-dump-data", Arg.Unit (setop DumpData), ""); + ("-keep-this-id", Arg.Unit setkeepthisid, ""); + ("-do-ask", Arg.Unit setdoask, ""); + ("-verbose", Arg.Unit setverbose, ""); + ("-prompt-inputs", Arg.Unit promptinputs, ""); + ("-prompt-input-passwords", Arg.Unit promptinputpasswords, ""); + ("-prompt-output", Arg.Unit promptoutput, ""); + ("-dont-overwrite-existing-files", Arg.Unit setdontoverwriteexistingfiles, ""); + ("-dont-overwrite-inputs", Arg.Unit setdontoverwriteinputs, ""); + ("-make-new-id", Arg.Unit setmakenewid, ""); + ("-upright-stamp", Arg.Unit setuprightstamp, ""); + ("-remove-unused-resources", Arg.Unit (setop RemoveUnusedResources), ""); + ("-stamp-under-multi", Arg.String setstampundermulti, ""); + ("-stamp-on-multi", Arg.String setstamponmulti, ""); + ("-list-annotations-more", Arg.Unit (setop ListAnnotationsMore), ""); + (*These items are undocumented *) + ("-extract-fontfile", Arg.Unit (setop ExtractFontFile), ""); + ("-extract-images", Arg.Unit setextractimages, ""); + (*("-output-graph", Arg.Unit (setoutputgraph), "");*) + ("-csp1", Arg.Unit (setop CSP1), ""); + ("-csp2", Arg.Float setcsp2, ""); + ("-csp3", Arg.Unit (setop CSP3), ""); + ("-text-vertical", Arg.Unit setvertical, ""); + ("-text-vertical-down", Arg.Unit setverticaldown, ""); + ("-flat-kids", Arg.Unit setflatkids, ""); + ("-gs", Arg.String setgspath, ""); + ("-debug", Arg.Unit setdebug, ""); + ("-fix-prince", Arg.Unit (setop RemoveUnusedResources), ""); + ("-extract-text", Arg.Unit (setop ExtractText), ""); + ("-print-linearization", Arg.Unit (setop PrintLinearization), "")] + +and usage_msg = +"Syntax: cpdf [-o ] \n\n\ +This is a copyrighted, commercial program, and may NOT be freely copied.\n\n\ +Version " ^ string_of_int major_version ^ "." ^ string_of_int minor_version ^ " " ^ version_date ^ "\n\n\ +To buy, visit http://www.coherentpdf.com/\n\n\ +Input names are distinguished by containing a '.' and may be\n\ +followed by a page range specification, for instance \"1,2,3\"\n\ +or \"1-6,9-end\" or \"even\" or \"odd\" or \"reverse\".\n\nOperations (See \ +manual for full details):\n" + +(* Reading and writing *) +let rec writing_ok outname = + if args.dont_overwrite_inputs && mem outname !all_inputs then + error ("Error: The output filename: " ^ outname ^"\n is the same as an input filename.\n"); + if args.dont_overwrite_existing_files && Sys.file_exists outname then + begin + flprint ("Output file: " ^ outname ^ " already exists. Overwrite? (y/n)\n"); + match explode (Pervasives.read_line ()) with + | ('y' | 'Y')::_ -> outname + | _ -> + flprint "Enter a name for the output:\n"; + writing_ok (Pervasives.read_line ()) + end + else + outname + +let write_pdf mk_id pdf = + if args.create_objstm && not args.keepversion + then pdf.Pdf.minor <- max pdf.Pdf.minor 5; + let mk_id = args.makenewid || mk_id in + match args.out with + | NoOutputSpecified -> + output_pdfs =| pdf + | File outname -> + let outname = writing_ok outname in + let pdf = Cpdf.recompress_pdf <| nobble pdf in + Pdf.remove_unreferenced pdf; + Pdfwrite.pdf_to_file_options + ~preserve_objstm:args.preserve_objstm + ~generate_objstm:args.create_objstm + args.linearize None mk_id pdf outname + | Stdout -> + let pdf = Cpdf.recompress_pdf <| nobble pdf in + Pdf.remove_unreferenced pdf; + Pdfwrite.pdf_to_channel + ~preserve_objstm:args.preserve_objstm + ~generate_objstm:args.create_objstm + args.linearize None mk_id pdf stdout; + flush stdout (*r For Windows *) + +let pdf_of_stdin user_pw owner_pw = + let user_pw = Some user_pw + and owner_pw = if owner_pw = "" then None else Some owner_pw + in + let o, bytes = Pdfio.input_output_of_bytes 16384 in + try + while true do o.Pdfio.output_char (input_char stdin) done; + Pdf.empty () + with + End_of_file -> + let i = Pdfio.input_of_bytes (Pdfio.extract_bytes_from_input_output o bytes) in + pdfread_pdf_of_input user_pw owner_pw i + +let get_single_pdf op read_lazy = + match args.inputs with + | (InFile inname, _, _, u, o) as input::_ -> + let pdf = + if read_lazy then + pdfread_pdf_of_channel_lazy (optstring u) (optstring o) (open_in_bin inname) + else + pdfread_pdf_of_file (optstring u) (optstring o) inname + in + decrypt_if_necessary input op pdf + | (StdIn, _, _, u, o) as input::_ -> + decrypt_if_necessary input op (pdf_of_stdin u o) + | (AlreadyInMemory pdf, _, _, _, _)::_ -> pdf + | _ -> + raise (Arg.Bad "cpdf: No input specified.\n") + +let get_pagespec () = + match args.inputs with + | (_, ps, _, _, _)::_ -> ps + | _ -> error "get_pagespec" + +let filenames = null_hash () + +(* This now memoizes on the name of the file to make sure we only load each +file once *) +let get_pdf_from_input_kind ((_, _, _, u, o) as input) op = function + | AlreadyInMemory pdf -> pdf + | InFile s -> + begin try Hashtbl.find filenames s with + Not_found -> + let pdf = decrypt_if_necessary input op (pdfread_pdf_of_file (optstring u) (optstring o) s) in + Hashtbl.add filenames s pdf; pdf + end + | StdIn -> + decrypt_if_necessary input op (pdf_of_stdin u o) + +(* Copy a font from [frompdf] with name [fontname] on page [fontpage] to [pdf] on all pages in [range] *) +let copy_font frompdf fontname fontpage range pdf = + match Pdf.renumber_pdfs [frompdf; pdf] with + | [] | [_] | _::_::_::_ -> assert false + | [frompdf; pdf] -> + (* 1. Get fontpage *) + let frompdf_pages = Pdfpage.pages_of_pagetree frompdf in + let frompdf_page = + try select fontpage frompdf_pages with + Not_found -> failwith "copy_font: Page not found in input pdf" + in + (* 2. Extract font *) + let fonts = + match Pdf.lookup_direct frompdf "/Font" frompdf_page.Pdfpage.resources with + | Some f -> f + | None -> failwith "copy_font: font not found" + in + let fromfont = + match Pdf.lookup_direct frompdf fontname fonts with + | Some f -> f + | None -> failwith "copy_font: font not found" + in + let basefontname = + match Pdf.lookup_direct frompdf "/BaseFont" fromfont with + | Some (Pdf.Name n) -> n + | _ -> "/CopyFontAddedNoName" + in + (* 3. Get all objects forming font (except main /Font one) *) + let objnumbers = Pdf.objects_referenced [] [] frompdf fromfont in + (* 4. Copy them to from frompdf to pdf. *) + iter (function objnum -> Pdf.addobj_given_num pdf (objnum, Pdf.lookup_obj frompdf objnum)) objnumbers; + (* 5. Get pages from pdf *) + let pdf_pages = Pdfpage.pages_of_pagetree pdf in + (* 6. Add the font to pages in range *) + let pages' = + map + (function (page, pagenum) -> + if mem pagenum range then + let font = + match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with + | Some f -> f + | None -> Pdf.Dictionary [] + in + let font' = + match font with + | (Pdf.Dictionary _) as d -> + Pdf.add_dict_entry d basefontname fromfont + | _ -> failwith "copy_font: error" + in + let resources' = + Pdf.add_dict_entry page.Pdfpage.resources "/Font" font' + in + {page with + Pdfpage.resources = resources'} + else page) + (combine pdf_pages (indx pdf_pages)); + in + (* 7. Put the pages back into the pdf, and return *) + let pdf, root = Pdfpage.add_pagetree pages' pdf in + Pdfpage.add_root root [] pdf + +(* Extract Images. *) +let pnm_to_channel_24 channel w h s = + let white () = output_char channel ' ' + and newline () = output_char channel '\n' + and output_string = Pervasives.output_string channel in + output_string "P6"; + white (); + output_string (string_of_int w); + white (); + output_string (string_of_int h); + white (); + output_string "255"; + newline (); + let pos = ref 0 in + for y = 1 to h do + for x = 1 to w * 3 do + output_byte channel (bget s !pos); + incr pos + done + done + +let null_device = + match Sys.os_type with + | "Win32" -> "nul" + | _ -> "/dev/null" + +(* cpdf -extract-images in.pdf 2-5 -o img%%% (FIXME: Add output spec. Document png stuff.) *) +let write_stream name stream = + let fh = open_out_bin name in + for x = 0 to bytes_size stream - 1 do + output_byte fh (bget stream x) + done; + close_out fh + +let write_image pdf resources name image = + match Pdfimage.get_image_24bpp pdf resources image with + | Pdfimage.JPEG (stream, _) -> write_stream (name ^ ".jpg") stream + | Pdfimage.JPEG2000 (stream, _) -> write_stream (name ^ ".jpx") stream + | Pdfimage.JBIG2 (stream, _) -> write_stream (name ^ ".jbig2") stream + | Pdfimage.Raw (w, h, Pdfimage.BPP24, stream) -> + let fh = open_out_bin (name ^ ".pnm") in + pnm_to_channel_24 fh w h stream; + close_out fh; + (* If pnmtopng is present, convert the pnm to a PNG. *) + begin match + Sys.command ("pnmtopng -gamma 0.45 -quiet " ^ "\"" ^ name ^ ".pnm\"" ^ "> \"" ^ name ^ ".png\" 2>" ^ null_device) + with + | 0 -> Sys.remove (name ^ ".pnm") + | _ -> () + end + | _ -> () + +(* FIXME: Doesn't cope with images within form xobjects *) +let extract_images pdf range stem = + let pdf_pages = Pdfpage.pages_of_pagetree pdf in + let pages = + option_map + (function (i, pdf_pages) -> if mem i range then Some pdf_pages else None) + (combine (indx pdf_pages) pdf_pages) + in + iter + (function page -> + let xobjects = + match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with + | Some (Pdf.Dictionary elts) -> map snd elts + | _ -> [] + in + let images = + keep (fun o -> Pdf.lookup_direct pdf "/Subtype" o = Some (Pdf.Name "/Image")) xobjects + in + if images <> [] then + (let names = + map + (function n -> let r = Cpdf.name_of_spec false [] (*FIXME *) pdf 0 stem n "" 0 0 in (*i flprint r; flprint "\n"; i*) r) + (ilist 1 (length images)) + in + iter2 (write_image pdf page.Pdfpage.resources) names images)) + pages + +let getencryption pdf = + match Pdfread.what_encryption pdf with + | None | Some Pdfwrite.AlreadyEncrypted -> "Not encrypted" + | Some Pdfwrite.PDF40bit -> "40bit" + | Some Pdfwrite.PDF128bit -> "128bit" + | Some (Pdfwrite.AES128bit true) -> "128bit AES, Metadata encrypted" + | Some (Pdfwrite.AES128bit false) -> "128bit AES, Metadata not encrypted" + | Some (Pdfwrite.AES256bit true) -> "256bit AES, Metadata encrypted" + | Some (Pdfwrite.AES256bit false) -> "256bit AES, Metadata not encrypted" + | Some (Pdfwrite.AES256bitISO true) -> "256bit AES ISO, Metadata encrypted" + | Some (Pdfwrite.AES256bitISO false) -> "256bit AES ISO, Metadata not encrypted" + +let string_of_permission = function + | Pdfcrypt.NoEdit -> "No edit" + | Pdfcrypt.NoPrint -> "No print" + | Pdfcrypt.NoCopy -> "No copy" + | Pdfcrypt.NoAnnot -> "No Annotate" + | Pdfcrypt.NoForms -> "No edit forms" + | Pdfcrypt.NoExtract -> "No extract" + | Pdfcrypt.NoAssemble -> "No assemble" + | Pdfcrypt.NoHqPrint -> "No high-quality print" + +let getpermissions pdf = + fold_left + (fun x y -> if x = "" then x ^ y else x ^ ", " ^ y) + "" + (map string_of_permission (Pdfread.permissions pdf)) + +(* If a cropbox exists, make it the mediabox. If not, change nothing. *) +let copy_cropbox_to_mediabox pdf range = + Cpdf.process_pages + (fun _ page -> + match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with + | Some pdfobject -> {page with Pdfpage.mediabox = Pdf.direct pdf pdfobject} + | None -> page) + pdf + range + +(* Image resolution *) +type xobj = + | Image of int * int (* width, height *) + | Form of Pdftransform.transform_matrix * Pdf.pdfobject * Pdf.pdfobject (* Will add actual data later. *) + +(* Given a page and a list of (pagenum, name, thing) *) +let rec image_resolution_page pdf page pagenum dpi (images : (int * string * xobj) list) = + try + let pageops = Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content + and transform = ref [ref Pdftransform.i_matrix] in + iter + (function + | Pdfops.Op_cm matrix -> + begin match !transform with + | [] -> raise (Failure "no transform") + | _ -> (hd !transform) := Pdftransform.matrix_compose !(hd !transform) matrix + end + | Pdfops.Op_Do xobject -> + let trans (x, y) = + match !transform with + | [] -> raise (Failure "no transform") + | _ -> Pdftransform.transform_matrix !(hd !transform) (x, y) + in + let o = trans (0., 0.) + and x = trans (1., 0.) + and y = trans (0., 1.) + in + (*i Printf.printf "o = %f, %f, x = %f, %f, y = %f, %f\n" (fst o) (snd o) (fst x) (snd x) (fst y) (snd y); i*) + let rec lookup_image k = function + | [] -> assert false + | (_, a, _) as h::_ when a = k -> h + | _::t -> lookup_image k t + in + begin match lookup_image xobject images with + | (pagenum, name, Form (xobj_matrix, content, resources)) -> + let content = + (* Add in matrix etc. *) + let total_matrix = Pdftransform.matrix_compose xobj_matrix !(hd !transform) in + let ops = + Pdfops.Op_cm total_matrix:: + Pdfops.parse_operators pdf resources [content] + in + Pdfops.stream_of_ops ops + in + let page = + {Pdfpage.content = [content]; + Pdfpage.mediabox = Pdf.Null; + Pdfpage.resources = resources; + Pdfpage.rotate = Pdfpage.Rotate0; + Pdfpage.rest = Pdf.Dictionary []} + in + let newpdf = Pdfpage.change_pages true pdf [page] in + image_resolution newpdf [pagenum] dpi + | (pagenum, name, Image (w, h)) -> + let lx = Pdfunits.convert 0. Pdfunits.PdfPoint Pdfunits.Inch (distance_between o x) + and ly = Pdfunits.convert 0. Pdfunits.PdfPoint Pdfunits.Inch (distance_between o y) in + let wdpi = float w /. lx + and hdpi = float h /. ly in + if wdpi < dpi || hdpi < dpi then + Printf.printf "%i, %s, %i, %i, %f, %f\n" pagenum xobject w h wdpi hdpi + (*i else + Printf.printf "S %i, %s, %i, %i, %f, %f\n" pagenum xobject (int_of_float w) (int_of_float h) wdpi hdpi i*) + end + | Pdfops.Op_q -> + begin match !transform with + | [] -> raise (Failure "Unbalanced q/Q ops") + | h::t -> + let h' = ref Pdftransform.i_matrix in + h' := !h; + transform := h'::h::t + end + | Pdfops.Op_Q -> + begin match !transform with + | [] -> raise (Failure "Unbalanced q/Q ops") + | _ -> transform := tl !transform + end + | _ -> ()) + pageops + with + e -> Printf.printf "Error %s\n" (Printexc.to_string e); flprint "\n" + +and image_resolution pdf range dpi = + let images = ref [] in + Cpdf.iter_pages + (fun pagenum page -> + (* 1. Get all image names and their native resolutions from resources as string * int * int *) + match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with + | Some (Pdf.Dictionary xobjects) -> + iter + (function (name, xobject) -> + match Pdf.lookup_direct pdf "/Subtype" xobject with + | Some (Pdf.Name "/Image") -> + let width = + match Pdf.lookup_direct pdf "/Width" xobject with + | Some x -> Pdf.getnum x + | None -> 1. + and height = + match Pdf.lookup_direct pdf "/Height" xobject with + | Some x -> Pdf.getnum x + | None -> 1. + in + images := (pagenum, name, Image (int_of_float width, int_of_float height))::!images + | Some (Pdf.Name "/Form") -> + let resources = + match Pdf.lookup_direct pdf "/Resources" xobject with + | None -> page.Pdfpage.resources (* Inherit from page or form above. *) + | Some r -> r + and contents = + xobject + and matrix = + match Pdf.lookup_direct pdf "/Matrix" xobject with + | Some (Pdf.Array [a; b; c; d; e; f]) -> + {Pdftransform.a = Pdf.getnum a; Pdftransform.b = Pdf.getnum b; Pdftransform.c = Pdf.getnum c; + Pdftransform.d = Pdf.getnum d; Pdftransform.e = Pdf.getnum e; Pdftransform.f = Pdf.getnum f} + | _ -> Pdftransform.i_matrix + in + images := (pagenum, name, Form (matrix, contents, resources))::!images + | _ -> () + ) + xobjects + | _ -> ()) + pdf + range; + (* Now, split into differing pages, and call [image_resolution_page] on each one *) + let pagesplits = + map + (function (a, _, _)::_ as ls -> (a, ls) | _ -> assert false) + (collate (fun (a, _, _) (b, _, _) -> compare a b) (rev !images)) + and pages = + Pdfpage.pages_of_pagetree pdf + in + iter + (function (pagenum, images) -> + let page = select pagenum pages in + image_resolution_page pdf page pagenum dpi images) + pagesplits + +(* Missing Fonts *) + +let is_missing pdf dict = + match Pdf.lookup_direct pdf "/FontDescriptor" dict with + | None -> true + | Some d -> + match Pdf.lookup_direct pdf "/FontFile" d with + | Some _ -> false + | None -> + match Pdf.lookup_direct pdf "/FontFile2" d with + | Some _ -> false + | None -> + match Pdf.lookup_direct pdf "/FontFile3" d with + | Some _ -> false + | None -> true + +let missing_font pdf page (name, dict) = + if is_missing pdf dict then + let subtype = + match Pdf.lookup_direct pdf "/Subtype" dict with + | Some (Pdf.Name n) -> n + | _ -> "" + and basefont = + match Pdf.lookup_direct pdf "/BaseFont" dict with + | Some (Pdf.Name n) -> n + | _ -> "" + and encoding = + match Pdf.lookup_direct pdf "/Encoding" dict with + | Some (Pdf.Name n) -> n + | _ -> "" + in + if Pdftext.standard_font_of_name basefont <> None then () else + Printf.printf "%i, %s, %s, %s, %s\n" page name subtype basefont encoding + +let missing_fonts pdf range = + Cpdf.iter_pages + (fun num page -> + match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with + | Some (Pdf.Dictionary fontdict) -> + (* Extract descendant fonts *) + let name_dict_pairs = + flatten + (map + (fun (name, dict) -> + match Pdf.lookup_direct pdf "/DescendantFonts" dict with + | Some (Pdf.Array desc_fonts) -> map (fun d -> name, d) desc_fonts + | _ -> [(name, dict)]) + fontdict) + in + iter (missing_font pdf num) name_dict_pairs + | _ -> ()) + pdf + range + +let lines_of_channel c = + let ls = ref [] in + try + while true do ls =| input_line c done; [] + with + End_of_file -> rev !ls + +(* parse the results of the standard error part of gs -sDEVICE=bbox *) +let parse_whiteboxes filename = + try + let fh = open_in filename in + let lines = lines_of_channel fh in + let lines = + keep + (function l -> + match explode l with + | '%'::'%'::_ -> true | _ -> false) + lines + in + let hires_lines = drop_odds lines in + let result = + map + (fun line -> + let line = implode (drop (explode line) 20) in + match Pdfgenlex.lex_string line with + | [a; b; c; d] -> + let getfloat = function + | Pdfgenlex.LexInt i -> float i + | Pdfgenlex.LexReal f -> f + | _ -> raise (Failure "parse_whiteboxes") + in + getfloat a, getfloat b, getfloat c, getfloat d + | x -> raise (Failure ("bad lex on line " ^ line ^ "made tokens " ^ Pdfgenlex.string_of_tokens x))) + hires_lines + in + close_in fh; + result + with + e -> + Printf.eprintf "%s\n" ("parse_whiteboxes " ^ Printexc.to_string e); + raise (Failure "") + +(* Make start, end pairs from a sortedrange *) +let rec startends_of_range_inner pairs ls = + match pairs, ls with + | _, [] -> rev pairs + | [], (h::t) -> + startends_of_range_inner [(h, h)] t + | ((s, e)::ps), (h::t) when h = e + 1 -> + startends_of_range_inner ((s, h)::ps) t + | ps, (h::t) -> + startends_of_range_inner ((h, h)::ps) t + +let startends_of_range x = + startends_of_range_inner [] x + +(* Calculating margins *) +let calculate_margins filename pdf (s, e) = + (* Call ghostscript *) + let gscall = + args.path_to_ghostscript ^ + " -dSAFER -dNOPAUSE -dBATCH -sDEVICE=bbox -r1200" + ^ " -dFirstPage=" + ^ string_of_int s + ^ " -dLastPage=" ^ string_of_int e + ^ " \"" ^ filename ^ "\"" ^ " > waste.txt 2> margins.txt" + in + match Sys.command gscall with + | 0 -> + (* Parse white boxes *) + let whiteboxes = + parse_whiteboxes "margins.txt" + (* Get media boxes *) + and mediaboxes = + take' (e - s + 1) + (drop' (s - 1) + (map + (function page -> + (* Prefer the crop box *) + match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with + | Some pdfobject -> Pdf.parse_rectangle (Pdf.direct pdf pdfobject) + | None -> Pdf.parse_rectangle page.Pdfpage.mediabox) + (Pdfpage.pages_of_pagetree pdf))) + in + iter2 + (fun (m_minx, m_miny, m_maxx, m_maxy) (w_minx, w_miny, w_maxx, w_maxy) -> + if w_minx = 0. && w_miny = 0. && w_maxx = 0. && w_maxy = 0. then + Printf.printf "100, 100, 100, 100\n" + else + let topmargin = ((m_maxy -. w_maxy) /. (m_maxy -. m_miny)) *. 100. + and bottommargin = ((w_miny -. m_miny) /. (m_maxy -. m_miny)) *. 100. + and leftmargin = ((w_minx -. m_minx) /. (m_maxx -. m_minx)) *. 100. + and rightmargin = ((m_maxx -. w_maxx) /. (m_maxx -. m_minx)) *. 100. in + Printf.printf "%f, %f, %f, %f\n" leftmargin rightmargin topmargin bottommargin) + mediaboxes + whiteboxes; + (* Clean up temp files *) + Sys.remove "margins.txt"; + Sys.remove "waste.txt" + | _ -> Printf.eprintf "Call to ghostscript failed." + +let calculate_margins filename pdf range = + iter (calculate_margins filename pdf) (startends_of_range (sort compare range)) + +(* copy the contents of the box f to the box t. If mediabox_if_missing is set, +the contents of the mediabox will be used if the from fox is not available. If +mediabox_is_missing is false, the page is unaltered. *) +let copy_box f t mediabox_if_missing pdf range = + Cpdf.process_pages + (fun _ page -> + if f = "/MediaBox" then + {page with Pdfpage.rest = + (Pdf.add_dict_entry page.Pdfpage.rest t (page.Pdfpage.mediabox))} + else + match Pdf.lookup_direct pdf f page.Pdfpage.rest with + | Some pdfobject -> + if t = "/MediaBox" + then {page with + Pdfpage.mediabox = Pdf.direct pdf pdfobject} + else {page with Pdfpage.rest = + (Pdf.add_dict_entry page.Pdfpage.rest t (Pdf.direct pdf pdfobject))} + | None -> + if mediabox_if_missing + then {page with Pdfpage.rest = Pdf.add_dict_entry page.Pdfpage.rest t page.Pdfpage.mediabox} + else page) + pdf + range + +(* Remove Embedded fonts. This is done by removing the Font Descriptor. *) +let remove_fontdescriptor pdf = function + | Pdf.Dictionary d as font -> + begin match lookup "/Type" d with + | Some (Pdf.Name "/Font") -> + (match Pdf.lookup_direct pdf "/FontDescriptor" font with + | Some fontdes -> + let fontdescriptor' = + Pdf.remove_dict_entry + (Pdf.remove_dict_entry + (Pdf.remove_dict_entry fontdes "/FontFile") + "/FontFile2") + "/FontFile3" + in + Pdf.add_dict_entry font "/FontDescriptor" (Pdf.Indirect (Pdf.addobj pdf fontdescriptor')) + | _ -> font) + | _ -> font + end + | x -> x + +let remove_fonts pdf = + Pdf.objiter (fun k v -> ignore (Pdf.addobj_given_num pdf (k, remove_fontdescriptor pdf v))) pdf; + pdf + +let dump_attachment out pdf (_, embeddedfile) = + match Pdf.lookup_direct pdf "/F" embeddedfile with + | Some (Pdf.String s) -> + let efdata = + begin match Pdf.lookup_direct pdf "/EF" embeddedfile with + | Some d -> + let stream = + match Pdf.lookup_direct pdf "/F" d with + | Some s -> s + | None -> error "Bad embedded file stream" + in + Pdfcodec.decode_pdfstream_until_unknown pdf stream; + begin match stream with Pdf.Stream {contents = (_, Pdf.Got b)} -> b | _ -> error "Bad embedded file stream" end + | _ -> error "Bad embedded file stream" + end + in + let filename = if out = "" then s else out ^ "/" ^ s in + (*i Printf.printf "writing to %s\n" filename; i*) + let fh = open_out_bin filename in + for x = 0 to bytes_size efdata - 1 do output_byte fh (bget efdata x) done; + close_out fh + | _ -> () + +let dump_attached_document pdf out = + let root = Pdf.lookup_obj pdf pdf.Pdf.root in + let names = + match Pdf.lookup_direct pdf "/Names" root with Some n -> n | _ -> Pdf.Dictionary [] + in + match Pdf.lookup_direct pdf "/EmbeddedFiles" names with + | Some x -> + iter (dump_attachment out pdf) (Pdf.contents_of_nametree pdf x) + | None -> () + +let dump_attached_page pdf out page = + let annots = + match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with + | Some (Pdf.Array l) -> l + | _ -> [] + in + let efannots = + keep + (fun annot -> + match Pdf.lookup_direct pdf "/Subtype" annot with + | Some (Pdf.Name "/FileAttachment") -> true + | _ -> false) + annots + in + let fsannots = option_map (Pdf.lookup_direct pdf "/FS") efannots in + iter (dump_attachment out pdf) (map (fun x -> 0, x) fsannots) + +(* Dump both document-level and page-level attached files to file, using their file names *) +let dump_attached_files pdf out = + try + dump_attached_document pdf out; + iter (dump_attached_page pdf out) (Pdfpage.pages_of_pagetree pdf) + with + _ -> error "Couldn't dump attached files" + +(* Prerotate a pdf *) +let prerotate_pdf pdf r = + let setto angle = Cpdf.rotate_pdf angle pdf (ilist 1 (Pdfpage.endpage pdf)) + and setby angle = Cpdf.rotate_pdf_by angle pdf (ilist 1 (Pdfpage.endpage pdf)) in + match r with + | Pdfmerge.DNR -> pdf + | Pdfmerge.N -> setto 0 + | Pdfmerge.S -> setto 180 + | Pdfmerge.E -> setto 90 + | Pdfmerge.W -> setto 270 + | Pdfmerge.L -> setby ~-90 + | Pdfmerge.R -> setby 90 + | Pdfmerge.D -> setby 180 + +(* Convert from unicode or PDFDocencoded to ASCII string with HTML entities in it. *) +let html_of_unicode s = + implode + (flatten + (map + (function 60 -> explode "<" + | 62 -> explode ">" + | 38 -> explode "&" + | 34 -> explode """ + | x when x >= 0x20 && x <= 0x7e -> [char_of_int x] + | x -> ['&';'#'] @ explode (string_of_int x) @ [';']) + (Pdftext.codepoints_of_pdfdocstring s))) + +(* Convert from HTML entities to a PDF string which is unicode-encoded (if there are any non-ASCII chars, or PDFDocEncoded if there aren't) . *) +let unicode_of_html s = + let rec codepoints_of_html ps = function + | '&'::'l'::'t'::';'::r -> codepoints_of_html (60::ps) r + | '&'::'g'::'t'::';'::r -> codepoints_of_html (62::ps) r + | '&'::'a'::'m'::'p'::';'::r -> codepoints_of_html (38::ps) r + | '&'::'q'::'u'::'o'::'t'::';'::r -> codepoints_of_html (34::ps) r + | '&'::'#'::r -> + begin match cleavewhile (function '0'..'9' -> true | _ -> false) r with + | [], r -> codepoints_of_html ps r + | cs, (';'::r) -> + let i = try int_of_string (implode cs) with _ -> error "bad HTML literal in update_info" in + codepoints_of_html (i::ps) r + | _ -> error "bad HTML literal in update_info 2" + end + | x::r when int_of_char x >= 0x20 && int_of_char x <= 0x7e -> codepoints_of_html (int_of_char x::ps) r + | _::r -> codepoints_of_html ps r + | [] -> rev ps + in + Pdftext.pdfdocstring_of_codepoints (codepoints_of_html [] (explode s)) + +let dump_data pdf out = + let channel = + match out with + | NoOutputSpecified -> stdout + | Stdout -> stdout + | File f -> open_out_bin f + in + let prs s = Pervasives.output_string channel s in + (* 1. Info keys *) + begin match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with + | Some (Pdf.Dictionary d) -> + iter + (function (name, pdfobj) -> + match pdfobj with + | Pdf.String s -> + begin match s with "" -> () | _ -> + begin match explode name with + | [] -> () + | h::t -> prs (Printf.sprintf "InfoKey: %s\nInfoValue: %s\n" (implode t) (html_of_unicode s)) + end + end + | _ -> ()) + d + | _ -> flprint "Warning: no info dictionary found\n"; () + end; + let hex s = + fold_left ( ^ ) "" (map (Printf.sprintf "%02x") (map int_of_char (explode s))) + in + (* 2. IDs *) + begin match Pdf.lookup_direct pdf "/ID" pdf.Pdf.trailerdict with + | Some (Pdf.Array [Pdf.String s; Pdf.String t]) -> prs (Printf.sprintf "PdfID0: %s\nPdfID1: %s\n" (hex s) (hex t)) + | _ -> () + end; + (* 3. No of pages *) + prs (Printf.sprintf "NumberOfPages: %i\n" (Pdfpage.endpage pdf)); + (* 4. Outlines *) + iter + (function m -> + prs (Printf.sprintf "BookmarkTitle: %s\n" (html_of_unicode m.Pdfmarks.text)); + prs (Printf.sprintf "BookmarkLevel: %i\n" (m.Pdfmarks.level + 1)); + prs (Printf.sprintf "BookmarkPageNumber: %i\n" (Pdfpage.pagenumber_of_target pdf m.Pdfmarks.target))) + (Pdfmarks.read_bookmarks pdf); + (* 5. Close and finish *) + match out with File _ -> close_out channel | _ -> flush stdout + +(* Parse and update info *) +let update_info pdf source = + let channel = + match source with + | "use-stdin" -> stdin + | x -> open_in_bin x + in + let rec read_lines prev channel = + try read_lines (input_line channel::prev) channel with End_of_file -> rev prev + in + let lines = read_lines [] channel in + let kvpairs = + map + (function l -> let k, v = cleavewhile (neq ':') (explode l) in implode k, implode (tail_no_fail (tail_no_fail v))) + lines + in + (*i iter + (function (k, v) -> Printf.printf "(%s,%s)\n" k v) + kvpairs; i*) + (* Split into 1) info keys / values 2) PdfIDs, Bookmarks *) + let infolines = + keep (function (("InfoKey" | "InfoValue"), _) -> true | _ -> false) kvpairs; + and pdfidlines = + keep (function (("PdfID0" | "PdfID1"), _) -> true | _ -> false) kvpairs + and bookmarklines = + keep (function (("BookmarkTitle" | "BookmarkLevel" | "BookmarkPageNumber"), _) -> true | _ -> false) kvpairs + in + (* 1. Add/Replace info keys *) + let kvpairs = + map + (function [(_, k); (_, v)] -> k, v | _ -> error "Mismatched info Key/Value pairs") + (splitinto 2 infolines) + in + let pdf = + {pdf with Pdf.trailerdict = + Pdf.add_dict_entry pdf.Pdf.trailerdict "/Info" + (Pdf.Dictionary + (fold_left + (fun d (k, v) -> add k v d) + (match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with | Some (Pdf.Dictionary d) -> d | _ -> []) + (map (function (k, v) -> "/" ^ k, Pdf.String (unicode_of_html v)) kvpairs)))} + in + (* 2. Add/Replace PDF Id *) + let pdf = + let unhex s = + match Pdfread.lex_hexstring (Pdfio.input_of_string ("<" ^ s ^ ">")) with + | Pdfgenlex.LexString s -> s + | _ -> error "PDFId wrongly formed in update_info file" + in + match pdfidlines with + | ["PdfID0", a; "PdfID1", b] -> + {pdf with Pdf.trailerdict = + Pdf.add_dict_entry pdf.Pdf.trailerdict "/ID" (Pdf.Array [Pdf.String (unhex a); Pdf.String (unhex b)])} + | _ -> pdf + in + (* 3. Replace Bookmarks *) + let marks = + map + (function + | [("BookmarkTitle", a); ("BookmarkLevel", b); ("BookmarkPageNumber", c)] -> + {Pdfmarks.level = int_of_string b - 1; + Pdfmarks.text = unicode_of_html a; + Pdfmarks.target = Pdfpage.target_of_pagenumber pdf (int_of_string c); + Pdfmarks.isopen = false} + | _ -> error "Bookmark entries malformed in update_info file") + (splitinto 3 bookmarklines) + in + let pdf = Pdfmarks.add_bookmarks marks pdf in + begin match source with "use-stdin" -> () | _ -> close_in channel end; + pdf + +(* If pages in stamp < pages in main, extend stamp by repeating its last page. If pages in stamp more, chop stamp *) +let equalize_pages_extend main stamp = + let length_stamp = Pdfpage.endpage stamp + in let length_main = Pdfpage.endpage main + in let extend_lastpage lastpage page len = + Pdfpage.change_pages true page (Pdfpage.pages_of_pagetree page @ (many lastpage len)) + in let chop pdf n = + Pdfpage.change_pages true pdf (take (Pdfpage.pages_of_pagetree pdf) n) + in + if length_stamp > length_main + then chop stamp length_main + else extend_lastpage (last (Pdfpage.pages_of_pagetree stamp)) stamp (length_main - length_stamp) + +let remove_unused_resources_page pdf n page = + let xobjects, all_names = + match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with + | Some (Pdf.Dictionary d) -> Pdf.Dictionary d, map fst d + | _ -> Pdf.Dictionary [], [] + in + let names_to_keep = + option_map + (function Pdfops.Op_Do n -> Some n | _ -> None) + (Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content) + in + let names_to_remove = lose (mem' names_to_keep) all_names in + let xobjdict = fold_left (Pdf.remove_dict_entry) xobjects names_to_remove in + {page with Pdfpage.resources = Pdf.add_dict_entry page.Pdfpage.resources "/XObject" xobjdict} + +let remove_unused_resources pdf = + Cpdf.process_pages (remove_unused_resources_page pdf) pdf (ilist 1 (Pdfpage.endpage pdf)) + +let extract_page_text pdf _ page = + let text_extractor = ref None in + fold_left ( ^ ) "" + (map + (function + | Pdfops.Op_Tf (fontname, _) -> + let fontdict = + match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with + | None -> raise (Pdf.PDFError "Missing /Font in text extraction") + | Some d -> + match Pdf.lookup_direct pdf fontname d with + | None -> raise (Pdf.PDFError "Missing font in text extraction") + | Some d -> d + in + text_extractor := Some (Pdftext.text_extractor_of_font pdf fontdict); + "" + | Pdfops.Op_Tj text when !text_extractor <> None -> + Pdftext.utf8_of_codepoints + (Pdftext.codepoints_of_text (unopt !text_extractor) text) + | Pdfops.Op_TJ (Pdf.Array objs) when !text_extractor <> None -> + fold_left ( ^ ) "" + (option_map + (function + | Pdf.String text -> + Some + (Pdftext.utf8_of_codepoints + (Pdftext.codepoints_of_text (unopt !text_extractor) text)) + | _ -> None) + objs) + | _ -> "") + (Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content)) + +(* For each page, extract all the ops with text in them, and concatenate it all together *) +let extract_text pdf range = + fold_left ( ^ ) "" + (Cpdf.map_pages (extract_page_text pdf) pdf range) + +(* Extracts font to font.dat in CWD. *) +let extract_fontfile pagenumber fontname pdf = + let resources = (select pagenumber (Pdfpage.pages_of_pagetree pdf)).Pdfpage.resources in + match Pdf.lookup_direct pdf "/Font" resources with + | None -> failwith "extract_fontfile: font not found" + | Some fonts -> + let fontobj = Pdf.lookup_fail "no /Fonts" pdf fontname fonts in + let font = Pdftext.read_font pdf fontobj in + match font with + | Pdftext.CIDKeyedFont (_, {Pdftext.cid_fontdescriptor = {Pdftext.fontfile = Some fontfile}}, _) + | Pdftext.SimpleFont {Pdftext.fontdescriptor = Some {Pdftext.fontfile = Some fontfile}} -> + begin let objnum = + match fontfile with + | Pdftext.FontFile i -> i + | Pdftext.FontFile2 i -> i + | Pdftext.FontFile3 i -> i + in + match Pdf.lookup_obj pdf objnum with + | Pdf.Stream s as obj -> + Pdfcodec.decode_pdfstream pdf obj; + begin match s with + | {contents = (_, Pdf.Got bytes)} -> + let fh = open_out_bin "font.dat" in + for x = 0 to bytes_size bytes - 1 do output_byte fh (bget bytes x) done; + close_out fh; + (* Now try to read using Pdfcff module *) + (*let font = Pdftruetype.to_type3 pdf font in*) + (*let extractor = Pdftext.text_extractor_of_font pdf fontobj in*) + (*flprint "glyph names for incodes 0,1,2,3..."; + iter print_string (Pdftext.glyphnames_of_text extractor "\000\001\002\003\004\005\006\007"); + flprint "\n";*) + () + | _ -> failwith "extract_fontfile" + end + | _ -> failwith "extract_fontfile" + end + | _ -> failwith "unsupported or unfound font" + +let addrectangle (w, h) color position relative_to_cropbox underneath range pdf = + let addrectangle_page _ page = + page + in + Cpdf.process_pages addrectangle_page pdf range + +(* \section{Main function} *) +let go () = + match args.op with + | Some Version -> + flprint + ("cpdf Version " ^ string_of_int major_version ^ "." ^ string_of_int minor_version ^ " " ^ version_date ^ "\n") + | None | Some Merge -> + (* 14th Sept 2010 - changed to write_pdf false pdf because we're not supposed to change it.*) + begin match args.out, args.inputs with + | _, (_::_ as inputs) -> + let write_pdf x pdf = + match args.keep_this_id with + | None -> write_pdf x pdf + | Some s -> + (* get the ID from the file with name 's', and copy to pdf *) + let namewiths = + keep (function (InFile s', _, _, _, _) when s' = s -> true | _ -> false) inputs + in + match namewiths with + | (namewiths, _, _, _, _) as input::t -> + let spdf = get_pdf_from_input_kind input (Some Decrypt) namewiths in + write_pdf x (Cpdf.copy_id true spdf pdf) + | _ -> write_pdf x pdf + in + let names, ranges, rotations, _, _ = split5 inputs in + let pdfs = map2 (fun i -> get_pdf_from_input_kind i (Some Decrypt)) inputs names in + (* If at least one file had object streams and args.preserve_objstm is true, set -objstm-create *) + if args.preserve_objstm then + iter + (fun pdf -> + if Hashtbl.length pdf.Pdf.objects.Pdf.object_stream_ids > 0 + then args.create_objstm <- true) + pdfs; + begin match pdfs with + | [pdf] -> + (* FIXME Here, if the output file is different from the + input file, and we're just extracting pages, might we use a + lazy read? *) + (*Pdfwrite.debug_whole_pdf pdf;*) + if hd ranges <> "all" || hd rotations <> Pdfmerge.DNR || !Pdfpage.flat_pagetrees then + let pdf = if hd rotations <> Pdfmerge.DNR then prerotate_pdf pdf (hd rotations) else pdf in + let range = parse_pagespec pdf (hd ranges) in + let newpdf = Pdfpage.pdf_of_pages ~retain_numbering:args.retain_numbering pdf range in + write_pdf false newpdf + else + write_pdf false pdf + | _ -> + (* If args.keep_this_id is set, change the ID to the one from the kept one *) + let rangenums = map2 parse_pagespec pdfs ranges in + let outpdf = + Pdfmerge.merge_pdfs ~rotations args.retain_numbering args.remove_duplicate_fonts + (map string_of_input_kind names) pdfs rangenums + in + write_pdf false outpdf + end + | _ -> + match args.op with + | Some Merge -> + error "Merge: Must specify one output and at least one input" + | None -> + error "Must specify one output and at least one input" + | _ -> assert false + end + | Some RemoveUnusedResources -> + begin match args.inputs, args.out with + | _::_, _ -> + let pdf = get_single_pdf (Some RemoveUnusedResources) false in + let outpdf = remove_unused_resources pdf in + write_pdf true outpdf + | _ -> error "RemoveUnusedResources: bad command line" + end + | Some (CopyFont fromfile) -> + begin match args.inputs, args.out with + | (_, pagespec, _, u, o)::_, _ -> + let pdf = get_single_pdf (Some (CopyFont fromfile)) false + and frompdf = pdfread_pdf_of_file (optstring u) (optstring o) fromfile in + let range = parse_pagespec pdf pagespec in + let copyfontname = + match args.copyfontname with + | Some x -> x + | None -> failwith "copy_font: no font name given" + in + let outpdf = copy_font frompdf copyfontname args.copyfontpage range pdf in + write_pdf true outpdf + | _ -> error "copyfont: bad command line" + end + | Some RemoveFonts -> + begin match args.inputs, args.out with + | (_, pagespec, _, _, _)::_, _ -> + let pdf = get_single_pdf (Some RemoveFonts) false in + write_pdf true (remove_fonts pdf) + | _ -> error "remove fonts: bad command line" + end + | Some ExtractFontFile -> + (*Graphics.open_graph " 1600x1050";*) + begin match args.inputs, args.out with + | (_, pagespec, _, u, o)::_, _ -> + let pdf = get_single_pdf (Some ExtractFontFile) false in + let page = args.copyfontpage + and name = + match args.copyfontname with + | Some x -> x + | None -> failwith "extract fontfile: no font name given" + in + extract_fontfile page name pdf + | _ -> error "extract fontfile: bad command line" + end + | Some CountPages -> + let pdf, inname, input = + match args.inputs with + | (InFile inname, _, _, u, o) as input::_ -> pdfread_pdf_of_channel_lazy (optstring u) (optstring o) (open_in_bin inname), inname, input + | (StdIn, _, _, u, o) as input::_ -> pdf_of_stdin u o, "", input + | (AlreadyInMemory pdf, _, _, _, _) as input::_ -> pdf, "", input + | _ -> raise (Arg.Bad "cpdf: No input specified.\n") + in + let pdf = decrypt_if_necessary input (Some Info) pdf in + output_page_count pdf + | Some Clean -> + begin match args.out with + | (File _ | Stdout) -> + let pdf' = get_single_pdf (Some Clean) false in + write_pdf false pdf' + | _ -> error "Clean: No output specified" + end + | Some Info -> + (* Change as of 17th Sept 08 - now presents the pdf undecrypted so that + encryption info can be read out *) + let pdf, inname, input = + match args.inputs with + | (InFile inname, _, _, u, o) as input::_ -> + pdfread_pdf_of_channel_lazy (optstring u) (optstring o) (open_in_bin inname), inname, input + | (StdIn, _, _, u, o) as input::_ -> pdf_of_stdin u o, "", input + | (AlreadyInMemory pdf, _, _, _, _) as input::_ -> pdf, "", input + | _ -> raise (Arg.Bad "cpdf: No input specified.\n") + in + Printf.printf "Encryption: %s\n" (getencryption pdf); + Printf.printf "Permissions: %s\n" (getpermissions pdf); + if inname <> "" then + Printf.printf "Linearized: %b\n" (Pdfread.is_linearized (Pdfio.input_of_channel (open_in_bin inname))); + let pdf = decrypt_if_necessary input (Some Info) pdf in + Cpdf.output_info args.encoding pdf + | Some PageInfo -> + Cpdf.output_page_info (get_single_pdf (Some PageInfo) true) + | Some Metadata -> + Cpdf.print_metadata (get_single_pdf (Some Metadata) true) + | Some Fonts -> + Cpdf.print_fonts (get_single_pdf (Some Fonts) true) + | Some ListBookmarks -> + begin match args.inputs, args.out with + | (_, pagespec, _, _, _)::_, _ -> + let pdf = get_single_pdf args.op true in + let range = parse_pagespec pdf pagespec in + Cpdf.list_bookmarks args.encoding range pdf (Pdfio.output_of_channel stdout); + flush stdout + | _ -> error "list-bookmarks: bad command line" + end + | Some Crop -> + begin match args.inputs, args.out with + | (_, pagespec, _, _, _)::_, _ -> + let x, y, w, h = args.rectangle in + let pdf = get_single_pdf (Some Crop) false in + let range = parse_pagespec pdf pagespec in + let pdf = Cpdf.crop_pdf x y w h pdf range in + write_pdf false pdf + | _ -> error "crop: bad command line" + end + + | Some MediaBox -> + begin match args.inputs, args.out with + | (_, pagespec, _, _, _)::_, _ -> + let x, y, w, h = args.rectangle in + let pdf = get_single_pdf (Some MediaBox) false in + let range = parse_pagespec pdf pagespec in + let pdf = Cpdf.set_mediabox x y w h pdf range in + write_pdf false pdf + | _ -> error "set media box: bad command line" + end + | Some CopyBox -> + begin match args.inputs, args.out with + | (_, pagespec, _, _, _)::_, _ -> + let pdf = get_single_pdf (Some CopyBox) false in + let range = parse_pagespec pdf pagespec in + let f, t = + begin match args.frombox, args.tobox with + | Some f, Some t -> f, t + | _ -> error "Copy box: no tobox or no frombox specified" + end + in + let pdf = copy_box f t args.mediabox_if_missing pdf range in + write_pdf false pdf + | _ -> error "Copy Box: bad command line" + end + | Some Decompress -> + let pdf = get_single_pdf (Some Decompress) true in + Pdf.iter_stream + (function stream -> + try Pdfcodec.decode_pdfstream_until_unknown pdf stream with + e -> Printf.eprintf "Decode failure: %s. Carrying on...\n" (Printexc.to_string e); ()) + pdf; + begin match args.out with + | NoOutputSpecified -> + error "no output specified" + | File outname -> + let outname = writing_ok outname in + Pdfwrite.pdf_to_file_options args.linearize None args.makenewid pdf outname + | Stdout -> + Pdfwrite.pdf_to_channel args.linearize None args.makenewid pdf stdout; + flush stdout + end + | Some Compress -> + let pdf = get_single_pdf (Some Compress) false in + if args.remove_duplicate_streams then + Pdfmerge.remove_duplicate_fonts pdf; (* Will eventually do more! *) + write_pdf false (Cpdf.recompress_pdf pdf) + | Some RemoveCrop -> + begin match args.inputs, args.out with + | (_, pagespec, _, _, _)::_, _ -> + let pdf = get_single_pdf (Some RemoveCrop) false in + let range = parse_pagespec pdf pagespec in + let pdf = Cpdf.remove_cropping_pdf pdf range in + write_pdf false pdf + | _ -> error "remove-crop: bad command line" + end + | Some CopyCropBoxToMediaBox -> + begin match args.inputs, args.out with + | (_, pagespec, _, _, _)::_, _ -> + let pdf = get_single_pdf (Some CopyCropBoxToMediaBox) false in + let range = parse_pagespec pdf pagespec in + let pdf = copy_cropbox_to_mediabox pdf range in + write_pdf false pdf + | _ -> error "remove-crop: bad command line" + end + | Some (Rotate _) | Some (Rotateby _) -> + begin match args.inputs, args.out with + | (_, pagespec, _, _, _)::_, _ -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf pagespec in + let rotate = + match args.op with + | Some (Rotate i) -> Cpdf.rotate_pdf i + | Some (Rotateby i) -> Cpdf.rotate_pdf_by i + | _ -> assert false + in + let pdf = rotate pdf range in + write_pdf false pdf + | _ -> error "rotate: bad command line" + end + | Some (RotateContents a) -> + begin match args.inputs, args.out with + | (_, pagespec, _, _, _)::_, _ -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf pagespec in + let pdf = Cpdf.rotate_contents ~fast:args.fast a pdf range in + write_pdf false pdf + | _ -> error "rotate-contents: bad command line" + end + | Some Upright -> + begin match args.inputs, args.out with + | (_, pagespec, _, _, _)::_, _ -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf pagespec in + let pdf = Cpdf.upright ~fast:args.fast range pdf in + write_pdf false pdf + | _ -> error "rotate-contents: bad command line" + end + | Some ((VFlip | HFlip) as flip) -> + begin match args.inputs, args.out with + | (_, pagespec, _, _, _)::_, _ -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf pagespec in + let pdf = + if flip = VFlip + then Cpdf.vflip_pdf ~fast:args.fast pdf range + else Cpdf.hflip_pdf ~fast:args.fast pdf range + in + write_pdf false pdf + | _ -> error "flip: bad command line" + end + | Some ((SetAuthor _ | SetTitle _ | SetSubject _ | SetKeywords _ + | SetCreate _ | SetModify _ | SetCreator _ | SetProducer _ + | SetTrapped | SetUntrapped) as op) -> + let key, value, version = + let f s = if args.encoding <> Cpdf.Raw then Pdftext.pdfdocstring_of_utf8 s else s in + match op with + | SetAuthor s -> "/Author", Pdf.String (f s), 0 + | SetTitle s -> "/Title", Pdf.String (f s), 1 + | SetSubject s -> "/Subject", Pdf.String (f s), 1 + | SetKeywords s -> "/Keywords", Pdf.String (f s), 1 + | SetCreate s -> "/CreationDate", Pdf.String (Cpdf.expand_date s), 0 + | SetModify s -> "/ModDate", Pdf.String (Cpdf.expand_date s), 0 + | SetCreator s -> "/Creator", Pdf.String (f s), 0 + | SetProducer s -> "/Producer", Pdf.String (f s), 0 + | SetTrapped -> "/Trapped", Pdf.Boolean true, 3 + | SetUntrapped -> "/Trapped", Pdf.Boolean false, 3 + | _ -> assert false + in + let pdf = get_single_pdf args.op false in + let version = if args.keepversion then pdf.Pdf.minor else version in + write_pdf false (Cpdf.set_pdf_info (key, value, version) pdf) + | Some ((HideToolbar _ | HideMenubar _ | HideWindowUI _ + | FitWindow _ | CenterWindow _ | DisplayDocTitle _) as op) -> + begin match args.out with + | _ -> + let key, value, version = + match op with + | HideToolbar s -> "/HideToolbar", Pdf.Boolean s, 0 + | HideMenubar s -> "/HideMenubar", Pdf.Boolean s, 0 + | HideWindowUI s -> "/HideWindowUI", Pdf.Boolean s, 0 + | FitWindow s -> "/FitWindow", Pdf.Boolean s, 0 + | CenterWindow s -> "/CenterWindow", Pdf.Boolean s, 0 + | DisplayDocTitle s -> "/DisplayDocTitle", Pdf.Boolean s, 4 + | _ -> assert false + in + let pdf = get_single_pdf args.op false in + let version = if args.keepversion then pdf.Pdf.minor else version in + write_pdf false (Cpdf.set_viewer_preference (key, value, version) pdf) + end + | Some (SetMetadata metadata_file) -> + write_pdf false (Cpdf.set_metadata args.keepversion metadata_file (get_single_pdf args.op false)) + | Some (SetVersion v) -> + let pdf = get_single_pdf args.op false in + write_pdf false {pdf with Pdf.minor = v} + | Some (SetPageLayout s) -> + write_pdf false (Cpdf.set_page_layout (get_single_pdf args.op false) s) + | Some (SetPageMode s) -> + write_pdf false (Cpdf.set_page_mode (get_single_pdf args.op false) s) + | Some Split -> + begin match args.inputs, args.out with + | [(f, ranges, _, _, _)], File output_spec -> + let pdf = get_single_pdf args.op true + and filename = + match f with + | InFile n -> n + | _ -> "" + in + let enc = + match args.crypt_method with + | "" -> None + | _ -> + Some + {Pdfwrite.encryption_method = + (match args.crypt_method with + | "40bit" -> Pdfwrite.PDF40bit + | "128bit" -> Pdfwrite.PDF128bit + | "AES" -> Pdfwrite.AES128bit args.encrypt_metadata + | "AES256" -> Pdfwrite.AES256bit args.encrypt_metadata + | "AES256ISO" -> Pdfwrite.AES256bitISO args.encrypt_metadata + | _ -> assert false (* Pre-checked *)); + Pdfwrite.owner_password = args.owner; + Pdfwrite.user_password = args.user; + Pdfwrite.permissions = banlist_of_args ()} + in + Cpdf.split_pdf enc args.printf_format filename args.chunksize args.linearize nobble output_spec pdf + | _, Stdout -> error "Can't split to standard output" + | _, NoOutputSpecified -> error "Split: No output format specified" + | _ -> error "Split: bad parameters" + end + | Some Presentation -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + let pdf' = + Cpdf.presentation + range + args.transition args.duration args.horizontal + args.inward args.direction args.effect_duration pdf + in + pdf.Pdf.minor <- if args.keepversion then pdf.Pdf.minor else max pdf.Pdf.minor 1; + write_pdf false pdf' + | Some ChangeId -> + begin match args.inputs, args.out with + | [(k, _, _, _, _) as input], File s -> + let pdf = get_pdf_from_input_kind input args.op k in + let s = writing_ok s in + Pdfwrite.pdf_to_file_options + ~preserve_objstm:args.preserve_objstm + ~generate_objstm:args.create_objstm + args.linearize None true pdf s + | [(k, _, _, _, _) as input], Stdout -> + let pdf = get_pdf_from_input_kind input args.op k in + Pdfwrite.pdf_to_channel + ~preserve_objstm:args.preserve_objstm + ~generate_objstm:args.create_objstm + args.linearize None true pdf stdout; + flush stdout + | _ -> error "ChangeId: exactly one input file and output file required." + end + | Some RemoveId -> + let pdf = get_single_pdf args.op false in + pdf.Pdf.trailerdict <- Pdf.remove_dict_entry pdf.Pdf.trailerdict "/ID"; + write_pdf false pdf + | Some (CopyId getfrom) -> + begin match args.inputs with + | [(k, _, _, u, o) as input] -> + let pdf = + Cpdf.copy_id + args.keepversion + (pdfread_pdf_of_file (optstring u) (optstring o) getfrom) + (get_pdf_from_input_kind input args.op k) + in + write_pdf false pdf + | _ -> error "copy-id: No input file specified" + end + | Some (ThinLines w) -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + write_pdf false (Cpdf.thinlines range w pdf) + | Some BlackText -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + write_pdf false (Cpdf.blacktext range pdf) + | Some BlackLines -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + write_pdf false (Cpdf.blacklines range pdf) + | Some BlackFills -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + write_pdf false (Cpdf.blackfills range pdf) + | Some RemoveAnnotations -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + write_pdf false (Cpdf.remove_annotations range pdf) + | Some (CopyAnnotations getfrom) -> + begin match args.inputs with + | [(k, _, _, u, o) as input] -> + let input_pdf = get_pdf_from_input_kind input args.op k in + let range = parse_pagespec input_pdf (get_pagespec ()) in + let pdf = + Cpdf.copy_annotations + range + (pdfread_pdf_of_file (optstring u) (optstring o) getfrom) + input_pdf + in + write_pdf false pdf + | _ -> error "copy-annotations: No input file specified" + end + | Some ListAnnotations -> + Cpdf.list_annotations args.encoding (get_single_pdf args.op true) + | Some ListAnnotationsMore -> + Cpdf.list_annotations_more (get_single_pdf args.op true) + | Some Shift -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + begin match args.coord with (dx, dy) -> + write_pdf false (Cpdf.shift_pdf ~fast:args.fast dx dy pdf range) + end + | Some Scale -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + begin match args.coord with (sx, sy) -> + write_pdf false (Cpdf.scale_pdf ~fast:args.fast sx sy pdf range) + end + | Some ScaleToFit -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + let x, y = args.coord + and scale = args.scale in + write_pdf false (Cpdf.scale_to_fit_pdf ~fast:args.fast scale x y args.op pdf range) + | Some (ScaleContents scale) -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + write_pdf false (Cpdf.scale_contents ~fast:args.fast args.position scale pdf range) + | Some ListAttachedFiles -> + let pdf = get_single_pdf args.op false in + let names, numbers = split (Cpdf.list_attached_files pdf) in + iter2 (Printf.printf "%i %s\n") numbers names; + flprint "" + | Some DumpAttachedFiles -> + let pdf = get_single_pdf args.op false in + begin match args.out with + | NoOutputSpecified -> dump_attached_files pdf "" + | File n -> dump_attached_files pdf n + | Stdout -> error "Can't dump attachments to stdout" + end + | Some RemoveAttachedFiles -> + write_pdf false (Cpdf.remove_attached_files (get_single_pdf args.op false)) + | Some (AttachFile files) -> + begin match args.inputs with + | [(k, _, _, _, _) as input] -> + let pdf = get_pdf_from_input_kind input args.op k in + (* Convert from string to int *) + let topage = + try + match args.topage with + | None -> None + | Some "end" -> Some (Pdfpage.endpage pdf) + | Some s -> Some (int_of_string s) + with _ -> error "Bad -to-page" + in + let pdf = fold_left (Cpdf.attach_file args.keepversion topage) pdf (rev files) in + write_pdf false pdf + | _ -> error "attach file: No input file specified" + end + | Some (SplitOnBookmarks level) -> + begin match args.out with + | File output_spec -> + let pdf = get_single_pdf args.op false + and filename = + match args.inputs with + | [(InFile f, _, _, _, _)] -> f + | _ -> "" + in + Cpdf.split_at_bookmarks filename args.linearize nobble level output_spec pdf + | Stdout -> error "Can't split to standard output" + | NoOutputSpecified -> error "Split: No output format specified" + end + | Some PadBefore -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + write_pdf false (Cpdf.padbefore range pdf) + | Some PadAfter -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + write_pdf false (Cpdf.padafter range pdf) + | Some (PadEvery n) -> + let pdf = get_single_pdf args.op false in + let range = + match keep (function m -> m mod n = 0) (ilist 1 (Pdfpage.endpage pdf)) with + | [] -> [] + | l -> if last l = Pdfpage.endpage pdf then all_but_last l else l + in + write_pdf false (Cpdf.padafter range pdf) + | Some (PadMultiple n) -> + let pdf = get_single_pdf args.op false in + write_pdf false (Cpdf.padmultiple n pdf) + | Some Draft -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + write_pdf false (Cpdf.draft args.boxes range pdf) + | Some (AddText text) -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + let font = + match args.font with + | StandardFont f -> Some f + | OtherFont f -> None (* it's in fontname *) + in + let pdf = + if args.prerotate then Cpdf.upright ~fast:args.fast range pdf else pdf + and filename = + match args.inputs with + | (InFile inname, _, _, _, _)::_ -> inname + | _ -> "" + in + write_pdf false + (Cpdf.addtexts + false args.linewidth args.outline args.fast args.fontname font args.bates + args.color args.position args.linespacing args.fontsize + args.underneath text range args.orientation args.relative_to_cropbox args.opacity + args.justification args.midline filename pdf) + | Some RemoveText -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + write_pdf false (Cpdf.removetext range pdf) + | Some AddRectangle -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + write_pdf false + (addrectangle + args.coord args.color args.position args.relative_to_cropbox args.underneath range pdf) + | Some (AddBookmarks file) -> + write_pdf false + (Cpdf.add_bookmarks true (Pdfio.input_of_channel (open_in_bin file)) + (get_single_pdf args.op false)) + | Some RemoveBookmarks -> + write_pdf false (Pdfmarks.remove_bookmarks (get_single_pdf args.op false)) + | Some TwoUp -> + write_pdf false (Cpdf.twoup (get_single_pdf args.op false)) + | Some TwoUpStack -> + write_pdf false (Cpdf.twoup_stack (get_single_pdf args.op false)) + | Some (StampOn over) -> + let overpdf = + match over with + | "stamp_use_stdin" -> pdf_of_stdin "" "" + | x -> pdfread_pdf_of_file None None x + in + let overpdf = if args.uprightstamp then Cpdf.upright ~fast:args.fast (ilist 1 (Pdfpage.endpage overpdf)) overpdf else overpdf in + let pdf = get_single_pdf args.op false in + let marks = Pdfmarks.read_bookmarks pdf in + let range = parse_pagespec pdf (get_pagespec ()) in + let pdf = + if args.ismulti + then + let overpdf = equalize_pages_extend pdf overpdf in + Cpdf.combine_pages args.fast pdf overpdf true false false + else + Cpdf.stamp args.fast args.scale_stamp_to_fit true range overpdf pdf + in + write_pdf false (Pdfmarks.add_bookmarks marks pdf) + | Some (StampUnder under) -> + let underpdf = + match under with + | "stamp_use_stdin" -> pdf_of_stdin "" "" + | x -> pdfread_pdf_of_file None None x + in + let underpdf = if args.uprightstamp then Cpdf.upright ~fast:args.fast (ilist 1 (Pdfpage.endpage underpdf)) underpdf else underpdf in + let pdf = get_single_pdf args.op false in + let marks = Pdfmarks.read_bookmarks pdf in + let range = parse_pagespec pdf (get_pagespec ()) in + let pdf = + if args.ismulti + then + let underpdf = equalize_pages_extend pdf underpdf in + Cpdf.combine_pages args.fast pdf underpdf true true false + else Cpdf.stamp args.fast args.scale_stamp_to_fit false range underpdf pdf + in + write_pdf false (Pdfmarks.add_bookmarks marks pdf) + | Some (CombinePages over) -> + write_pdf false + (Cpdf.combine_pages args.fast (get_single_pdf args.op false) (pdfread_pdf_of_file None None over) false false true) + | Some Encrypt -> + let pdf = get_single_pdf args.op false in + let pdf = Cpdf.recompress_pdf <| nobble pdf + and encryption = + {Pdfwrite.encryption_method = + (match args.crypt_method with + | "40bit" -> Pdfwrite.PDF40bit + | "128bit" -> Pdfwrite.PDF128bit + | "AES" -> Pdfwrite.AES128bit args.encrypt_metadata + | "AES256" -> Pdfwrite.AES256bit args.encrypt_metadata + | "AES256ISO" -> Pdfwrite.AES256bitISO args.encrypt_metadata + | _ -> assert false (* Pre-checked *)); + Pdfwrite.owner_password = args.owner; + Pdfwrite.user_password = args.user; + Pdfwrite.permissions = banlist_of_args ()} + in + Pdf.remove_unreferenced pdf; + if not args.keepversion then + begin + let newversion = + match args.crypt_method with + "40bit" -> 1 | "128bit" -> 4 | "AES" -> 6 | "AES256" | "AES256ISO" -> 7 | _ -> 0 + in + let newversion = if args.create_objstm then 5 else newversion in + pdf.Pdf.minor <- max pdf.Pdf.minor newversion + end; + begin match args.out with + | NoOutputSpecified -> + error "encrypt: no output specified" + | File outname -> + let outname = writing_ok outname in + Pdfwrite.pdf_to_file_options + ~preserve_objstm:args.preserve_objstm + ~generate_objstm:args.create_objstm + args.linearize (Some encryption) args.makenewid pdf outname + | Stdout -> + Pdfwrite.pdf_to_channel + ~preserve_objstm:args.preserve_objstm + ~generate_objstm:args.create_objstm + args.linearize (Some encryption) args.makenewid pdf stdout; + flush stdout; + end + | Some Decrypt -> + write_pdf false (get_single_pdf args.op false) + | Some RemoveMetadata -> + write_pdf false (Cpdf.remove_metadata (get_single_pdf args.op false)) + | Some ExtractImages -> + let output_spec = + begin match args.out with + | File output_spec -> output_spec + | _ -> "" + end + in + let pdf = get_single_pdf args.op true in + let range = parse_pagespec pdf (get_pagespec ()) in + extract_images pdf range output_spec + | Some (ImageResolution f) -> + let pdf = get_single_pdf args.op true in + let range = parse_pagespec pdf (get_pagespec ()) in + image_resolution pdf range f + | Some MissingFonts -> + let pdf = get_single_pdf args.op true in + let range = parse_pagespec pdf (get_pagespec ()) in + missing_fonts pdf range + | Some CSP1 -> + write_pdf false (Cpdf.custom_csp1 (get_single_pdf (Some CSP1) false)) + | Some (CSP2 f) -> + write_pdf false (Cpdf.custom_csp2 f (get_single_pdf (Some (CSP2 f)) false)) + | Some CSP3 -> + begin match args.inputs with + | [InFile s, _, _, _, _] -> + let pdf = get_single_pdf args.op true in + let range = parse_pagespec pdf (get_pagespec ()) in + calculate_margins s pdf range + | _ -> + Printf.eprintf "CSP3: Too many input files or input not a file" + end + | Some DumpData -> + let pdf = get_single_pdf args.op true in + dump_data pdf args.out + | Some (UpdateInfo source) -> + let pdf = get_single_pdf args.op false in + write_pdf false (update_info pdf source) + | Some ExtractText -> + let pdf = get_single_pdf args.op true in + let range = parse_pagespec pdf (get_pagespec ()) in + let text = extract_text pdf range in + begin match args.out with + | File filename -> + let fh = open_out_bin filename in + output_string fh text; + close_out fh + | NoOutputSpecified | Stdout -> + print_string text; + print_newline () + end + | Some PrintLinearization -> + begin match args.inputs with + | (InFile inname, _, _, u, o)::_ -> + Pdfread.print_linearization (Pdfio.input_of_channel (open_in_bin inname)) + | _ -> raise (Arg.Bad "-print-linearization: supply a single file name") + end + +let parse_argv () = + Arg.parse_argv ~current:(ref 0) + +let align_specs s = + Arg.align s + +(* Main function. *) +let go_withargv argv = + if demo then + flprint "This demo is for evaluation only. http://www.coherentpdf.com/\n"; + try + (* Split the arguments into sets either side of ANDs *) + let sets = + let args = + (map (fun l -> "cpdf"::l) (split_around (eq "AND") (tl (Array.to_list argv)))) + in + match args with + | [] -> [] + | _ -> combine (map Array.of_list args) (map (eq (length args)) (ilist 1 (length args))) + in + iter + (fun (s, islast) -> + (*Printf.printf "AND:%b, %s\n" islast (Array.fold_left (fun x y -> x ^ " " ^ y) "" s); + flprint "\n";*) + reset_arguments (); + parse_argv () s (align_specs specs) anon_fun usage_msg; + parse_argv () (Array.of_list ("cpdf"::!control_args)) (align_specs specs) anon_fun usage_msg; + let addrange pdf = AlreadyInMemory pdf, args.dashrange, Pdfmerge.DNR, "", "" in + args.inputs <- rev (map addrange !output_pdfs) @ rev args.inputs; + output_pdfs := []; + go ()) + sets; + flush stdout (*r for Windows *) + with + | Arg.Bad s -> + let s' = + let chars = explode s in + implode (takewhile (neq '\n') chars) ^ " Use -help for help.\n\n" + in + prerr_string s'; + flush stderr; + exit 2 + | Arg.Help _ -> + Arg.usage (align_specs specs) usage_msg; + flush stderr (*r for Windows *) + | Sys_error s as e -> + prerr_string (s ^ "\n\n"); + flush stderr; + if args.debug then raise e else exit 2 + | Pdf.PDFError s as e -> + prerr_string + ("cpdf encountered an error. Technical details follow:\n\n" ^ s ^ "\n\n"); + flush stderr; + if args.debug then raise e else exit 2 + | Cpdf.SoftError s -> soft_error s + | Cpdf.HardError s -> error s + | e -> + prerr_string + ("cpdf encountered an unexpected error. Technical Details follow:\n" ^ + Printexc.to_string e ^ "\n\n"); + flush stderr; + if args.debug then raise e else exit 2 + +let go () = go_withargv Sys.argv + diff --git a/cpdfcommand.mli b/cpdfcommand.mli new file mode 100644 index 0000000..526661f --- /dev/null +++ b/cpdfcommand.mli @@ -0,0 +1,7 @@ +(* cpdfcommand.mli *) +val go : unit -> unit + +val go_withargv : string array -> unit + +val demo : bool + diff --git a/cpdfcommandrun.ml b/cpdfcommandrun.ml new file mode 100644 index 0000000..2a43a43 --- /dev/null +++ b/cpdfcommandrun.ml @@ -0,0 +1,2 @@ +let _ = Cpdfcommand.go () + diff --git a/cpdfstrftime.ml b/cpdfstrftime.ml new file mode 100644 index 0000000..03cd8fa --- /dev/null +++ b/cpdfstrftime.ml @@ -0,0 +1,93 @@ +(* C-Style strftime *) +open Pdfutil + +let strf_A t = + match t.Unix.tm_wday with + | 0 -> "Sunday" | 1 -> "Monday" | 2 -> "Tuesday" + | 3 -> "Wednesday" | 4 -> "Thursday" | 5 -> "Friday" + | 6 -> "Saturday" + | _ -> "strf_AFail" + +let strf_a t = + String.sub (strf_A t) 0 3 + +let strf_B t = + match t.Unix.tm_mon with + | 0 -> "January" | 1 -> "February" | 2 -> "March" | 3 -> "April" + | 4 -> "May" | 5 -> "June" | 6 -> "July" | 7 -> "August" + | 8 -> "September" | 9 -> "October" | 10 -> "November" + | 11 -> "December" | _ -> "strf_Bfail" + +let strf_b t = + String.sub (strf_B t) 0 3 + +let strf_d t = + let s = string_of_int t.Unix.tm_mday in + if String.length s = 1 then "0" ^ s else s + +let strf_e t = + let s = string_of_int t.Unix.tm_mday in + if String.length s = 1 then " " ^ s else s + +let strf_H t = + let s = string_of_int t.Unix.tm_hour in + if String.length s = 1 then "0" ^ s else s + +let strf_I t = + let s = string_of_int (t.Unix.tm_hour mod 12) in + if String.length s = 1 then "0" ^ s else s + +let strf_j t = + let s = string_of_int t.Unix.tm_yday in + match String.length s with + | 1 -> "00" ^ s + | 2 -> "0" ^ s + | _ -> s + +let strf_m t = + let s = string_of_int (t.Unix.tm_mon + 1) in + if String.length s = 1 then "0" ^ s else s + +let strf_M t = + let s = string_of_int t.Unix.tm_min in + if String.length s = 1 then "0" ^ s else s + +let strf_p t = + if t.Unix.tm_hour >= 12 then "p.m" else "a.m" + +let strf_S t = + let s = string_of_int t.Unix.tm_sec in + if String.length s = 1 then "0" ^ s else s + +let strf_T t = + strf_H t ^ ":" ^ strf_M t ^ ":" ^ strf_S t + +let strf_u t = + match t.Unix.tm_wday with + | 0 -> "7" + | n -> string_of_int (n + 1) + +let strf_w t = + string_of_int t.Unix.tm_wday + +let strf_Y t = + string_of_int (t.Unix.tm_year + 1900) + +let strf_percent _ = "%" + +let strftime_pairs = + ["%a", strf_a; "%A", strf_A; "%b", strf_b; "%B", strf_B; + "%d", strf_d; "%e", strf_e; "%H", strf_H; + "%I", strf_I; "%j", strf_j; "%m", strf_m; "%M", strf_M; + "%p", strf_p; "%S", strf_S; "%T", strf_T; "%u", strf_u; + "%w", strf_w; "%Y", strf_Y; "%%", strf_percent] + +let strftime text = + let time = Unix.localtime (Unix.gettimeofday ()) in + let text = ref text in + iter + (fun (search, replace_fun) -> + text := string_replace_all search (replace_fun time) !text) + strftime_pairs; + !text + diff --git a/cpdfstrftime.mli b/cpdfstrftime.mli new file mode 100644 index 0000000..e9a366a --- /dev/null +++ b/cpdfstrftime.mli @@ -0,0 +1,2 @@ +(* C-style strftime *) +val strftime : string -> string