diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..cfaacd8 --- /dev/null +++ b/Makefile @@ -0,0 +1,16 @@ +# Build the cpdf command line tools and top level +SOURCES = cpdfstrftime.mli cpdfstrftime.ml cpdf.mli cpdf.ml cpdfcommand.mli \ +cpdfcommand.ml cpdfcommandrun.ml + +RESULT = cpdf + +PACKS = camlpdf + +OCAMLNCFLAGS = -g +OCAMLBCFLAGS = -g +OCAMLLDFLAGS = -g + +all : native-code top htdoc + +-include OCamlMakefile + diff --git a/OCamlMakefile b/OCamlMakefile new file mode 100644 index 0000000..43dc3e2 --- /dev/null +++ b/OCamlMakefile @@ -0,0 +1,1299 @@ +########################################################################### +# OCamlMakefile +# Copyright (C) 1999- Markus Mottl +# +# For updates see: +# http://www.ocaml.info/home/ocaml_sources.html +# +########################################################################### + +# Modified by damien for .glade.ml compilation + +# Set these variables to the names of the sources to be processed and +# the result variable. Order matters during linkage! + +ifndef SOURCES + SOURCES := foo.ml +endif +export SOURCES + +ifndef RES_CLIB_SUF + RES_CLIB_SUF := _stubs +endif +export RES_CLIB_SUF + +ifndef RESULT + RESULT := foo +endif +export RESULT := $(strip $(RESULT)) + +export LIB_PACK_NAME + +ifndef DOC_FILES + DOC_FILES := $(filter %.mli, $(SOURCES)) +endif +export DOC_FILES +FIRST_DOC_FILE := $(firstword $(DOC_FILES)) + +export BCSUFFIX +export NCSUFFIX + +ifndef TOPSUFFIX + TOPSUFFIX := .top +endif +export TOPSUFFIX + +# Eventually set include- and library-paths, libraries to link, +# additional compilation-, link- and ocamlyacc-flags +# Path- and library information needs not be written with "-I" and such... +# Define THREADS if you need it, otherwise leave it unset (same for +# USE_CAMLP4)! + +export THREADS +export VMTHREADS +export ANNOTATE +export USE_CAMLP4 + +export INCDIRS +export LIBDIRS +export EXTLIBDIRS +export RESULTDEPS +export OCAML_DEFAULT_DIRS + +export LIBS +export CLIBS +export CFRAMEWORKS + +export OCAMLFLAGS +export OCAMLNCFLAGS +export OCAMLBCFLAGS + +export OCAMLLDFLAGS +export OCAMLNLDFLAGS +export OCAMLBLDFLAGS + +export OCAMLMKLIB_FLAGS + +ifndef OCAMLCPFLAGS + OCAMLCPFLAGS := a +endif +export OCAMLCPFLAGS + +ifndef DOC_DIR + DOC_DIR := doc +endif +export DOC_DIR + +export PPFLAGS + +export LFLAGS +export YFLAGS +export IDLFLAGS + +export OCAMLDOCFLAGS + +export OCAMLFIND_INSTFLAGS + +export DVIPSFLAGS + +export STATIC + +# Add a list of optional trash files that should be deleted by "make clean" +export TRASH + +ECHO := echo + +ifdef REALLY_QUIET + export REALLY_QUIET + ECHO := true + LFLAGS := $(LFLAGS) -q + YFLAGS := $(YFLAGS) -q +endif + +#################### variables depending on your OCaml-installation + +SYSTEM := $(shell ocamlc -config 2>/dev/null | grep system | sed 's/system: //') + # This may be + # - mingw + # - mingw64 + # - win32 + # - cygwin + # - some other string means Unix + # - empty means ocamlc does not support -config + +ifeq ($(SYSTEM),$(filter $(SYSTEM),mingw mingw64)) + MINGW=1 +endif +ifeq ($(SYSTEM),win32) + MSVC=1 +endif + +ifdef MINGW + export MINGW + WIN32 := 1 + # The default value 'cc' makes 'ocamlc -cc "cc"' raises the error 'The + # NTVDM CPU has encountered an illegal instruction'. + ifndef CC + MNO_CYGWIN := $(shell gcc -Wextra -v --help 2>/dev/null | grep -q '\-mno-cygwin'; echo $$?) + CC := gcc + else + MNO_CYGWIN := $(shell $$CC -Wextra -v --help 2>/dev/null | grep -q '\-mno-cygwin'; echo $$?) + endif + # We are compiling with cygwin tools: + ifeq ($(MNO_CYGWIN),0) + CFLAGS_WIN32 := -mno-cygwin + endif + # The OCaml C header files use this flag: + CFLAGS += -D__MINGW32__ +endif +ifdef MSVC + export MSVC + WIN32 := 1 + ifndef STATIC + CPPFLAGS_WIN32 := -DCAML_DLL + endif + CFLAGS_WIN32 += -nologo + EXT_OBJ := obj + EXT_LIB := lib + ifeq ($(CC),gcc) + # work around GNU Make default value + ifdef THREADS + CC := cl -MT + else + CC := cl + endif + endif + ifeq ($(CXX),g++) + # work around GNU Make default value + CXX := $(CC) + endif + CFLAG_O := -Fo +endif +ifdef WIN32 + EXT_CXX := cpp + EXE := .exe +endif + +ifndef EXT_OBJ + EXT_OBJ := o +endif +ifndef EXT_LIB + EXT_LIB := a +endif +ifndef EXT_CXX + EXT_CXX := cc +endif +ifndef EXE + EXE := # empty +endif +ifndef CFLAG_O + CFLAG_O := -o # do not delete this comment (preserves trailing whitespace)! +endif + +export CC +export CXX +export CFLAGS +export CXXFLAGS +export LDFLAGS +export CPPFLAGS + +ifndef RPATH_FLAG + ifdef ELF_RPATH_FLAG + RPATH_FLAG := $(ELF_RPATH_FLAG) + else + RPATH_FLAG := -R + endif +endif +export RPATH_FLAG + +ifndef MSVC +ifndef PIC_CFLAGS + PIC_CFLAGS := -fPIC +endif +ifndef PIC_CPPFLAGS + PIC_CPPFLAGS := -DPIC +endif +endif + +export PIC_CFLAGS +export PIC_CPPFLAGS + +BCRESULT := $(addsuffix $(BCSUFFIX), $(RESULT)) +NCRESULT := $(addsuffix $(NCSUFFIX), $(RESULT)) +TOPRESULT := $(addsuffix $(TOPSUFFIX), $(RESULT)) + +ifndef OCAMLFIND + OCAMLFIND := ocamlfind +endif +export OCAMLFIND + +ifndef OCAML + OCAML := ocaml +endif +export OCAML + +ifndef OCAMLC + OCAMLC := ocamlc +endif +export OCAMLC + +ifndef OCAMLOPT + OCAMLOPT := ocamlopt +endif +export OCAMLOPT + +ifndef OCAMLMKTOP + OCAMLMKTOP := ocamlmktop +endif +export OCAMLMKTOP + +ifndef OCAMLCP + OCAMLCP := ocamlcp +endif +export OCAMLCP + +ifndef OCAMLDEP + OCAMLDEP := ocamldep +endif +export OCAMLDEP + +ifndef OCAMLLEX + OCAMLLEX := ocamllex +endif +export OCAMLLEX + +ifndef OCAMLYACC + OCAMLYACC := ocamlyacc +endif +export OCAMLYACC + +ifndef OCAMLMKLIB + OCAMLMKLIB := ocamlmklib +endif +export OCAMLMKLIB + +ifndef OCAML_GLADECC + OCAML_GLADECC := lablgladecc2 +endif +export OCAML_GLADECC + +ifndef OCAML_GLADECC_FLAGS + OCAML_GLADECC_FLAGS := +endif +export OCAML_GLADECC_FLAGS + +ifndef CAMELEON_REPORT + CAMELEON_REPORT := report +endif +export CAMELEON_REPORT + +ifndef CAMELEON_REPORT_FLAGS + CAMELEON_REPORT_FLAGS := +endif +export CAMELEON_REPORT_FLAGS + +ifndef CAMELEON_ZOGGY + CAMELEON_ZOGGY := camlp4o pa_zog.cma pr_o.cmo +endif +export CAMELEON_ZOGGY + +ifndef CAMELEON_ZOGGY_FLAGS + CAMELEON_ZOGGY_FLAGS := +endif +export CAMELEON_ZOGGY_FLAGS + +ifndef OXRIDL + OXRIDL := oxridl +endif +export OXRIDL + +ifndef CAMLIDL + CAMLIDL := camlidl +endif +export CAMLIDL + +ifndef CAMLIDLDLL + CAMLIDLDLL := camlidldll +endif +export CAMLIDLDLL + +ifndef NOIDLHEADER + MAYBE_IDL_HEADER := -header +endif +export NOIDLHEADER + +export NO_CUSTOM + +ifndef CAMLP4 + CAMLP4 := camlp4 +endif +export CAMLP4 + +ifndef REAL_OCAMLFIND + ifdef PACKS + ifndef CREATE_LIB + ifdef THREADS + PACKS += threads + endif + endif + empty := + space := $(empty) $(empty) + comma := , + ifdef PREDS + PRE_OCAML_FIND_PREDICATES := $(subst $(space),$(comma),$(PREDS)) + PRE_OCAML_FIND_PACKAGES := $(subst $(space),$(comma),$(PACKS)) + OCAML_FIND_PREDICATES := -predicates $(PRE_OCAML_FIND_PREDICATES) + # OCAML_DEP_PREDICATES := -syntax $(PRE_OCAML_FIND_PREDICATES) + OCAML_FIND_PACKAGES := $(OCAML_FIND_PREDICATES) -package $(PRE_OCAML_FIND_PACKAGES) + OCAML_DEP_PACKAGES := $(OCAML_DEP_PREDICATES) -package $(PRE_OCAML_FIND_PACKAGES) + else + OCAML_FIND_PACKAGES := -package $(subst $(space),$(comma),$(PACKS)) + OCAML_DEP_PACKAGES := + endif + OCAML_FIND_LINKPKG := -linkpkg + REAL_OCAMLFIND := $(OCAMLFIND) + endif +endif + +export OCAML_FIND_PACKAGES +export OCAML_DEP_PACKAGES +export OCAML_FIND_LINKPKG +export REAL_OCAMLFIND + +ifndef OCAMLDOC + OCAMLDOC := ocamldoc +endif +export OCAMLDOC + +ifndef LATEX + LATEX := latex +endif +export LATEX + +ifndef DVIPS + DVIPS := dvips +endif +export DVIPS + +ifndef PS2PDF + PS2PDF := ps2pdf +endif +export PS2PDF + +ifndef OCAMLMAKEFILE + OCAMLMAKEFILE := OCamlMakefile +endif +export OCAMLMAKEFILE + +ifndef OCAMLLIBPATH + OCAMLLIBPATH := \ + $(shell $(OCAMLC) 2>/dev/null -where || echo /usr/local/lib/ocaml) +endif +export OCAMLLIBPATH + +ifndef OCAML_LIB_INSTALL + OCAML_LIB_INSTALL := $(OCAMLLIBPATH)/contrib +endif +export OCAML_LIB_INSTALL + +########################################################################### + +#################### change following sections only if +#################### you know what you are doing! + +# delete target files when a build command fails +.PHONY: .DELETE_ON_ERROR +.DELETE_ON_ERROR: + +# for pedants using "--warn-undefined-variables" +export MAYBE_IDL +export REAL_RESULT +export CAMLIDLFLAGS +export THREAD_FLAG +export RES_CLIB +export MAKEDLL +export ANNOT_FLAG +export C_OXRIDL +export SUBPROJS +export CFLAGS_WIN32 +export CPPFLAGS_WIN32 + +INCFLAGS := + +SHELL := /bin/sh + +MLDEPDIR := ._d +BCDIDIR := ._bcdi +NCDIDIR := ._ncdi + +FILTER_EXTNS := %.mli %.ml %.mll %.mly %.idl %.oxridl %.c %.m %.$(EXT_CXX) %.rep %.zog %.glade + +FILTERED := $(filter $(FILTER_EXTNS), $(SOURCES)) +SOURCE_DIRS := $(filter-out ./, $(sort $(dir $(FILTERED)))) + +FILTERED_REP := $(filter %.rep, $(FILTERED)) +DEP_REP := $(FILTERED_REP:%.rep=$(MLDEPDIR)/%.d) +AUTO_REP := $(FILTERED_REP:.rep=.ml) + +FILTERED_ZOG := $(filter %.zog, $(FILTERED)) +DEP_ZOG := $(FILTERED_ZOG:%.zog=$(MLDEPDIR)/%.d) +AUTO_ZOG := $(FILTERED_ZOG:.zog=.ml) + +FILTERED_GLADE := $(filter %.glade, $(FILTERED)) +DEP_GLADE := $(FILTERED_GLADE:%.glade=$(MLDEPDIR)/%.d) +AUTO_GLADE := $(FILTERED_GLADE:.glade=.ml) + +FILTERED_ML := $(filter %.ml, $(FILTERED)) +DEP_ML := $(FILTERED_ML:%.ml=$(MLDEPDIR)/%.d) + +FILTERED_MLI := $(filter %.mli, $(FILTERED)) +DEP_MLI := $(FILTERED_MLI:.mli=.di) + +FILTERED_MLL := $(filter %.mll, $(FILTERED)) +DEP_MLL := $(FILTERED_MLL:%.mll=$(MLDEPDIR)/%.d) +AUTO_MLL := $(FILTERED_MLL:.mll=.ml) + +FILTERED_MLY := $(filter %.mly, $(FILTERED)) +DEP_MLY := $(FILTERED_MLY:%.mly=$(MLDEPDIR)/%.d) $(FILTERED_MLY:.mly=.di) +AUTO_MLY := $(FILTERED_MLY:.mly=.mli) $(FILTERED_MLY:.mly=.ml) + +FILTERED_IDL := $(filter %.idl, $(FILTERED)) +DEP_IDL := $(FILTERED_IDL:%.idl=$(MLDEPDIR)/%.d) $(FILTERED_IDL:.idl=.di) +C_IDL := $(FILTERED_IDL:%.idl=%_stubs.c) +ifndef NOIDLHEADER + C_IDL += $(FILTERED_IDL:.idl=.h) +endif +OBJ_C_IDL := $(FILTERED_IDL:%.idl=%_stubs.$(EXT_OBJ)) +AUTO_IDL := $(FILTERED_IDL:.idl=.mli) $(FILTERED_IDL:.idl=.ml) $(C_IDL) + +FILTERED_OXRIDL := $(filter %.oxridl, $(FILTERED)) +DEP_OXRIDL := $(FILTERED_OXRIDL:%.oxridl=$(MLDEPDIR)/%.d) $(FILTERED_OXRIDL:.oxridl=.di) +AUTO_OXRIDL := $(FILTERED_OXRIDL:.oxridl=.mli) $(FILTERED_OXRIDL:.oxridl=.ml) $(C_OXRIDL) + +FILTERED_C_CXX := $(filter %.c %.m %.$(EXT_CXX), $(FILTERED)) +OBJ_C_CXX := $(FILTERED_C_CXX:.c=.$(EXT_OBJ)) +OBJ_C_CXX := $(OBJ_C_CXX:.m=.$(EXT_OBJ)) +OBJ_C_CXX := $(OBJ_C_CXX:.$(EXT_CXX)=.$(EXT_OBJ)) + +PRE_TARGETS += $(AUTO_MLL) $(AUTO_MLY) $(AUTO_IDL) $(AUTO_OXRIDL) $(AUTO_ZOG) $(AUTO_REP) $(AUTO_GLADE) + +ALL_DEPS := $(DEP_ML) $(DEP_MLI) $(DEP_MLL) $(DEP_MLY) $(DEP_IDL) $(DEP_OXRIDL) $(DEP_ZOG) $(DEP_REP) $(DEP_GLADE) + +MLDEPS := $(filter %.d, $(ALL_DEPS)) +MLIDEPS := $(filter %.di, $(ALL_DEPS)) +BCDEPIS := $(MLIDEPS:%.di=$(BCDIDIR)/%.di) +NCDEPIS := $(MLIDEPS:%.di=$(NCDIDIR)/%.di) + +ALLML := $(filter %.mli %.ml %.mll %.mly %.idl %.oxridl %.rep %.zog %.glade, $(FILTERED)) + +IMPLO_INTF := $(ALLML:%.mli=%.mli.__) +IMPLO_INTF := $(foreach file, $(IMPLO_INTF), \ + $(basename $(file)).cmi $(basename $(file)).cmo) +IMPLO_INTF := $(filter-out %.mli.cmo, $(IMPLO_INTF)) +IMPLO_INTF := $(IMPLO_INTF:%.mli.cmi=%.cmi) + +IMPLX_INTF := $(IMPLO_INTF:.cmo=.cmx) + +INTF := $(filter %.cmi, $(IMPLO_INTF)) +IMPL_CMO := $(filter %.cmo, $(IMPLO_INTF)) +IMPL_CMX := $(IMPL_CMO:.cmo=.cmx) +IMPL_ASM := $(IMPL_CMO:.cmo=.asm) +IMPL_S := $(IMPL_CMO:.cmo=.s) + +OBJ_LINK := $(OBJ_C_IDL) $(OBJ_C_CXX) +OBJ_FILES := $(IMPL_CMO:.cmo=.$(EXT_OBJ)) $(OBJ_LINK) + +EXECS := $(addsuffix $(EXE), \ + $(sort $(TOPRESULT) $(BCRESULT) $(NCRESULT))) +ifdef WIN32 + EXECS += $(BCRESULT).dll $(NCRESULT).dll +endif + +CLIB_BASE := $(RESULT)$(RES_CLIB_SUF) +ifneq ($(strip $(OBJ_LINK)),) + RES_CLIB := lib$(CLIB_BASE).$(EXT_LIB) +endif + +ifdef WIN32 +DLLSONAME := dll$(CLIB_BASE).dll +else +DLLSONAME := dll$(CLIB_BASE).so +endif + +NONEXECS := $(INTF) $(IMPL_CMO) $(IMPL_CMX) $(IMPL_ASM) $(IMPL_S) \ + $(OBJ_FILES) $(PRE_TARGETS) $(BCRESULT).cma $(NCRESULT).cmxa \ + $(NCRESULT).$(EXT_LIB) $(BCRESULT).cmi $(BCRESULT).cmo \ + $(NCRESULT).cmi $(NCRESULT).cmx $(NCRESULT).$(EXT_OBJ) \ + $(RES_CLIB) $(IMPL_CMO:.cmo=.annot) \ + $(LIB_PACK_NAME).cmi $(LIB_PACK_NAME).cmo $(LIB_PACK_NAME).cmx \ + $(LIB_PACK_NAME).$(EXT_OBJ) + +ifndef STATIC + NONEXECS += $(DLLSONAME) +endif + +ifndef LIBINSTALL_FILES + LIBINSTALL_FILES := $(RESULT).mli $(RESULT).cmi $(RESULT).cma \ + $(RESULT).cmxa $(RESULT).$(EXT_LIB) $(RES_CLIB) + ifndef STATIC + ifneq ($(strip $(OBJ_LINK)),) + LIBINSTALL_FILES += $(DLLSONAME) + endif + endif +endif + +export LIBINSTALL_FILES + +ifdef WIN32 + # some extra stuff is created while linking DLLs + NONEXECS += $(BCRESULT).$(EXT_LIB) $(BCRESULT).exp $(NCRESULT).exp $(CLIB_BASE).exp $(CLIB_BASE).lib +endif + +TARGETS := $(EXECS) $(NONEXECS) + +# If there are IDL-files +ifneq ($(strip $(FILTERED_IDL)),) + MAYBE_IDL := -cclib -lcamlidl +endif + +ifdef USE_CAMLP4 + CAMLP4PATH := \ + $(shell $(CAMLP4) -where 2>/dev/null || echo /usr/local/lib/camlp4) + INCFLAGS := -I $(CAMLP4PATH) + CINCFLAGS := -I$(CAMLP4PATH) +endif + +INCFLAGS := $(INCFLAGS) $(INCDIRS:%=-I %) $(SOURCE_DIRS:%=-I %) $(OCAML_DEFAULT_DIRS:%=-I %) +CINCFLAGS += $(SOURCE_DIRS:%=-I%) $(INCDIRS:%=-I%) $(OCAML_DEFAULT_DIRS:%=-I%) + +ifndef MSVC + CLIBFLAGS += $(SOURCE_DIRS:%=-L%) $(LIBDIRS:%=-L%) \ + $(EXTLIBDIRS:%=-L%) $(OCAML_DEFAULT_DIRS:%=-L%) + + ifeq ($(ELF_RPATH), yes) + CLIBFLAGS += $(EXTLIBDIRS:%=-Wl,$(RPATH_FLAG)%) + endif +endif + +ifndef PROFILING + INTF_OCAMLC := $(OCAMLC) +else + ifndef THREADS + INTF_OCAMLC := $(OCAMLCP) -p $(OCAMLCPFLAGS) + else + # OCaml does not support profiling byte code + # with threads (yet), therefore we force an error. + ifndef REAL_OCAMLC + $(error Profiling of multithreaded byte code not yet supported by OCaml) + endif + INTF_OCAMLC := $(OCAMLC) + endif +endif + +ifndef MSVC + COMMON_LDFLAGS := $(LDFLAGS:%=-ccopt %) $(SOURCE_DIRS:%=-ccopt -L%) \ + $(LIBDIRS:%=-ccopt -L%) $(EXTLIBDIRS:%=-ccopt -L%) \ + $(EXTLIBDIRS:%=-ccopt -Wl $(OCAML_DEFAULT_DIRS:%=-ccopt -L%)) + + ifeq ($(ELF_RPATH),yes) + COMMON_LDFLAGS += $(EXTLIBDIRS:%=-ccopt -Wl,$(RPATH_FLAG)%) + endif +else + COMMON_LDFLAGS := -ccopt "/link -NODEFAULTLIB:LIBC $(LDFLAGS:%=%) $(SOURCE_DIRS:%=-LIBPATH:%) \ + $(LIBDIRS:%=-LIBPATH:%) $(EXTLIBDIRS:%=-LIBPATH:%) \ + $(OCAML_DEFAULT_DIRS:%=-LIBPATH:%) " +endif + +CLIBS_OPTS := $(CLIBS:%=-cclib -l%) $(CFRAMEWORKS:%=-cclib '-framework %') +ifdef MSVC + ifndef STATIC + # MSVC libraries do not have 'lib' prefix + CLIBS_OPTS := $(CLIBS:%=-cclib %.lib) + endif +endif + +ifneq ($(strip $(OBJ_LINK)),) + ifdef CREATE_LIB + OBJS_LIBS := -cclib -l$(CLIB_BASE) $(CLIBS_OPTS) $(MAYBE_IDL) + else + OBJS_LIBS := $(OBJ_LINK) $(CLIBS_OPTS) $(MAYBE_IDL) + endif +else + OBJS_LIBS := $(CLIBS_OPTS) $(MAYBE_IDL) +endif + +ifdef LIB_PACK_NAME + FOR_PACK_NAME := $(shell echo $(LIB_PACK_NAME) | awk '{print toupper(substr($$0,1,1))substr($$0,2)}') +endif + +# If we have to make byte-code +ifndef REAL_OCAMLC + BYTE_OCAML := y + + # EXTRADEPS is added dependencies we have to insert for all + # executable files we generate. Ideally it should be all of the + # libraries we use, but it's hard to find the ones that get searched on + # the path since I don't know the paths built into the compiler, so + # just include the ones with slashes in their names. + EXTRADEPS := $(addsuffix .cma,$(foreach i,$(LIBS),$(if $(findstring /,$(i)),$(i)))) + + + ifndef LIB_PACK_NAME + SPECIAL_OCAMLFLAGS := $(OCAMLBCFLAGS) + else + SPECIAL_OCAMLFLAGS := -for-pack $(FOR_PACK_NAME) $(OCAMLBCFLAGS) + endif + + REAL_OCAMLC := $(INTF_OCAMLC) + + REAL_IMPL := $(IMPL_CMO) + REAL_IMPL_INTF := $(IMPLO_INTF) + IMPL_SUF := .cmo + + DEPFLAGS := + MAKE_DEPS := $(MLDEPS) $(BCDEPIS) + + ifdef CREATE_LIB + override CFLAGS := $(PIC_CFLAGS) $(CFLAGS) + override CPPFLAGS := $(PIC_CPPFLAGS) $(CPPFLAGS) + ifndef STATIC + ifneq ($(strip $(OBJ_LINK)),) + MAKEDLL := $(DLLSONAME) + ALL_LDFLAGS := -dllib $(DLLSONAME) + endif + endif + endif + + ifndef NO_CUSTOM + ifneq "$(strip $(OBJ_LINK) $(THREADS) $(MAYBE_IDL) $(CLIBS) $(CFRAMEWORKS))" "" + ALL_LDFLAGS += -custom + endif + endif + + ALL_LDFLAGS += $(INCFLAGS) $(OCAMLLDFLAGS) $(OCAMLBLDFLAGS) \ + $(COMMON_LDFLAGS) $(LIBS:%=%.cma) + CAMLIDLDLLFLAGS := + + ifdef THREADS + ifdef VMTHREADS + THREAD_FLAG := -vmthread + else + THREAD_FLAG := -thread + endif + ALL_LDFLAGS := $(THREAD_FLAG) $(ALL_LDFLAGS) + ifndef CREATE_LIB + ifndef REAL_OCAMLFIND + ALL_LDFLAGS := unix.cma threads.cma $(ALL_LDFLAGS) + endif + endif + endif + +# we have to make native-code +else + EXTRADEPS := $(addsuffix .cmxa,$(foreach i,$(LIBS),$(if $(findstring /,$(i)),$(i)))) + ifndef PROFILING + SPECIAL_OCAMLFLAGS := $(OCAMLNCFLAGS) + PLDFLAGS := + else + SPECIAL_OCAMLFLAGS := -p $(OCAMLNCFLAGS) + PLDFLAGS := -p + endif + + ifndef LIB_PACK_NAME + SPECIAL_OCAMLFLAGS := $(OCAMLNCFLAGS) + else + SPECIAL_OCAMLFLAGS := -for-pack $(FOR_PACK_NAME) $(OCAMLNCFLAGS) + endif + REAL_IMPL := $(IMPL_CMX) + REAL_IMPL_INTF := $(IMPLX_INTF) + IMPL_SUF := .cmx + + override CPPFLAGS := -DNATIVE_CODE $(CPPFLAGS) + + DEPFLAGS := -native + MAKE_DEPS := $(MLDEPS) $(NCDEPIS) + + ALL_LDFLAGS := $(PLDFLAGS) $(INCFLAGS) $(OCAMLLDFLAGS) \ + $(OCAMLNLDFLAGS) $(COMMON_LDFLAGS) + CAMLIDLDLLFLAGS := -opt + + ifndef CREATE_LIB + ALL_LDFLAGS += $(LIBS:%=%.cmxa) + else + override CFLAGS := $(PIC_CFLAGS) $(CFLAGS) + override CPPFLAGS := $(PIC_CPPFLAGS) $(CPPFLAGS) + endif + + ifdef THREADS + THREAD_FLAG := -thread + ALL_LDFLAGS := $(THREAD_FLAG) $(ALL_LDFLAGS) + ifndef CREATE_LIB + ifndef REAL_OCAMLFIND + ALL_LDFLAGS := unix.cmxa threads.cmxa $(ALL_LDFLAGS) + endif + endif + endif +endif + +export MAKE_DEPS + +ifdef ANNOTATE + ANNOT_FLAG := -annot +else +endif + +ALL_OCAMLCFLAGS := $(THREAD_FLAG) $(ANNOT_FLAG) $(OCAMLFLAGS) \ + $(INCFLAGS) $(SPECIAL_OCAMLFLAGS) + +ifdef make_deps + -include $(MAKE_DEPS) + PRE_TARGETS := +endif + +########################################################################### +# USER RULES + +# Call "OCamlMakefile QUIET=" to get rid of all of the @'s. +QUIET=@ + +# generates byte-code (default) +byte-code: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(BCRESULT) \ + REAL_RESULT="$(BCRESULT)" make_deps=yes +bc: byte-code + +byte-code-nolink: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) nolink \ + REAL_RESULT="$(BCRESULT)" make_deps=yes +bcnl: byte-code-nolink + +top: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(TOPRESULT) \ + REAL_RESULT="$(BCRESULT)" make_deps=yes + +# generates native-code + +native-code: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(NCRESULT) \ + REAL_RESULT="$(NCRESULT)" \ + REAL_OCAMLC="$(OCAMLOPT)" \ + make_deps=yes +nc: native-code + +native-code-nolink: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) nolink \ + REAL_RESULT="$(NCRESULT)" \ + REAL_OCAMLC="$(OCAMLOPT)" \ + make_deps=yes +ncnl: native-code-nolink + +# generates byte-code libraries +byte-code-library: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ + $(RES_CLIB) $(BCRESULT).cma \ + REAL_RESULT="$(BCRESULT)" \ + CREATE_LIB=yes \ + make_deps=yes +bcl: byte-code-library + +# generates native-code libraries +native-code-library: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ + $(RES_CLIB) $(NCRESULT).cmxa \ + REAL_RESULT="$(NCRESULT)" \ + REAL_OCAMLC="$(OCAMLOPT)" \ + CREATE_LIB=yes \ + make_deps=yes +ncl: native-code-library + +ifdef WIN32 +# generates byte-code dll +byte-code-dll: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ + $(RES_CLIB) $(BCRESULT).dll \ + REAL_RESULT="$(BCRESULT)" \ + make_deps=yes +bcd: byte-code-dll + +# generates native-code dll +native-code-dll: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ + $(RES_CLIB) $(NCRESULT).dll \ + REAL_RESULT="$(NCRESULT)" \ + REAL_OCAMLC="$(OCAMLOPT)" \ + make_deps=yes +ncd: native-code-dll +endif + +# generates byte-code with debugging information +debug-code: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(BCRESULT) \ + REAL_RESULT="$(BCRESULT)" make_deps=yes \ + OCAMLFLAGS="-g $(OCAMLFLAGS)" \ + OCAMLLDFLAGS="-g $(OCAMLLDFLAGS)" +dc: debug-code + +debug-code-nolink: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) nolink \ + REAL_RESULT="$(BCRESULT)" make_deps=yes \ + OCAMLFLAGS="-g $(OCAMLFLAGS)" \ + OCAMLLDFLAGS="-g $(OCAMLLDFLAGS)" +dcnl: debug-code-nolink + +# generates byte-code with debugging information (native code) +debug-native-code: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(NCRESULT) \ + REAL_RESULT="$(NCRESULT)" make_deps=yes \ + REAL_OCAMLC="$(OCAMLOPT)" \ + OCAMLFLAGS="-g $(OCAMLFLAGS)" \ + OCAMLLDFLAGS="-g $(OCAMLLDFLAGS)" +dnc: debug-native-code + +debug-native-code-nolink: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) nolink \ + REAL_RESULT="$(NCRESULT)" make_deps=yes \ + REAL_OCAMLC="$(OCAMLOPT)" \ + OCAMLFLAGS="-g $(OCAMLFLAGS)" \ + OCAMLLDFLAGS="-g $(OCAMLLDFLAGS)" +dncnl: debug-native-code-nolink + +# generates byte-code libraries with debugging information +debug-code-library: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ + $(RES_CLIB) $(BCRESULT).cma \ + REAL_RESULT="$(BCRESULT)" make_deps=yes \ + CREATE_LIB=yes \ + OCAMLFLAGS="-g $(OCAMLFLAGS)" \ + OCAMLLDFLAGS="-g $(OCAMLLDFLAGS)" +dcl: debug-code-library + +# generates byte-code libraries with debugging information (native code) +debug-native-code-library: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ + $(RES_CLIB) $(NCRESULT).cmxa \ + REAL_RESULT="$(NCRESULT)" make_deps=yes \ + REAL_OCAMLC="$(OCAMLOPT)" \ + CREATE_LIB=yes \ + OCAMLFLAGS="-g $(OCAMLFLAGS)" \ + OCAMLLDFLAGS="-g $(OCAMLLDFLAGS)" +dncl: debug-native-code-library + +# generates byte-code for profiling +profiling-byte-code: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(BCRESULT) \ + REAL_RESULT="$(BCRESULT)" PROFILING="y" \ + make_deps=yes +pbc: profiling-byte-code + +# generates native-code + +profiling-native-code: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(NCRESULT) \ + REAL_RESULT="$(NCRESULT)" \ + REAL_OCAMLC="$(OCAMLOPT)" \ + PROFILING="y" \ + make_deps=yes +pnc: profiling-native-code + +# generates byte-code libraries +profiling-byte-code-library: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ + $(RES_CLIB) $(BCRESULT).cma \ + REAL_RESULT="$(BCRESULT)" PROFILING="y" \ + CREATE_LIB=yes \ + make_deps=yes +pbcl: profiling-byte-code-library + +# generates native-code libraries +profiling-native-code-library: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ + $(RES_CLIB) $(NCRESULT).cmxa \ + REAL_RESULT="$(NCRESULT)" PROFILING="y" \ + REAL_OCAMLC="$(OCAMLOPT)" \ + CREATE_LIB=yes \ + make_deps=yes +pncl: profiling-native-code-library + +# packs byte-code objects +pack-byte-code: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(BCRESULT).cmo \ + REAL_RESULT="$(BCRESULT)" \ + PACK_LIB=yes make_deps=yes +pabc: pack-byte-code + +# packs native-code objects +pack-native-code: $(PRE_TARGETS) + $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ + $(NCRESULT).cmx $(NCRESULT).$(EXT_OBJ) \ + REAL_RESULT="$(NCRESULT)" \ + REAL_OCAMLC="$(OCAMLOPT)" \ + PACK_LIB=yes make_deps=yes +panc: pack-native-code + +# generates HTML-documentation +htdoc: $(DOC_DIR)/$(RESULT)/html/index.html + +# generates Latex-documentation +ladoc: $(DOC_DIR)/$(RESULT)/latex/doc.tex + +# generates PostScript-documentation +psdoc: $(DOC_DIR)/$(RESULT)/latex/doc.ps + +# generates PDF-documentation +pdfdoc: $(DOC_DIR)/$(RESULT)/latex/doc.pdf + +# generates all supported forms of documentation +doc: htdoc ladoc psdoc pdfdoc + +########################################################################### +# LOW LEVEL RULES + +$(REAL_RESULT): $(REAL_IMPL_INTF) $(OBJ_LINK) $(EXTRADEPS) $(RESULTDEPS) + $(REAL_OCAMLFIND) $(REAL_OCAMLC) \ + $(OCAML_FIND_PACKAGES) $(OCAML_FIND_LINKPKG) \ + $(OBJS_LIBS) $(ALL_LDFLAGS) -o $@$(EXE) \ + $(REAL_IMPL) + +nolink: $(REAL_IMPL_INTF) $(OBJ_LINK) + +ifdef WIN32 +$(REAL_RESULT).dll: $(REAL_IMPL_INTF) $(OBJ_LINK) + $(CAMLIDLDLL) $(CAMLIDLDLLFLAGS) $(OBJ_LINK) $(CLIBS) \ + -o $@ $(REAL_IMPL) +endif + +%$(TOPSUFFIX): $(REAL_IMPL_INTF) $(OBJ_LINK) $(EXTRADEPS) + $(REAL_OCAMLFIND) $(OCAMLMKTOP) \ + $(OCAML_FIND_PACKAGES) $(OCAML_FIND_LINKPKG) \ + $(OBJS_LIBS) $(ALL_LDFLAGS) -o $@$(EXE) \ + $(REAL_IMPL) + +.SUFFIXES: .mli .ml .cmi .cmo .cmx .cma .cmxa .$(EXT_OBJ) \ + .mly .di .d .$(EXT_LIB) .idl %.oxridl .c .m .$(EXT_CXX) .h .so \ + .rep .zog .glade + +ifndef STATIC +ifdef MINGW +# From OCaml 3.11.0, ocamlmklib is available on windows +OCAMLMLIB_EXISTS = $(shell which $(OCAMLMKLIB)) +ifeq ($(strip $(OCAMLMLIB_EXISTS)),) +$(DLLSONAME): $(OBJ_LINK) + $(CC) $(CFLAGS) $(CFLAGS_WIN32) $(OBJ_LINK) -shared -o $@ \ + $(wildcard $(foreach dir,$(LIBDIRS),$(CLIBS:%=$(dir)/lib%.a))) \ + '$(OCAMLLIBPATH)/ocamlrun.a' \ + -Wl,--whole-archive \ + -Wl,--export-all-symbols \ + -Wl,--allow-multiple-definition \ + -Wl,--enable-auto-import +else +$(DLLSONAME): $(OBJ_LINK) + $(OCAMLMKLIB) $(INCFLAGS) $(CLIBFLAGS) \ + -o $(CLIB_BASE) $(OBJ_LINK) $(CLIBS:%=-l%) \ + $(CFRAMEWORKS:%=-framework %) \ + $(OCAMLMKLIB_FLAGS) +endif +else +ifdef MSVC +$(DLLSONAME): $(OBJ_LINK) + link /NOLOGO /DLL /OUT:$@ $(OBJ_LINK) \ + $(wildcard $(foreach dir,$(LIBDIRS),$(CLIBS:%=$(dir)/%.lib))) \ + '$(OCAMLLIBPATH)/ocamlrun.lib' + +else +$(DLLSONAME): $(OBJ_LINK) + $(OCAMLMKLIB) $(INCFLAGS) $(CLIBFLAGS) \ + -o $(CLIB_BASE) $(OBJ_LINK) $(CLIBS:%=-l%) $(CFRAMEWORKS:%=-framework %) \ + $(OCAMLMKLIB_FLAGS) +endif +endif +endif + +ifndef LIB_PACK_NAME +$(RESULT).cma: $(REAL_IMPL_INTF) $(MAKEDLL) $(EXTRADEPS) $(RESULTDEPS) + $(REAL_OCAMLFIND) $(REAL_OCAMLC) -a $(OBJS_LIBS) $(ALL_LDFLAGS) -o $@ $(REAL_IMPL) + +$(RESULT).cmxa $(RESULT).$(EXT_LIB): $(REAL_IMPL_INTF) $(EXTRADEPS) $(RESULTDEPS) + $(REAL_OCAMLFIND) $(OCAMLOPT) -a $(OBJS_LIBS) $(ALL_LDFLAGS) -o $@ $(REAL_IMPL) +else +# Packing a bytecode library +LIB_PACK_NAME_MLI = $(wildcard $(LIB_PACK_NAME).mli) +ifeq ($(LIB_PACK_NAME_MLI),) +LIB_PACK_NAME_CMI = $(LIB_PACK_NAME).cmi +else +# $(LIB_PACK_NAME).mli exists, it likely depends on other compiled interfaces +LIB_PACK_NAME_CMI = +$(LIB_PACK_NAME).cmi: $(REAL_IMPL_INTF) +endif +ifdef BYTE_OCAML +$(LIB_PACK_NAME_CMI) $(LIB_PACK_NAME).cmo: $(REAL_IMPL_INTF) + $(REAL_OCAMLFIND) $(REAL_OCAMLC) -pack -o $(LIB_PACK_NAME).cmo $(OCAMLLDFLAGS) $(REAL_IMPL) +# Packing into a unit which can be transformed into a library +# Remember the .ml's must have been compiled with -for-pack $(LIB_PACK_NAME) +else +$(LIB_PACK_NAME_CMI) $(LIB_PACK_NAME).cmx: $(REAL_IMPL_INTF) + $(REAL_OCAMLFIND) $(OCAMLOPT) -pack -o $(LIB_PACK_NAME).cmx $(OCAMLLDFLAGS) $(REAL_IMPL) +endif + +$(RESULT).cma: $(LIB_PACK_NAME).cmi $(LIB_PACK_NAME).cmo $(MAKEDLL) $(EXTRADEPS) $(RESULTDEPS) + $(REAL_OCAMLFIND) $(REAL_OCAMLC) -a $(OBJS_LIBS) $(ALL_LDFLAGS) -o $@ $(LIB_PACK_NAME).cmo + +$(RESULT).cmxa $(RESULT).$(EXT_LIB): $(LIB_PACK_NAME).cmi $(LIB_PACK_NAME).cmx $(EXTRADEPS) $(RESULTDEPS) + $(REAL_OCAMLFIND) $(OCAMLOPT) -a $(OBJS_LIBS) $(filter-out -custom, $(ALL_LDFLAGS)) -o $@ $(LIB_PACK_NAME).cmx +endif + +$(RES_CLIB): $(OBJ_LINK) +ifndef MSVC + ifneq ($(strip $(OBJ_LINK)),) + $(AR) rcs $@ $(OBJ_LINK) + endif +else + ifneq ($(strip $(OBJ_LINK)),) + lib -nologo -debugtype:cv -out:$(RES_CLIB) $(OBJ_LINK) + endif +endif + +%.cmi: %.mli $(EXTRADEPS) + $(QUIET)pp=`sed -n -e '/^#/d' -e 's/(\*pp \([^*]*\) \*)/\1/p;q' $<`; \ + if [ -z "$$pp" ]; then \ + $(ECHO) $(REAL_OCAMLFIND) $(INTF_OCAMLC) $(OCAML_FIND_PACKAGES) \ + -c $(THREAD_FLAG) $(ANNOT_FLAG) \ + $(OCAMLFLAGS) $(INCFLAGS) $<; \ + $(REAL_OCAMLFIND) $(INTF_OCAMLC) $(OCAML_FIND_PACKAGES) \ + -c $(THREAD_FLAG) $(ANNOT_FLAG) \ + $(OCAMLFLAGS) $(INCFLAGS) $<; \ + else \ + $(ECHO) $(REAL_OCAMLFIND) $(INTF_OCAMLC) $(OCAML_FIND_PACKAGES) \ + -c -pp \"$$pp $(PPFLAGS)\" $(THREAD_FLAG) $(ANNOT_FLAG) \ + $(OCAMLFLAGS) $(INCFLAGS) $<; \ + $(REAL_OCAMLFIND) $(INTF_OCAMLC) $(OCAML_FIND_PACKAGES) \ + -c -pp "$$pp $(PPFLAGS)" $(THREAD_FLAG) $(ANNOT_FLAG) \ + $(OCAMLFLAGS) $(INCFLAGS) $<; \ + fi + +%.cmi: %$(IMPL_SUF); + +%$(IMPL_SUF) %.$(EXT_OBJ): %.ml $(EXTRADEPS) + $(QUIET)pp=`sed -n -e '/^#/d' -e 's/(\*pp \([^*]*\) \*)/\1/p;q' $<`; \ + if [ -z "$$pp" ]; then \ + $(ECHO) $(REAL_OCAMLFIND) $(REAL_OCAMLC) $(OCAML_FIND_PACKAGES) \ + -c $(ALL_OCAMLCFLAGS) $<; \ + $(REAL_OCAMLFIND) $(REAL_OCAMLC) $(OCAML_FIND_PACKAGES) \ + -c $(ALL_OCAMLCFLAGS) $<; \ + else \ + $(ECHO) $(REAL_OCAMLFIND) $(REAL_OCAMLC) $(OCAML_FIND_PACKAGES) \ + -c -pp \"$$pp $(PPFLAGS)\" $(ALL_OCAMLCFLAGS) $<; \ + $(REAL_OCAMLFIND) $(REAL_OCAMLC) $(OCAML_FIND_PACKAGES) \ + -c -pp "$$pp $(PPFLAGS)" $(ALL_OCAMLCFLAGS) $<; \ + fi + +.PRECIOUS: %.ml +%.ml: %.mll + $(OCAMLLEX) $(LFLAGS) $< + +.PRECIOUS: %.ml %.mli +%.ml %.mli: %.mly + $(OCAMLYACC) $(YFLAGS) $< + $(QUIET)pp=`sed -n -e 's/.*(\*pp \([^*]*\) \*).*/\1/p;q' $<`; \ + if [ ! -z "$$pp" ]; then \ + mv $*.ml $*.ml.temporary; \ + echo "(*pp $$pp $(PPFLAGS)*)" > $*.ml; \ + cat $*.ml.temporary >> $*.ml; \ + rm $*.ml.temporary; \ + mv $*.mli $*.mli.temporary; \ + echo "(*pp $$pp $(PPFLAGS)*)" > $*.mli; \ + cat $*.mli.temporary >> $*.mli; \ + rm $*.mli.temporary; \ + fi + + +.PRECIOUS: %.ml +%.ml: %.rep + $(CAMELEON_REPORT) $(CAMELEON_REPORT_FLAGS) -gen $< + +.PRECIOUS: %.ml +%.ml: %.zog + $(CAMELEON_ZOGGY) $(CAMELEON_ZOGGY_FLAGS) -impl $< > $@ + +.PRECIOUS: %.ml +%.ml: %.glade + $(OCAML_GLADECC) $(OCAML_GLADECC_FLAGS) $< > $@ + +.PRECIOUS: %.ml %.mli +%.ml %.mli: %.oxridl + $(OXRIDL) $< + +.PRECIOUS: %.ml %.mli %_stubs.c %.h +%.ml %.mli %_stubs.c %.h: %.idl + $(CAMLIDL) $(MAYBE_IDL_HEADER) $(IDLFLAGS) \ + $(CAMLIDLFLAGS) $< + $(QUIET)if [ $(NOIDLHEADER) ]; then touch $*.h; fi + +%.$(EXT_OBJ): %.c + $(OCAMLC) -c -cc "$(CC)" -ccopt "$(CFLAGS) \ + $(CPPFLAGS) $(CPPFLAGS_WIN32) \ + $(CFLAGS_WIN32) $(CINCFLAGS) $(CFLAG_O)$@ " $< + +%.$(EXT_OBJ): %.m + $(CC) -c $(CFLAGS) $(CINCFLAGS) $(CPPFLAGS) \ + -I'$(OCAMLLIBPATH)' \ + $< $(CFLAG_O)$@ + +%.$(EXT_OBJ): %.$(EXT_CXX) + $(CXX) -c $(CXXFLAGS) $(CINCFLAGS) $(CPPFLAGS) \ + -I'$(OCAMLLIBPATH)' \ + $< $(CFLAG_O)$@ + +$(MLDEPDIR)/%.d: %.ml + $(QUIET)if [ ! -d $(@D) ]; then mkdir -p $(@D); fi + $(QUIET)pp=`sed -n -e '/^#/d' -e 's/(\*pp \([^*]*\) \*)/\1/p;q' $<`; \ + if [ -z "$$pp" ]; then \ + $(ECHO) $(REAL_OCAMLFIND) $(OCAMLDEP) $(OCAML_DEP_PACKAGES) \ + $(INCFLAGS) $< \> $@; \ + $(REAL_OCAMLFIND) $(OCAMLDEP) $(OCAML_DEP_PACKAGES) \ + $(INCFLAGS) $< > $@; \ + else \ + $(ECHO) $(REAL_OCAMLFIND) $(OCAMLDEP) $(OCAML_DEP_PACKAGES) \ + -pp \"$$pp $(PPFLAGS)\" $(INCFLAGS) $< \> $@; \ + $(REAL_OCAMLFIND) $(OCAMLDEP) $(OCAML_DEP_PACKAGES) \ + -pp "$$pp $(PPFLAGS)" $(INCFLAGS) $< > $@; \ + fi + +$(BCDIDIR)/%.di $(NCDIDIR)/%.di: %.mli + $(QUIET)if [ ! -d $(@D) ]; then mkdir -p $(@D); fi + $(QUIET)pp=`sed -n -e '/^#/d' -e 's/(\*pp \([^*]*\) \*)/\1/p;q' $<`; \ + if [ -z "$$pp" ]; then \ + $(ECHO) $(REAL_OCAMLFIND) $(OCAMLDEP) $(DEPFLAGS) $(INCFLAGS) $< \> $@; \ + $(REAL_OCAMLFIND) $(OCAMLDEP) $(DEPFLAGS) $(INCFLAGS) $< > $@; \ + else \ + $(ECHO) $(REAL_OCAMLFIND) $(OCAMLDEP) $(DEPFLAGS) \ + -pp \"$$pp $(PPFLAGS)\" $(INCFLAGS) $< \> $@; \ + $(REAL_OCAMLFIND) $(OCAMLDEP) $(DEPFLAGS) \ + -pp "$$pp $(PPFLAGS)" $(INCFLAGS) $< > $@; \ + fi + +$(DOC_DIR)/$(RESULT)/html: + mkdir -p $@ + +$(DOC_DIR)/$(RESULT)/html/index.html: $(DOC_DIR)/$(RESULT)/html $(DOC_FILES) + rm -rf $ "" + | Some x -> string_of_int x + +let string_of_precision = function + | None -> "" + | Some x -> "." ^ string_of_int x + +let string_of_sizespec = function + | None -> "" + | Some s-> + match s with + | Sll -> "ll" | Sl -> "l" | SL -> "L" | Sh -> "h" + | Shh -> "hh" | Sj -> "j" | Sz -> "z" | St -> "t" + +let string_of_convop = function + | Ca -> "a" | CA -> "A" | Cc -> "c" | Cd -> "d" + | Ce -> "e" | CE -> "E" | Cf -> "f" | Cg -> "g" + | CG -> "G" | Ci -> "i" | Cn -> "n" | Co -> "o" + | Cp -> "p" | Cs -> "s" | Cu -> "u" | Cx -> "x" + | CX -> "X" | CPercent -> "%" + +let string_of_format f = + "%" ^ + string_of_options f ^ + string_of_minwidth f.minwidth ^ + string_of_precision f.precision ^ + string_of_sizespec f.sizespec ^ + string_of_convop f.convop + +type section = + | Format of pformat + | String of string + +let sec_of_format cs = + (* 1. Read zero or more flags -, +, 0, #, *) + let cs = ref cs in + let lj, pz, sa, sp, va, fin = + ref false, ref false, ref false, ref false, ref false, ref false + in + while not !fin do + match !cs with + | '-'::_ -> set lj; cs := tl !cs + | '+'::_ -> set sa; cs := tl !cs + | '0'::_ -> set pz; cs := tl !cs + | ' '::_ -> set sp; cs := tl !cs + | '#'::_ -> set va; cs := tl !cs + | _ -> set fin + done; + (* 2. Read a possible minimum field width *) + let minwidth = + let fwchars, rest = cleavewhile isdigit !cs in + cs := rest; + if fwchars = [] then None else Some (int_of_string (implode fwchars)) + in + (* 3. Read an optional precision specification *) + let precision = + match !cs with + | '.'::more -> + cs := more; + let pchars, rest = cleavewhile isdigit !cs in + cs := rest; + if pchars = [] then None else Some (int_of_string (implode pchars)) + | _ -> None + in + (* 4. Read an optional size specification *) + let sizespec = + match !cs with + | 'l'::'l'::r -> cs := r; Some Sll + | 'l'::r -> cs := r; Some Sl + | 'L'::r -> cs := r; Some SL + | 'h'::'h'::r -> cs := r; Some Shh + | 'h'::r -> cs := r; Some Sh + | 'j'::r -> cs := r; Some Sj + | 'z'::r -> cs := r; Some Sz + | 't'::r -> cs := r; Some St + | _ -> None + in + (* 5. Read the conversion operation *) + let convop = + match !cs with + | 'a'::r -> cs := r; Ca + | 'A'::r -> cs := r; CA + | 'c'::r -> cs := r; Cc + | 'd'::r -> cs := r; Cd + | 'e'::r -> cs := r; Ce + | 'E'::r -> cs := r; CE + | 'f'::r -> cs := r; Cf + | 'g'::r -> cs := r; Cg + | 'G'::r -> cs := r; CG + | 'i'::r -> cs := r; Ci + | 'n'::r -> cs := r; Cn + | 'o'::r -> cs := r; Co + | 'p'::r -> cs := r; Cp + | 's'::r -> cs := r; Cs + | 'u'::r -> cs := r; Cu + | 'x'::r -> cs := r; Cx + | 'X'::r -> cs := r; CX + | '%'::r -> cs := r; CPercent + | _ -> raise (Failure "sec_of_format") + in + {leftjustify = !lj; + padzero = !pz; + signalways = !sa; + space = !sp; + variant = !va; + minwidth = minwidth; + precision = precision; + sizespec = sizespec; + convop = convop}, + !cs + +let rec sections_of_string_inner secs currstr = function + | '%'::m -> + let sec, rest = sec_of_format m in + if currstr = [] + then sections_of_string_inner (Format sec::secs) currstr rest + else sections_of_string_inner (Format sec::String (implode (rev currstr))::secs) [] rest + | x::xs -> + sections_of_string_inner secs (x::currstr) xs + | [] -> + if currstr = [] then rev secs else rev (String (implode (rev currstr))::secs) + +(* Take a format string, and split it into sections *) +let sections_of_string s = + try + sections_of_string_inner [] [] (explode s) + with + _ -> raise (PrintfFailure "Couldn't parse Printf format") + +(* Substitute an integer into a format, returning the empty string if the format is not suitable. *) + +(* For now, just 'd', 'u', 'i' *) +let sub_int i f = + (*i Printf.printf "Substituting format |%s|\n" (string_of_format f); i*) + let str = string_of_int i + in let padding = if f.padzero then '0' else ' ' in + if f.minwidth <> None && String.length str < unopt f.minwidth then + let padding = many padding (unopt f.minwidth - String.length str) in + if f.leftjustify then str ^ implode padding else implode padding ^ str + else + str + +(* Given a list of integers, substitute into integer formats *) +let rec substitute_inner donesections sections = function + | [] -> rev donesections @ sections + | i::is -> + match sections with + | [] -> rev donesections @ sections + | String s::more -> substitute_inner (String s::donesections) more (i::is) + | Format f::more -> substitute_inner (String (sub_int i f)::donesections) more is + +let substitute x = + try substitute_inner [] x with + _ -> raise (PrintfFailure "Failed to substitute integer") + +(* Flatten a set of sections to a string *) +let string_of_section = function + | String s -> s + | Format f -> string_of_format f + +let string_of_sections sections = + try fold_left ( ^ ) "" (map string_of_section sections) with + _ -> raise (PrintfFailure "Failed to build string from Printf sections") + +type encoding = + | Raw + | UTF8 + | Stripped + +(* Just strip everything which isn't 7 bit ASCII *) +let crude_de_unicode s = + implode (map char_of_int (lose (fun x -> x > 127) (Pdftext.codepoints_of_pdfdocstring s))) + +let encode_output enc s = + match enc with + | Raw -> s + | UTF8 -> Pdftext.utf8_of_pdfdocstring s + | Stripped -> crude_de_unicode s + +(* Get the number of pages in file. Doesn't need decryption. *) +let endpage_io i user_pw owner_pw = + let pdf = Pdfread.pdf_of_input_lazy user_pw owner_pw i in + Pdfpage.endpage pdf + +(* Raised when syntax is ok, but endpage is too low. Caught by validator. +Caught and reraised as normal failure by parse_pagespec. *) +exception PageSpecUnknownPage of int + +(* There would be no pages *) +exception PageSpecWouldBeNoPages + +(* Raised when syntax is wrong. Caught and reraised by parse_pagespec and +validator. *) +exception PageSpecBadSyntax + +(* Parsing range specifications *) +let rec splitat_commas toks = + match cleavewhile (neq (Pdfgenlex.LexName ",")) toks with + | [], [] -> [] + | [], _ -> raise PageSpecBadSyntax + | some, [] -> [some] + | _::_ as before, _::rest -> before::splitat_commas rest + +let rec mk_numbers endpage = function + | [Pdfgenlex.LexInt n] -> [n] + | [Pdfgenlex.LexName "end"] -> [endpage] + | [Pdfgenlex.LexInt n; Pdfgenlex.LexName "-"; Pdfgenlex.LexInt n'] -> + if n > n' then rev (ilist n' n) else ilist n n' + | [Pdfgenlex.LexName "end"; Pdfgenlex.LexName "-"; Pdfgenlex.LexInt n] -> + if n <= endpage + then rev (ilist n endpage) + else raise (PageSpecUnknownPage n) + | [Pdfgenlex.LexInt n; Pdfgenlex.LexName "-"; Pdfgenlex.LexName "end"] -> + if n <= endpage + then ilist n endpage + else raise (PageSpecUnknownPage n) + | [Pdfgenlex.LexName "end"; Pdfgenlex.LexName "-"; Pdfgenlex.LexName "end"] -> + [endpage] + | [Pdfgenlex.LexName "even"] -> + drop_odds (ilist 1 endpage) + | [Pdfgenlex.LexName "odd"] -> + really_drop_evens (ilist 1 endpage) + | [Pdfgenlex.LexName "all"] -> + ilist 1 endpage + | [Pdfgenlex.LexName "reverse"] -> + rev (ilist 1 endpage) + | toks -> + let ranges = splitat_commas toks in + if ranges = [toks] then raise PageSpecBadSyntax else + flatten (map (mk_numbers endpage) ranges) + +(* Space dashes and commas *) +let rec add_spaces = function + | [] -> [] + | ('-' | ',') as h::t -> ' '::h::' '::add_spaces t + | h::t -> h::add_spaces t + +let space_string s = + implode (add_spaces (explode s)) + +let fixup_negatives endpage = function + | Pdfgenlex.LexName s when String.length s > 1 && s.[0] = '~' -> + Pdfgenlex.LexInt (endpage + 1 + ~-(int_of_string (implode (tl (explode s))))) + | x -> x + +let parse_pagespec_inner endpage spec = + let spec = space_string spec in + if endpage < 1 then raise (Pdf.PDFError "This PDF file has no pages and is therefore malformed") else + let numbers = + try + match rev (explode spec) with + | ['n'; 'e'; 'v'; 'e'] -> keep even (ilist 1 endpage) + | ['d'; 'd'; 'o'] -> keep odd (ilist 1 endpage) + | 'n'::'e'::'v'::'e'::more -> keep even (mk_numbers endpage (Pdfgenlex.lex_string (implode (rev more)))) + | 'd'::'d'::'o'::more -> keep odd (mk_numbers endpage (Pdfgenlex.lex_string (implode (rev more)))) + | _ -> mk_numbers endpage (map (fixup_negatives endpage) (Pdfgenlex.lex_string spec)) + with + e -> raise PageSpecBadSyntax + in + if numbers = [] then raise PageSpecWouldBeNoPages else + iter + (fun n -> + if n <= 0 || n > endpage then raise (PageSpecUnknownPage n)) + numbers; + numbers + +let parse_pagespec pdf spec = + try parse_pagespec_inner (Pdfpage.endpage pdf) spec with + | PageSpecUnknownPage n -> + raise (Pdf.PDFError ("Page " ^ string_of_int n ^ " does not exist.")) + | PageSpecWouldBeNoPages -> + raise (Pdf.PDFError ("Page range specifies no pages")) + | e -> + raise + (Pdf.PDFError + ("Bad page specification " ^ spec ^ + ". Raw error was " ^ Printexc.to_string e ^ + ". Last page was " ^ string_of_int (Pdfpage.endpage pdf))) + +(* To validate a pagespec as being syntactically correct without the PDF in +question. This is nasty, since the parser above includes checking based on the +endpage of the PDF (which we don't have). Pass 100 as the endpage, doubling on +page range exception, bailng out above 500000. *) +let rec validate_pagespec_inner n spec = + try + ignore (parse_pagespec_inner n spec); true + with + | PageSpecUnknownPage _ -> if n < 500000 then validate_pagespec_inner (n * 2) spec else false + | PageSpecBadSyntax | _ -> false + +let validate_pagespec spec = + validate_pagespec_inner 100 spec + +(* Convert an integer list representing a set to a page specification, in order. *) +let string_of_pagespec pdf = function + | [] -> "" + | is -> + let iseven len is = + drop_odds (ilist 1 len) = is + in let isodd len is = + really_drop_evens (ilist 1 len) = is + in let isall len is = + ilist 1 len = is + in let is = sort compare is + in let len = Pdfpage.endpage pdf in + let rec mkranges prev = function + | [] -> map extremes (rev (map rev prev)) + | h::t -> + match prev with + | (ph::pht)::pt when h = ph + 1 -> mkranges ((h::ph::pht)::pt) t + | (_::_)::_ -> mkranges ([h]::prev) t + | []::_ -> assert false + | [] -> mkranges [[h]] t + in + if iseven len is && len > 3 then "even" else + if isodd len is && len > 2 then "odd" else + if isall len is then "all" else + let ranges = mkranges [] is in + let rangestrings = + map + (function (s, e) -> + if s = e + then string_of_int s + else string_of_int s ^ "-" ^ string_of_int e) + ranges + in + fold_left ( ^ ) "" (interleave "," rangestrings) + +let string_of_range r = + fold_left (fun a b -> a ^ " " ^ b) "" (map string_of_int r) + +let print_pdf_objs pdf = + Printf.printf "Trailerdict: %s\n" (Pdfwrite.string_of_pdf pdf.Pdf.trailerdict); + Printf.printf "Root: %i\n" pdf.Pdf.root; + begin match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with + | Some catalog -> + Printf.printf "Catalog: %s\n" (Pdfwrite.string_of_pdf catalog); + begin match Pdf.lookup_direct pdf "/Pages" catalog with + | Some pages -> + Printf.printf "Pages: %s\n" (Pdfwrite.string_of_pdf pages) + | None -> + flprint "no catalog\n" + end + | None -> + flprint "No catalog!\n" + end; + Pdf.objiter + (fun n obj -> + Printf.printf "%i 0 obj:\n\n" n; + Printf.printf "%s\n" (Pdfwrite.string_of_pdf obj)) + pdf + +(* Recompress anything which isn't compressed, unless it's metadata. *) +let recompress_stream pdf = function + (* If there is no compression, compress with /FlateDecode *) + | Pdf.Stream {contents = (dict, _)} as stream -> + begin match + Pdf.lookup_direct pdf "/Filter" dict, + Pdf.lookup_direct pdf "/Type" dict + with + | _, Some (Pdf.Name "/Metadata") -> () + | (None | Some (Pdf.Array [])), _ -> + Pdfcodec.encode_pdfstream pdf Pdfcodec.Flate stream + | _ -> () + end + | _ -> assert false + +let recompress_pdf pdf = + if not (Pdfcrypt.is_encrypted pdf) then + Pdf.iter_stream (recompress_stream pdf) pdf; + pdf + +let decompress_pdf pdf = + if not (Pdfcrypt.is_encrypted pdf) then + (Pdf.iter_stream (Pdfcodec.decode_pdfstream_until_unknown pdf) pdf); + pdf + +(* Return page label at pdf page num, or page number in arabic if no label *) +let pagelabel pdf num = + Pdfpagelabels.pagelabeltext_of_pagenumber + num + (Pdfpagelabels.complete (Pdfpagelabels.read pdf)) + + +let rec process_text text m = + match m with + | ([] : (string * string) list) -> Cpdfstrftime.strftime text + | (s, r)::t -> process_text (string_replace_all s r text) t + +let expand_date = function + | "now" -> Cpdfstrftime.strftime "D:%Y%m%d%t%M%S" + | x -> x + +let process_pages f pdf range = + let pages = Pdfpage.pages_of_pagetree pdf in + let pages' = + map2 + (fun n p -> if mem n range then f n p else p) + (ilist 1 (length pages)) + pages + in + Pdfpage.change_pages true pdf pages' + +let iter_pages f pdf range = + let pages = Pdfpage.pages_of_pagetree pdf in + iter2 + (fun n p -> if mem n range then f n p) + (ilist 1 (length pages)) + pages + +let map_pages f pdf range = + let pages = Pdfpage.pages_of_pagetree pdf in + option_map2 + (fun n p -> if mem n range then Some (f n p) else None) + (ilist 1 (length pages)) + pages + +(* Add stack operators to a content stream to ensure it is composeable. FIXME: +This will go away once we're using postpend_content or similar for twoup and do_stamp... *) +let protect_removeme pdf resources content = + let ops = Pdfops.parse_operators pdf resources content in + let qs = length (keep (eq Pdfops.Op_q) ops) + and bigqs = length (keep (eq Pdfops.Op_Q) ops) in + let deficit = if qs > bigqs then qs - bigqs else 0 in + if deficit <> 0 then Printf.eprintf "Q Deficit was nonzero. Fixing. %i\n" deficit; + Pdfops.stream_of_ops ([Pdfops.Op_q] @ ops @ many Pdfops.Op_Q deficit @ [Pdfops.Op_Q]) + +exception SoftError of string + +let error s = raise (SoftError s) + +exception HardError of string + +(* Union two resource dictionaries from the same PDF. *) +let combine_pdf_resources pdf a b = + let a_entries = + match a with + | Pdf.Dictionary entries -> entries + | _ -> [] + in let b_entries = + match b with + | Pdf.Dictionary entries -> entries + | _ -> [] + in + let resource_keys = + ["/Font"; "/ExtGState"; "/ColorSpace"; "/Pattern"; + "/Shading"; "/XObject"; "/Properties"] + in + let combine_entries key = + let a_entries = + match Pdf.lookup_direct pdf key a with + | Some (Pdf.Dictionary d) -> d + | _ -> [] + in let b_entries = + match Pdf.lookup_direct pdf key b with + | Some (Pdf.Dictionary d) -> d + | _ -> [] + in + key, Pdf.Dictionary (a_entries @ b_entries) + in + let unknown_keys_a = + lose (fun (k, _) -> mem k resource_keys) a_entries + in let unknown_keys_b = + lose (fun (k, _) -> mem k resource_keys) b_entries + in let combined_known_entries = + map combine_entries resource_keys + in + Pdf.Dictionary (unknown_keys_a @ unknown_keys_b @ combined_known_entries) + +(* \section{Build PDF Presentations} *) +let change_page_effect t d horizontal inward direction effect_duration page = + let checkname = function + | "Split" | "Blinds" | "Box" | "Wipe" | "Dissolve" | "Glitter" -> () + | _ -> error "Unknown presentation type" + in + let rest = page.Pdfpage.rest in + let transdict = + match t with + | None -> + Pdf.Dictionary [] + | Some name -> + checkname name; + Pdf.Dictionary [("/S", Pdf.Name ("/" ^ name))] + in + let transdict = Pdf.add_dict_entry transdict "/D" (Pdf.Real effect_duration) in + let transdict = + match t with + | Some ("Split" | "Blinds") -> + Pdf.add_dict_entry + transdict "/Dm" (Pdf.Name (if horizontal then "/H" else "/V")) + | _ -> transdict + in + let transdict = + match t with + | Some ("Split" | "Box") -> + Pdf.add_dict_entry + transdict "/M" (Pdf.Name (if inward then "/I" else "/O")) + | _ -> transdict + in + let transdict = + match t with + | Some ("Wipe" | "Glitter") -> + Pdf.add_dict_entry transdict "/Di" (Pdf.Integer direction) + | _ -> transdict + in + let rest = Pdf.add_dict_entry rest "/Trans" transdict in + let rest = + match d with + | None -> Pdf.remove_dict_entry rest "/Dur" + | Some delay -> Pdf.add_dict_entry rest "/Dur" (Pdf.Real delay) + in + {page with Pdfpage.rest = rest} + +let presentation range t d h i dir effect_dur pdf = + let pages = Pdfpage.pages_of_pagetree pdf in + let pages' = + map2 + (fun page num -> + if mem num range + then change_page_effect t d h i dir effect_dur page + else page) + pages + (indx pages) + in + Pdfpage.change_pages true pdf pages' + +(* \section{Attaching files} *) +let make_filestream file = + let data = + let ch = open_in_bin file in + let len = in_channel_length ch in + let stream = mkbytes len in + for x = 0 to bytes_size stream - 1 do + bset stream x (input_byte ch) + done; + close_in ch; + stream + in + Pdf.Stream + (ref (Pdf.Dictionary + [("/Length", Pdf.Integer (bytes_size data)); + ("/Type", Pdf.Name "/EmbeddedFile")], + Pdf.Got data)) + +let attach_file keepversion topage pdf file = + let filestream = make_filestream file in + let filestream_num = Pdf.addobj pdf filestream in + let filespec = + Pdf.Dictionary + [("/EF", Pdf.Dictionary ["/F", Pdf.Indirect filestream_num]); + ("/F", Pdf.String (Filename.basename file)); + ("/Type", Pdf.Name "/F")] + in + match topage with + | None -> + (* Look up /Names and /EmbeddedFiles and /Names. *) + let rootdict = Pdf.lookup_obj pdf pdf.Pdf.root in + let namedict = + match Pdf.lookup_direct pdf "/Names" rootdict with + | None -> Pdf.Dictionary [] + | Some namedict -> namedict + in + let embeddednamedict = + match Pdf.lookup_direct pdf "/EmbeddedFiles" namedict with + | None -> Pdf.Dictionary [] + | Some embeddednamedict -> embeddednamedict + in + let elts = + match Pdf.lookup_direct pdf "/Names" embeddednamedict with + | Some (Pdf.Array elts) -> elts + | _ -> [] + in + let names' = Pdf.Array (elts @ [Pdf.String (Filename.basename file); filespec]) in + let embeddednamedict' = Pdf.add_dict_entry embeddednamedict "/Names" names' in + let namedict' = Pdf.add_dict_entry namedict "/EmbeddedFiles" embeddednamedict' in + let rootdict' = Pdf.add_dict_entry rootdict "/Names" namedict' in + let rootnum = Pdf.addobj pdf rootdict' in + {pdf with + Pdf.minor = if keepversion then pdf.Pdf.minor else max pdf.Pdf.minor 4; + Pdf.root = rootnum; + Pdf.trailerdict = + Pdf.add_dict_entry + pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootnum)} + | Some pagenumber -> + let pages = Pdfpage.pages_of_pagetree pdf in + if pagenumber < 0 || pagenumber > length pages then error "attach_file: Page not found" else + let page = select pagenumber pages in + let annots = + match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with + | Some (Pdf.Array annots) -> annots + | _ -> [] + in + let rect = + let minx, miny, maxx, maxy = Pdf.parse_rectangle page.Pdfpage.mediabox in + Pdf.Array [Pdf.Real 18.; Pdf.Real (maxy -. 45.); Pdf.Real 45.; Pdf.Real (maxy -. 18.)] + in + let annot = + Pdf.Dictionary + [("/FS", filespec); + ("/Subtype", Pdf.Name "/FileAttachment"); + ("/Contents", Pdf.String (Filename.basename file)); + ("/Rect", rect)] + in + let annots' = Pdf.Array (annot::annots) in + let page' = + {page with Pdfpage.rest = Pdf.add_dict_entry page.Pdfpage.rest "/Annots" annots'} + in + let pages' = replace_number pagenumber page' pages in + let pdf = Pdfpage.change_pages false pdf pages' in + {pdf with + Pdf.minor = if keepversion then pdf.Pdf.minor else max pdf.Pdf.minor 4} + +let list_attached_files pdf = + let toplevel = + match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with + | None -> [] + | Some rootdict -> + match Pdf.lookup_direct pdf "/Names" rootdict with + | None -> [] + | Some namedict -> + match Pdf.lookup_direct pdf "/EmbeddedFiles" namedict with + | Some nametree -> + map + (function x -> x, 0) + (option_map + (function (Pdf.String s, _) -> Some s | _ -> None) + (Pdf.contents_of_nametree pdf nametree)) + | _ -> [] + in let pagelevel = + let pages = Pdfpage.pages_of_pagetree pdf in + flatten + (map2 + (fun page pagenumber -> + option_map + (function annot -> + match Pdf.lookup_direct pdf "/Subtype" annot with + | Some (Pdf.Name "/FileAttachment") -> + (match Pdf.lookup_direct pdf "/Contents" annot with + | Some (Pdf.String s) -> Some (s, pagenumber) + | _ -> None) + | _ -> None) + (match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with + | Some (Pdf.Array annots) -> annots + | _ -> [])) + pages + (indx pages)) + in + toplevel @ pagelevel + +(* \section{Remove Attached files} *) +let remove_attached_files_on_pages pdf = + let remove_from_page page = + {page with Pdfpage.rest = + Pdf.add_dict_entry page.Pdfpage.rest "/Annots" + (Pdf.Array + (option_map + (function annot -> + match Pdf.lookup_direct pdf "/Subtype" annot with + | Some (Pdf.Name "/FileAttachment") -> None + | _ -> Some annot) + (match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with + | Some (Pdf.Array annots) -> annots + | _ -> [])))} + in + Pdfpage.change_pages true pdf (map remove_from_page (Pdfpage.pages_of_pagetree pdf)) + +let remove_attached_files pdf = + let pdf = remove_attached_files_on_pages pdf in + match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with + | None -> pdf + | Some rootdict -> + match Pdf.lookup_direct pdf "/Names" rootdict with + | None -> pdf + | Some namedict -> + let namedict' = Pdf.remove_dict_entry namedict "/EmbeddedFiles" in + let rootdict' = Pdf.add_dict_entry rootdict "/Names" namedict' in + let rootdict'num = Pdf.addobj pdf rootdict' in + {pdf with + Pdf.root = + rootdict'num; + Pdf.trailerdict = + Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootdict'num)} + +(* \section{Copy an /ID from one file to another} *) +let copy_id keepversion copyfrom copyto = + match Pdf.lookup_direct copyfrom "/ID" copyfrom.Pdf.trailerdict with + | None -> copyto (* error "Source PDF file has no /ID entry to copy from" *) + | Some id -> + copyto.Pdf.trailerdict <- + Pdf.add_dict_entry copyto.Pdf.trailerdict "/ID" id; + copyto.Pdf.minor <- + if keepversion then copyto.Pdf.minor else max copyto.Pdf.minor 1; + copyto + +(* \section{Remove bookmarks} *) + +(* \section{Add bookmarks} *) +let read_lines input = + let lines = ref [] in + try + while true do + let c = read_line input in + lines =| c + done; [] + with + _ -> rev !lines + +(* Verify a list of bookmarks. Positive jumps of > 1 not allowed, no numbers +smaller than 0. *) +let rec verify_bookmarks pdf lastlevel endpage = function + | [] -> true + | {Pdfmarks.level = level; Pdfmarks.target = target}::more -> + let page = Pdfpage.pagenumber_of_target pdf target in + level < lastlevel + 2 && + level >= 0 && + page <= endpage && + page >= 0 && + verify_bookmarks pdf level endpage more + +(* Parse a line of the bookmarks file. *) + +(* Un-escape things which are escaped. Quotes, newlines and backslashes *) +let rec fixup_characters prev = function + | [] -> rev prev + | '\\'::'\\'::t -> fixup_characters ('\\'::prev) t + | '\\'::'\"'::t -> fixup_characters ('\"'::prev) t + | '\\'::'\n'::t -> fixup_characters ('\n'::prev) t + | h::t -> fixup_characters (h::prev) t + +let parse_bookmark_file verify pdf input = + try + let lines = Pdfio.read_lines input in + (*i Printf.printf "Read %i lines\n" (length lines); + iter (function x -> Printf.printf "%s\n" x) lines; i*) + let currline = ref 0 in + let bookmarks = ref [] in + iter + (function line -> + match + incr currline; + Pdfgenlex.lex_string line + with + | [Pdfgenlex.LexInt i; Pdfgenlex.LexString s; Pdfgenlex.LexInt i'; Pdfgenlex.LexName "open"] -> + bookmarks =| + {Pdfmarks.level = i; + Pdfmarks.text = Pdftext.pdfdocstring_of_utf8 (implode (fixup_characters [] (explode s))); + Pdfmarks.target = Pdfpage.target_of_pagenumber pdf i'; + Pdfmarks.isopen = true} + | [Pdfgenlex.LexInt i; Pdfgenlex.LexString s; Pdfgenlex.LexInt i'; Pdfgenlex.LexName "closed"] + | [Pdfgenlex.LexInt i; Pdfgenlex.LexString s; Pdfgenlex.LexInt i'] -> + bookmarks =| + {Pdfmarks.level = i; + Pdfmarks.text = Pdftext.pdfdocstring_of_utf8 (implode (fixup_characters [] (explode s))); + Pdfmarks.target = Pdfpage.target_of_pagenumber pdf i'; + Pdfmarks.isopen = false} + | [] -> () (* ignore blank lines *) + | _ -> + (*i flprint (Pdfgenlex.string_of_tokens n); i*) + error ("Bad bookmark file, line " ^ (string_of_int !currline))) + lines; + let bookmarks = rev !bookmarks in + if verify then + if verify_bookmarks pdf 0 (Pdfpage.endpage pdf) bookmarks + then bookmarks + else + error + "Bad bookmark file (References non-existant pages or is malformed)" + else + bookmarks + with + _ -> (*i Printf.printf "%s\n" (Printexc.to_string e); i*) error "Bad bookmark file (syntax)" + + +let add_bookmarks verify input pdf = + let parsed = parse_bookmark_file verify pdf input in + (*iter (fun b -> flprint (Pdfmarks.string_of_bookmark b); flprint "\n") parsed;*) + Pdfmarks.add_bookmarks parsed pdf + +(* \section{Set page mode} *) +let set_page_mode pdf s = + match s with + | "UseNone" | "UseOutlines" | "UseThumbs" + | "FullScreen" | "UseOC" | "UseAttachments" -> + begin match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with + | Some catalog -> + let catalog' = + Pdf.add_dict_entry catalog "/PageMode" (Pdf.Name ("/" ^ s)) + in + let catalognum = Pdf.addobj pdf catalog' in + let trailerdict' = + Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum) + in + {pdf with + Pdf.root = catalognum; + Pdf.trailerdict = trailerdict'} + | None -> error "bad root" + end + | _ -> error "Unknown page mode" + +(* \section{Set viewer preferences} *) +let set_viewer_preference (key, value, version) pdf = + match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with + | Some catalog -> + let viewer_preferences = + match Pdf.lookup_direct pdf "/ViewerPreferences" catalog with + | Some d -> d + | None -> Pdf.Dictionary [] + in + let viewer_preferences' = + Pdf.add_dict_entry viewer_preferences key value + in + let catalog' = + Pdf.add_dict_entry catalog "/ViewerPreferences" viewer_preferences' + in + let catalognum = Pdf.addobj pdf catalog' in + let trailerdict' = + Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum) + in + {pdf with + Pdf.minor = max pdf.Pdf.minor version; + Pdf.root = catalognum; + Pdf.trailerdict = trailerdict'} + | None -> error "bad root" + +(* \section{Set an entry in the /Info dictionary} *) +let set_pdf_info (key, value, version) pdf = + let infodict = + match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with + | Some d -> d + | None -> Pdf.Dictionary [] + in + let infodict' = Pdf.add_dict_entry infodict key value in + let objnum = Pdf.addobj pdf infodict' in + pdf.Pdf.trailerdict <- + Pdf.add_dict_entry pdf.Pdf.trailerdict "/Info" (Pdf.Indirect objnum); + pdf.Pdf.minor <- + max pdf.Pdf.minor version; + pdf + +(* \section{Set page layout} *) +let set_page_layout pdf s = + match s with + | "SinglePage" | "OneColumn" | "TwoColumnLeft" + | "TwoColumnRight" | "TwoPageLeft" | "TwoPageRight" -> + begin match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with + | Some catalog -> + let catalog' = + Pdf.add_dict_entry catalog "/PageLayout" (Pdf.Name ("/" ^ s)) + in + let catalognum = Pdf.addobj pdf catalog' in + let trailerdict' = + Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum) + in + {pdf with + Pdf.root = catalognum; + Pdf.trailerdict = trailerdict'} + | None -> error "bad root" + end + | _ -> error "Unknown page layout" + +(* \section{Set or replace metadata} *) +let set_metadata_from_bytes keepversion data pdf = + let metadata_stream = + Pdf.Stream + {contents = + (Pdf.Dictionary + ["/Length", Pdf.Integer (bytes_size data); + "/Type", Pdf.Name "/Metadata"; + "/Subtype", Pdf.Name "/XML"], + Pdf.Got data)} + in + let objnum = Pdf.addobj pdf metadata_stream in + let document_catalog = + match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with + | Some s -> s + | None -> error "Malformed PDF: No root." + in + let document_catalog' = + Pdf.add_dict_entry document_catalog "/Metadata" (Pdf.Indirect objnum) + in + let rootnum = Pdf.addobj pdf document_catalog' in + let trailerdict = + Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootnum) + in + {pdf with + Pdf.trailerdict = trailerdict; + Pdf.root = rootnum; + Pdf.minor = + if keepversion then pdf.Pdf.minor else max 4 pdf.Pdf.minor} + +let set_metadata keepversion filename pdf = + let ch = open_in_bin filename in + let data = mkbytes (in_channel_length ch) in + for x = 0 to bytes_size data - 1 do + bset data x (input_byte ch) + done; + set_metadata_from_bytes keepversion data pdf + +(* \section{Remove metadata} *) +let remove_metadata pdf = + match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with + | None -> error "malformed file" + | Some root -> + let root' = Pdf.remove_dict_entry root "/Metadata" in + let rootnum = Pdf.addobj pdf root' in + {pdf with + Pdf.trailerdict = + Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootnum); + Pdf.root = + rootnum} + +(* List bookmarks *) + +(* List the bookmarks, optionally deunicoding the text, in the given range to the given output *) +let list_bookmarks encoding range pdf output = + let process_string s = + let rec replace c cs = function + | [] -> [] + | h::t when h = c -> cs @ replace c cs t + | h::t -> h::replace c cs t + in + (* Convert to UTF8, raw, or stripped, and escape backslashed and quotation marks *) + let codepoints = Pdftext.codepoints_of_pdfdocstring s in + let escaped = + let bs = int_of_char '\\' + and nl = int_of_char '\n' + and n = int_of_char 'n' + and q = int_of_char '\"' in + replace bs [bs; bs] (replace nl [bs; n] (replace q [bs; q] codepoints)) + in + match encoding with + | UTF8 -> Pdftext.utf8_of_codepoints escaped + | Stripped -> implode (map char_of_int (lose (fun x -> x > 127) escaped)) + | Raw -> s + in + let bookmarks = Pdfmarks.read_bookmarks pdf in + let inrange = + keep + (function x -> + x.Pdfmarks.target = Pdfdest.NullDestination || + mem (Pdfpage.pagenumber_of_target pdf x.Pdfmarks.target) range) bookmarks + in + iter + (function mark -> + output.Pdfio.output_string + (Printf.sprintf "%i \"%s\" %i %s\n" + mark.Pdfmarks.level + (process_string mark.Pdfmarks.text) + (Pdfpage.pagenumber_of_target pdf mark.Pdfmarks.target) + (if mark.Pdfmarks.isopen then "open" else ""))) + inrange + +(* \section{Stamping files} *) +(* o is the stamp, u is the main pdf page *) +let do_stamp fast scale_to_fit isover pdf o u opdf = + (* Scale page stamp o to fit page u *) + let o = + if scale_to_fit then + let sxmin, symin, sxmax, symax = + Pdf.parse_rectangle + (match Pdf.lookup_direct pdf "/CropBox" o.Pdfpage.rest with | Some r -> r | None -> o.Pdfpage.mediabox) + in let txmin, tymin, txmax, tymax = + Pdf.parse_rectangle + (match Pdf.lookup_direct pdf "/CropBox" u.Pdfpage.rest with | Some r -> r | None -> u.Pdfpage.mediabox) + in + let xmag = (txmax -. txmin) /. (sxmax -. sxmin) + in let ymag = (tymax -. tymin) /. (symax -. symin) in + let scale = + if xmag < 0.999 && ymag < 0.999 then + if xmag > ymag then xmag else ymag + else if xmag >= 1.001 && ymag >= 1.001 then + if xmag > ymag then ymag else xmag + else if xmag >= 1.001 then ymag + else xmag + in + let dx = txmin +. ((txmax -. txmin) -. (sxmax -. sxmin) *. scale) /. 2. + in let dy = tymin +. ((tymax -. tymin) -. (symax -. symin) *. scale) /. 2. in + let scale_op = + Pdfops.Op_cm + (Pdftransform.matrix_of_transform + [Pdftransform.Translate (dx, dy); + Pdftransform.Scale ((sxmin, symin), scale, scale)]) + in + Pdfpage.prepend_operators opdf [scale_op] ~fast o + else + o + in + {u with + Pdfpage.content = + (if isover then ( @ ) else ( @@ )) + [protect_removeme pdf u.Pdfpage.resources u.Pdfpage.content] + [protect_removeme pdf o.Pdfpage.resources o.Pdfpage.content]; + Pdfpage.resources = + combine_pdf_resources pdf u.Pdfpage.resources o.Pdfpage.resources} + +let stamp (fast : bool) scale_to_fit isover range over pdf = + let marks = Pdfmarks.read_bookmarks pdf in + let pdf = Pdfmarks.remove_bookmarks pdf in + let over = Pdfmarks.remove_bookmarks over in + let pageseqs = ilist 1 (Pdfpage.endpage pdf) in + let over_firstpage_pdf = + match Pdfpage.pages_of_pagetree over with + | [] -> error "empty PDF" + | h::_ -> Pdfpage.change_pages true over [h] + in + let merged = Pdfmerge.merge_pdfs ~rotations:[Pdfmerge.DNR; Pdfmerge.DNR] false false ["a"; "b"] [pdf; over_firstpage_pdf] [pageseqs; [1]] in + let renamed_pdf = + Pdfpage.change_pages true + merged (Pdfpage.renumber_pages merged (Pdfpage.pages_of_pagetree merged)) + in + let renamed_pages = Pdfpage.pages_of_pagetree renamed_pdf in + let under_pages, over_page = + all_but_last renamed_pages, last renamed_pages + in + let new_pages = + map2 + (fun pageseq under_page -> + do_stamp fast scale_to_fit isover renamed_pdf + (if mem pageseq range then over_page else + Pdfpage.blankpage Pdfpaper.a4) + under_page over) + pageseqs + under_pages + in + Pdfmarks.add_bookmarks marks (Pdfpage.change_pages true renamed_pdf new_pages) + +(* Combine pages from two PDFs. For now, assume equal length. *) + +(* If [over] has more pages than [under], chop the excess. If the converse, pad +[over] to the same length *) +let equalize_pages under over = + let length_under = Pdfpage.endpage under + in let length_over = Pdfpage.endpage over + in + if length_over > length_under then + under, + (Pdfpage.change_pages true over (take (Pdfpage.pages_of_pagetree over) length_under)) + else if length_under > length_over then + under, + Pdfpage.change_pages true + over + (Pdfpage.pages_of_pagetree over @ + (many (Pdfpage.blankpage Pdfpaper.a4) (length_under - length_over))) + else + under, over + +let combine_pages (fast : bool) under over scaletofit swap equalize = + let marks_under = Pdfmarks.read_bookmarks under in + let marks_over = Pdfmarks.read_bookmarks over in + let under, over = if equalize then equalize_pages under over else under, over in + let under_length = Pdfpage.endpage under + in let over_length = Pdfpage.endpage over in + if under_length <> over_length then raise (Pdf.PDFError "combine_pages: not of equal length") else + let pageseqs_under = ilist 1 (Pdfpage.endpage under) + in let pageseqs_over = ilist 1 (Pdfpage.endpage over) in + let merged = + Pdfmerge.merge_pdfs ~rotations: [Pdfmerge.DNR; Pdfmerge.DNR] false false ["a"; "b"] [under; over] [pageseqs_under; pageseqs_over] in + let renamed_pdf = + Pdfpage.change_pages true + merged (Pdfpage.renumber_pages merged (Pdfpage.pages_of_pagetree merged)) + in + let under_pages, over_pages = + cleave (Pdfpage.pages_of_pagetree renamed_pdf) under_length + in + let new_pages = + map2 (fun o u -> do_stamp fast scaletofit (not swap) renamed_pdf o u over) over_pages under_pages + in + Pdfmarks.add_bookmarks (marks_under @ marks_over) (Pdfpage.change_pages true renamed_pdf new_pages) + +(* \section{Split at bookmarks} *) + +(* Returns empty string on failure. Should only be used in conjunction with +split at bookmarks code, so should never fail, by definiton. *) +let remove_unsafe_characters s = + let chars = + lose + (function x -> + match x with + '/' | '?' | '<' | '>' | '\\' | ':' | '*' | '|' | '\"' | '^' | '+' | '=' -> true + | x when int_of_char x < 32 || int_of_char x > 126 -> true + | _ -> false) + (explode s) + in + match chars with + | '.'::more -> implode more + | chars -> implode chars + +let get_bookmark_name pdf marks splitlevel n _ = + match keep (function m -> n = Pdfpage.pagenumber_of_target pdf m.Pdfmarks.target && m.Pdfmarks.level <= splitlevel) marks with + | {Pdfmarks.text = title}::_ -> remove_unsafe_characters title + | _ -> "" + +(* @F means filename without extension *) +(* @N means sequence number with no padding *) +(* @S means start page of this section *) +(* @E means end page of this section *) +(* @B means bookmark name at start page *) +let process_others marks pdf splitlevel filename sequence startpage endpage s = + let rec procss prev = function + | [] -> rev prev + | '@'::'F'::t -> procss (rev (explode filename) @ prev) t + | '@'::'N'::t -> procss (rev (explode (string_of_int sequence)) @ prev) t + | '@'::'S'::t -> procss (rev (explode (string_of_int startpage)) @ prev) t + | '@'::'E'::t -> procss (rev (explode (string_of_int endpage)) @ prev) t + | '@'::'B'::t -> procss (rev (explode (get_bookmark_name pdf marks splitlevel startpage pdf)) @ prev) t + | h::t -> procss (h::prev) t + in + implode (procss [] (explode s)) + +let name_of_spec printf marks (pdf : Pdf.t) splitlevel spec n filename startpage endpage = + if printf then + let spec = + string_of_sections (substitute (sections_of_string spec) [n]) + in + process_others marks pdf splitlevel filename n startpage endpage spec + else + let fill l n = + let chars = explode (string_of_int n) in + if length chars > l + then implode (drop chars (length chars - l)) + else implode ((many '0' (l - length chars)) @ chars) + in + let chars = explode spec in + let before, including = cleavewhile (neq '%') chars in + let percents, after = cleavewhile (eq '%') including in + if percents = [] + then + process_others marks pdf splitlevel filename n startpage endpage spec + else + process_others marks pdf splitlevel filename n startpage endpage + (implode before ^ fill (length percents) n ^ implode after) + +(* Find the stem of a filename *) +let stem s = + implode (rev (tail_no_fail (dropwhile (neq '.') (rev (explode (Filename.basename s)))))) + +let fast_write_split_pdfs enc printf splitlevel original_filename linearize nobble spec main_pdf pagenums pdf_pages = + let marks = Pdfmarks.read_bookmarks main_pdf in + iter2 + (fun number pagenums -> + let pdf = nobble (Pdfpage.pdf_of_pages main_pdf pagenums) in + let startpage, endpage = extremes pagenums in + let name = name_of_spec printf marks main_pdf splitlevel spec number (stem original_filename) startpage endpage in + Pdf.remove_unreferenced pdf; + Pdfwrite.pdf_to_file_options linearize enc (not (enc = None)) pdf name) + (indx pagenums) + pagenums + +let split_pdf enc printf original_filename chunksize linearize nobble spec pdf = + let pdf_pages = Pdfpage.pages_of_pagetree pdf in + fast_write_split_pdfs enc printf 0 original_filename linearize nobble spec pdf (splitinto chunksize (indx pdf_pages)) pdf_pages + +(* Return list, in order, a *set* of page numbers of bookmarks at a given level *) +let bookmark_pages level pdf = + setify_preserving_order + (option_map + (function l when l.Pdfmarks.level = level -> Some (Pdfpage.pagenumber_of_target pdf l.Pdfmarks.target) | _ -> None) + (Pdfmarks.read_bookmarks pdf)) + +let split_at_bookmarks original_filename linearize nobble level spec pdf = + (*flprint "split_at_bookmarks\n";*) + let pdf_pages = Pdfpage.pages_of_pagetree pdf + in let points = bookmark_pages level pdf in + let points = + lose (fun x -> x <= 0 || x > Pdfpage.endpage pdf) (map pred points) (* FIXME: What actually causes these problems? *) + in + (*flprint "Points: "; + iter (Printf.printf "%i ,") points; + flprint "\n";*) + let pts = splitat points (indx pdf_pages) in + (*flprint "Calling fast_write_split_pdfs\n";*) + fast_write_split_pdfs None false level + original_filename linearize nobble spec pdf pts pdf_pages + +(* Called from cpdflib.ml - different from above *) +let split_on_bookmarks pdf level = + let points = lose (eq 0) (map pred (bookmark_pages level pdf)) + in let pdf_pages = Pdfpage.pages_of_pagetree pdf in + let ranges = splitat points (indx pdf_pages) in + map (fun rs -> Pdfpage.pdf_of_pages pdf rs) ranges + +(* Output information for each page *) +let output_page_info pdf = + let pages = Pdfpage.pages_of_pagetree pdf + in let getbox page box = + if box = "/MediaBox" then + match page.Pdfpage.mediabox with + | Pdf.Array [a; b; c; d] -> + Printf.sprintf "%f %f %f %f" + (Pdf.getnum a) (Pdf.getnum b) (Pdf.getnum c) (Pdf.getnum d) + | _ -> "" + else + match Pdf.lookup_direct pdf box page.Pdfpage.rest with + | Some (Pdf.Array [a; b; c; d]) -> + Printf.sprintf "%f %f %f %f" + (Pdf.getnum a) (Pdf.getnum b) (Pdf.getnum c) (Pdf.getnum d) + | _ -> "" + in let rotation page = + Pdfpage.int_of_rotation page.Pdfpage.rotate + in + for pnum = 1 to Pdfpage.endpage pdf do + let page = select pnum pages in + Printf.printf "Page %i:\n" pnum; + Printf.printf "MediaBox: %s\n" (getbox page "/MediaBox"); + Printf.printf "CropBox: %s\n" (getbox page "/CropBox"); + Printf.printf "BleedBox: %s\n" (getbox page "/BleedBox"); + Printf.printf "TrimBox: %s\n" (getbox page "/TrimBox"); + Printf.printf "ArtBox: %s\n" (getbox page "/ArtBox"); + Printf.printf "Rotation: %i\n" (rotation page) + done + +(* Does the page have a defined box e.g "/CropBox" *) +let hasbox pdf page boxname = + let pages = Pdfpage.pages_of_pagetree pdf in + if page > length pages || page < 1 then raise (Failure "hasbox: bad page") else + let p = select page pages in + match Pdf.lookup_direct pdf boxname p.Pdfpage.rest with + | Some _ -> true + | _ -> false + +(* \section{Print metadata} *) +let get_metadata pdf = + match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with + | None -> error "malformed file" + | Some root -> + match Pdf.lookup_direct pdf "/Metadata" root with + | Some ((Pdf.Stream _) as s) -> + Pdf.getstream s; + begin match s with + | Pdf.Stream {contents = (_, Pdf.Got data)} -> data + | _ -> assert false + end + | _ -> mkbytes 0 + +let print_metadata pdf = + let data = get_metadata pdf in + for x = 0 to bytes_size data - 1 do + Printf.printf "%c" (char_of_int (bget data x)) + done + +(* \section{Print font data} *) +let list_font pdf page (name, dict) = + let subtype = + match Pdf.lookup_direct pdf "/Subtype" dict with + | Some (Pdf.Name n) -> n + | _ -> "" + in let basefont = + match Pdf.lookup_direct pdf "/BaseFont" dict with + | Some (Pdf.Name n) -> n + | _ -> "" + in let encoding = + match Pdf.lookup_direct pdf "/Encoding" dict with + | Some (Pdf.Name n) -> n + | _ -> "" + in + (*i Printf.printf + "%i %s %s %s %s\n" i*) + page, name, subtype, basefont, encoding + +let list_fonts pdf = + let pages = Pdfpage.pages_of_pagetree pdf in + flatten + (map + (fun (num, page) -> + match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with + | Some (Pdf.Dictionary fontdict) -> + map (list_font pdf num) fontdict + | _ -> []) + (combine (ilist 1 (length pages)) pages)) + +let string_of_font (p, n, s, b, e) = + Printf.sprintf "%i %s %s %s %s\n" p n s b e + +let print_fonts pdf = + flprint + (fold_left ( ^ ) "" (map string_of_font (list_fonts pdf))) + +(* \section{Nobbling for Demo Version} *) +let nobble_page pdf _ page = + let minx, miny, maxx, maxy = + (* Use cropbox if available *) + Pdf.parse_rectangle + (match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with + | Some r -> r + | None -> page.Pdfpage.mediabox) + in + let fontdict = + match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with + | None -> Pdf.Dictionary [] + | Some d -> d + in + let fontname = Pdf.unique_key "F" fontdict in + let width = maxx -. minx in let height = maxy -. miny in + let scalex = + (width *. 1000.) /. float (Pdfstandard14.textwidth false Pdftext.Helvetica "DEMO") + in + let page' = + let font = + Pdf.Dictionary + [("/Type", Pdf.Name "/Font"); + ("/Subtype", Pdf.Name "/Type1"); + ("/BaseFont", Pdf.Name "/Helvetica")] + in let ops = + [Pdfops.Op_BMC "/CPDFSTAMP"; + Pdfops.Op_cm + (Pdftransform.matrix_of_transform + [Pdftransform.Translate (minx, miny +. height /. 2.)]); + Pdfops.Op_gs "/gs0"; + Pdfops.Op_BT; + Pdfops.Op_Tf (fontname, scalex); + Pdfops.Op_Tj "DEMO"; + Pdfops.Op_ET; + Pdfops.Op_EMC] + in + {(Pdfpage.blankpage Pdfpaper.a4) with + Pdfpage.mediabox = page.Pdfpage.mediabox; + Pdfpage.content = [Pdfops.stream_of_ops ops]; + Pdfpage.resources = + Pdf.Dictionary + [("/Font", Pdf.Dictionary [(fontname, font)]); + ("/ExtGState", Pdf.Dictionary + ["/gs0", + Pdf.Dictionary["/Type", Pdf.Name "/ExtGState"; "/ca", Pdf.Real 0.2]]); + ] + } + in + do_stamp false false true pdf page' page (Pdf.empty ()) + +(* \section{Superimpose text, page numbers etc.} *) + +type position = + | PosCentre of float * float + | PosLeft of float * float + | PosRight of float * float + | Top of float + | TopLeft of float + | TopRight of float + | Left of float + | BottomLeft of float + | Bottom of float + | BottomRight of float + | Right of float + | Diagonal + | ReverseDiagonal + +let string_of_position = function + | PosCentre (a, b) -> Printf.sprintf "PosCentre %f %f" a b + | PosLeft (a, b) -> Printf.sprintf "PosLeft %f %f" a b + | PosRight (a, b) -> Printf.sprintf "PosRight %f %f" a b + | Top a -> Printf.sprintf "Top %f" a + | TopLeft a -> Printf.sprintf "TopLeft %f" a + | TopRight a -> Printf.sprintf "TopRight %f" a + | Left a -> Printf.sprintf "Left %f" a + | BottomLeft a -> Printf.sprintf "BottomLeft %f" a + | Bottom a -> Printf.sprintf "Bottom %f" a + | BottomRight a -> Printf.sprintf "BottomRight %f" a + | Right a -> Printf.sprintf "Right %f" a + | Diagonal -> "Diagonal" + | ReverseDiagonal -> "Reverse Diagonal" + +type orientation = + | Horizontal + | Vertical + | VerticalDown + +type justification = LeftJustify | CentreJustify | RightJustify + +(* Given the mediabox, calculate an absolute position for the text. *) +let calculate_position ignore_d w (xmin, ymin, xmax, ymax) orientation pos = + (*i Printf.printf "calculate_position %b %f %f %f %f %f %b\n" ignore_d w xmin ymin xmax ymax shorterside; i*) + let rot = if orientation = VerticalDown then rad_of_deg 270. else 0. in + match pos with + | Diagonal -> + let angle = atan ((ymax -. ymin) /. (xmax -. xmin)) + in let cx, cy = (xmax +. xmin) /. 2., (ymax +. ymin) /. 2. in + let dl = w /. 2. in + let dx = dl *. cos angle + in let dy = dl *. sin angle in + (*i Printf.printf "Diagonal: angle = %f, cx = %f, cy = %f, dx = %f, dy = %f\n" angle cx cy dx dy; i*) + cx -. dx, cy -. dy, angle + | ReverseDiagonal -> + (*flprint "REVERSE DIAGONAL IN CALCULATE POSITION\n";*) + let angle = atan ((ymax -. ymin) /. (xmax -. xmin)) + in let cx, cy = (xmax +. xmin) /. 2., (ymax +. ymin) /. 2. in + let dl = w /. 2. in + let dx = dl *. cos angle + in let dy = dl *. sin angle in + (*Printf.printf "Diagonal: angle = %f\n" (deg_of_rad angle);*) + cx -. dx, (ymax +. ymin) -. (cy -. dy), angle -. ((2. *. pi) -. ((pi -. (2. *. angle)) *. 2.) /. 2.) +. pi + | PosLeft (x, y) -> xmin +. x, ymin +. y, rot + | PosCentre (x, y) -> xmin +. x -. (w /. 2.), ymin +. y, rot + | PosRight (x, y) -> xmin +. x -. w, ymin +. y, rot + | Top d -> + let d = if ignore_d then 0. else d in + (xmin +. xmax) /. 2. -. w /. 2., ymax -. d, rot + | TopLeft d -> + let d = if ignore_d then 0. else d in + xmin +. d, ymax -. d, rot + | TopRight d -> + let d = if ignore_d then 0. else d in + xmax -. d -. w, ymax -. d, rot + | Left d -> + let d = if ignore_d then 0. else d in + xmin +. d, (ymax +. ymin) /. 2., rot + | BottomLeft d -> + let d = if ignore_d then 0. else d in + xmin +. d, ymin +. d, rot + | Bottom d -> + let d = if ignore_d then 0. else d in + (xmin +. xmax) /. 2. -. w /. 2., ymin +. d, rot + | BottomRight d -> + let d = if ignore_d then 0. else d in + xmax -. d -. w, ymin +. d, rot + | Right d -> + let d = if ignore_d then 0. else d in + xmax -. d -. w, (ymax +. ymin) /. 2., rot + +(* Process UTF8 text to /WinAnsiEncoding string. *) +let winansi_of_utf8 s = + (*flprint "winansi_of_utf8:"; + iter (Printf.printf "%C ") (explode s); + flprint "\n";*) + let extractor = Pdftext.charcode_extractor_of_encoding Pdftext.WinAnsiEncoding + and codepoints = Pdftext.codepoints_of_utf8 s in + (*flprint "codepoints after Pdftext.codepoints_of_utf8\n"; + iter (Printf.printf "%i ") codepoints; + flprint "\ndone\n";*) + implode (map char_of_int (option_map extractor codepoints)) + +(* Process codepoints back to UTF8, assuming it came from UTF8 to start with *) +let utf8_of_winansi s = + let text_extractor = + Pdftext.text_extractor_of_font + (Pdf.empty ()) + (Pdf.Dictionary + [("/BaseFont", Pdf.Name "/TimesRoman"); + ("/Subtype", Pdf.Name "/Type1"); + ("/Encoding", Pdf.Name "/WinAnsiEncoding")]) + in + let codepoints = Pdftext.codepoints_of_text text_extractor s in + Pdftext.utf8_of_codepoints codepoints + +(* Get the width of some text in the given font *) +let width_of_text font text = + match font with + | Pdftext.SimpleFont {Pdftext.fontmetrics = Some fontmetrics} -> + begin try + fold_left ( +. ) 0. (map (fun c -> fontmetrics.(int_of_char c)) (explode text)) + with + _ -> 0. + end + | _ -> 0. + +type ops_metrics = + {metrics_text : string; + metrics_x : float; + metrics_y : float; + metrics_rot : float} + +let ops_metrics : ops_metrics list ref = ref [] + +let ops_baseline_adjustment = ref 0. + +let metrics_howmany () = length !ops_metrics + +let metrics_text n = + utf8_of_winansi (select n !ops_metrics).metrics_text + +let metrics_x n = + (select n !ops_metrics).metrics_x + +let metrics_y n = + (select n !ops_metrics).metrics_y + +let metrics_rot n = + (select n !ops_metrics).metrics_rot + +let metrics_baseline_adjustment () = !ops_baseline_adjustment + +let ops longest_w metrics x y rotate hoffset voffset outline linewidth unique_fontname unique_extgstatename colour fontsize text = + if metrics then + ops_metrics := + {metrics_text = text; metrics_x = x -. hoffset; metrics_y = y -. voffset; metrics_rot = rotate} + ::!ops_metrics; + [Pdfops.Op_q; + Pdfops.Op_BMC "/CPDFSTAMP"; + Pdfops.Op_cm + (Pdftransform.matrix_of_transform + [Pdftransform.Translate (x -. hoffset, y -. voffset); + Pdftransform.Rotate ((0., 0.), rotate)]); + Pdfops.Op_BT; + ] @ + (if outline then [Pdfops.Op_w linewidth; Pdfops.Op_Tr 1] else [Pdfops.Op_Tr 0]) @ + [ + (match colour with (r, g, b) -> Pdfops.Op_rg (r, g, b)); + (match colour with (r, g, b) -> Pdfops.Op_RG (r, g, b))] + @ + (match unique_extgstatename with None -> [] | Some n -> [Pdfops.Op_gs n]) + @ + [Pdfops.Op_Tf (unique_fontname, fontsize); + Pdfops.Op_Tj text; + Pdfops.Op_ET; + Pdfops.Op_EMC; + Pdfops.Op_Q] + +(* Find the h-offset for justification based on the longest width, the current +width, the justification and the position. *) +let find_justification_offsets longest_w w position = function + | LeftJustify -> + begin match position with + | TopLeft _ | Left _ | PosLeft _ | BottomLeft _ -> 0. + | Top _ | PosCentre _ | Bottom _ -> (longest_w -. w) /. 2. + | TopRight _ | BottomRight _ | PosRight _ | Right _ -> longest_w -. w + | Diagonal -> 0. + | ReverseDiagonal -> 0. + end + | RightJustify -> + begin match position with + | TopLeft _ | Left _ | PosLeft _ | BottomLeft _ -> ~-.(longest_w -. w) + | Top _ | PosCentre _ | Bottom _ -> ~-.((longest_w -. w) /. 2.) + | TopRight _ | BottomRight _ | PosRight _ | Right _ -> 0. + | Diagonal -> 0. + | ReverseDiagonal -> 0. + end + | CentreJustify -> + begin match position with + | TopLeft _ | Left _ | PosLeft _ | BottomLeft _ -> ~-.((longest_w -. w) /. 2.) + | Top _ | PosCentre _ | Bottom _ -> 0. + | TopRight _ | BottomRight _ | PosRight _ | Right _ -> (longest_w -. w) /. 2. + | Diagonal -> 0. + | ReverseDiagonal -> 0. + end + +let addtext + metrics lines linewidth outline fast colour fontname bates fontsize font + underneath position hoffset voffset text pages orientation cropbox opacity + justification filename pdf += + let endpage = Pdfpage.endpage pdf in + let replace_pairs pdf filename bates num = + ["%Page", string_of_int num; + "%Roman", roman_upper num; + "%roman", roman_lower num; + "%filename", filename; + "%Label", pagelabel pdf num; + "%EndPage", string_of_int endpage; + "%EndLabel", pagelabel pdf endpage; + "%Bates", string_of_int (bates + num - 1)] in + let addtext_page num page = + let resources', unique_extgstatename = + if opacity < 1.0 then + let dict = + match Pdf.lookup_direct pdf "/ExtGState" page.Pdfpage.resources with + | Some d -> d + | None -> Pdf.Dictionary [] + in + let unique_extgstatename = Pdf.unique_key "gs" dict in + let dict' = + Pdf.add_dict_entry dict unique_extgstatename + (Pdf.Dictionary [("/ca", Pdf.Real opacity); ("/CA", Pdf.Real opacity)]) + in + Pdf.add_dict_entry page.Pdfpage.resources "/ExtGState" dict', Some unique_extgstatename + else + page.Pdfpage.resources, None + in + let fontdict = + match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with + | None -> Pdf.Dictionary [] + | Some d -> d + in + let unique_fontname = Pdf.unique_key "F" fontdict in + let ops = + let text = process_text text (replace_pairs pdf filename bates num) in + let calc_textwidth text = + match font with + | Some f -> + (* FIXME This is a bit wrong in the prescence of special + characters due to standard encoding not win encoding being + used in textwidth. When we have new AFM parsing up and + running, can improve. *) + let rawwidth = Pdfstandard14.textwidth false f text in + (float rawwidth *. fontsize) /. 1000. + | None -> + let font = + match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with + | Some fontdict -> + begin match Pdf.lookup_direct pdf fontname fontdict with + | Some font -> font + | _ -> failwith "addtext: bad font" + end + | _ -> failwith "addtext: bad font" + in + let rawwidth = width_of_text (Pdftext.read_font pdf font) text in + (rawwidth *. fontsize) /. 1000. + in + let expanded_lines = + map (function text -> process_text text (replace_pairs pdf filename bates num)) lines + in + let textwidth = calc_textwidth text + and allwidths = map calc_textwidth expanded_lines in + let longest_w = last (sort compare allwidths) in + let joffset = find_justification_offsets longest_w textwidth position justification in + let mediabox = + if cropbox then + match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with + | Some pdfobject -> Pdf.parse_rectangle (Pdf.direct pdf pdfobject) + | None -> Pdf.parse_rectangle page.Pdfpage.mediabox + else + Pdf.parse_rectangle page.Pdfpage.mediabox + in + let x, y, rotate = calculate_position false textwidth mediabox orientation position in + let hoffset, voffset = + if position = Diagonal || position = ReverseDiagonal + then -. (cos ((pi /. 2.) -. rotate) *. voffset), sin ((pi /. 2.) -. rotate) *. voffset + else hoffset, voffset + in + match font with + | Some f -> + ops longest_w metrics x y rotate (hoffset +. joffset) voffset outline linewidth + unique_fontname unique_extgstatename colour fontsize text + | None -> + ops longest_w metrics x y rotate (hoffset +. joffset) voffset outline linewidth + fontname None colour fontsize text + in + let newresources = + match font with + | Some _ -> + let thefont = + Pdf.Dictionary + [("/Type", Pdf.Name "/Font"); + ("/Encoding", Pdf.Name "/WinAnsiEncoding"); + ("/Subtype", Pdf.Name "/Type1"); + ("/BaseFont", Pdf.Name ("/" ^ fontname))] + in + let newfontdict = Pdf.add_dict_entry fontdict unique_fontname thefont in + Pdf.add_dict_entry resources' "/Font" newfontdict + | None -> page.Pdfpage.resources + in + let page = {page with Pdfpage.resources = newresources} in + if underneath + then Pdfpage.prepend_operators pdf ops ~fast:fast page + else Pdfpage.postpend_operators pdf ops ~fast:fast page + in + if metrics then + (ignore (iter_pages (fun a b -> ignore (addtext_page a b)) pdf pages); pdf) + else + process_pages addtext_page pdf pages + +(* Prev is a list of lists of characters *) +let split_at_newline t = + let rec split_at_newline_inner prev = function + | [] -> rev (map implode (map rev prev)) + | '\\'::'n'::t -> split_at_newline_inner ([]::prev) t + | h::t -> split_at_newline_inner ((h::hd prev)::tl prev) t + in + split_at_newline_inner [[]] (explode t) + +let rec unescape_chars prev = function + | [] -> rev prev + | '\\'::('0'..'9' as a)::('0'..'9' as b)::('0'..'9' as c)::t -> + let chr = char_of_int (int_of_string ("0o" ^ implode [a;b;c])) in + unescape_chars (chr::prev) t + | '\\'::c::t when c <> 'n' -> unescape_chars (c::prev) t + | h::t -> unescape_chars (h::prev) t + +let unescape_string s = + implode (unescape_chars [] (explode s)) + +let + addtexts metrics linewidth outline fast fontname font bates colour position linespacing + fontsize underneath text pages orientation cropbox opacity justification midline filename pdf += + (*flprint "addtexts:\n"; + iter (Printf.printf "%C ") (explode text); + flprint "\n"; + Printf.printf "\nCpdf.addtexts: metrics = %b" metrics; + flprint "\n";*) + (*Printf.printf "linewidth = %f\n" linewidth; + Printf.printf "outline = %b\n" outline; + Printf.printf "fast = %b\n" fast; + Printf.printf "fontname = %s\n" fontname; + Printf.printf "winansi text = %s\n" text; + Printf.printf "position = %s\n" (string_of_position position); + Printf.printf "bates = %i\n" bates; + Printf.printf "linespacing = %f\n" linespacing; + Printf.printf "fontsize = %f\n" fontsize; + Printf.printf "underneath = %b\n" underneath; + Printf.printf "font = %s\n" begin match font with None -> "None" | Some x -> Pdftext.string_of_standard_font x end; + Printf.printf "justification = %s\n" + begin match justification with LeftJustify -> "left" | RightJustify -> "right" | CentreJustify -> "centre" end; + Printf.printf "midline = %b\n" midline; + begin match colour with r, g, b -> Printf.printf "%f, %f, %f\n" r g b end; + Printf.printf "opacity = %f\n" opacity; + flprint "\n"; + Printf.printf "relative-to-cropbox = %b" cropbox; + flprint "\n";*) + ops_metrics := []; + let text = winansi_of_utf8 text in + let text = unescape_string text in + let lines = split_at_newline text + and pdf = ref pdf in + let voffset = + match position with + | Bottom _ | BottomLeft _ | BottomRight _ -> + ref (0. -. (linespacing *. fontsize *. (float (length lines) -. 1.))) + | Left _ | Right _ -> + (* Vertically align *) + ref (0. -. (linespacing *. ((fontsize *. (float (length lines) -. 1.)) /. 2.))) + | Diagonal | ReverseDiagonal -> + (* Change so that the whole paragraph sits on the centre... *) + ref (0. -. ((linespacing *. fontsize *. (float (length lines) -. 1.)) /. 2.)) + | _ -> ref 0. + in + if midline then + begin match font with + | Some font -> + let baseline_adjustment = + (fontsize *. float (Pdfstandard14.baseline_adjustment font)) /. 1000. + in + ops_baseline_adjustment := baseline_adjustment; + voffset := !voffset +. baseline_adjustment + | _ -> + ops_baseline_adjustment := 0. + end + else + ops_baseline_adjustment := 0.; + iter + (fun line -> + let voff, hoff = + if orientation = Vertical then 0., -.(!voffset) else !voffset, 0. + in + pdf := + addtext metrics lines linewidth outline fast colour fontname bates fontsize font + underneath position hoff voff line pages orientation cropbox opacity justification filename !pdf; + voffset := !voffset +. (linespacing *. fontsize)) + lines; + ops_metrics := rev !ops_metrics; + !pdf + +let removetext range pdf = + (* Could fail on nesting, or other marked content inside our marked content.*) + let rec remove_until_last_EMC level = function + | [] -> [] + | Pdfops.Op_BMC "/CPDFSTAMP"::more -> + remove_until_last_EMC (level + 1) more + | Pdfops.Op_EMC::more -> + if level = 1 + then more + else remove_until_last_EMC (level - 1) more + | _::more -> + remove_until_last_EMC level more + in + let rec remove_stamps prev = function + | [] -> rev prev + | Pdfops.Op_BMC "/CPDFSTAMP"::more -> + let rest = remove_until_last_EMC 1 more in + remove_stamps prev rest + | h::t -> remove_stamps (h::prev) t + in + let removetext_page _ page = + {page with + Pdfpage.content = + let ops = Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content in + [Pdfops.stream_of_ops (remove_stamps [] ops)]} + in + process_pages removetext_page pdf range + +(* \section{Padding with blank pages.} *) +let insert_after pos page pages = + let before, after = cleave pages pos in + before @ [page] @ after + +(* Insert many. *) +let rec insert_after_many pages = function + | [] -> pages + | (pos, page)::more -> + let pages' = insert_after pos page pages in + insert_after_many pages' (map (fun (p, pa) -> p + 1, pa) more) + +let pad range pdf i = + let pages = Pdfpage.pages_of_pagetree pdf in + let blankpages = + map + (fun n -> + {Pdfpage.content = []; + Pdfpage.mediabox = (select (n + i) pages).Pdfpage.mediabox; + Pdfpage.resources = Pdf.Dictionary []; + Pdfpage.rotate = (select (n + i) pages).Pdfpage.rotate; + Pdfpage.rest = (select (n + i) pages).Pdfpage.rest}) + range + in + let pages' = insert_after_many pages (combine range blankpages) in + Pdfpage.change_pages true pdf pages' + +let padafter range pdf = + let isinpdf n = mem n (ilist 1 (Pdfpage.endpage pdf)) in + if not (fold_left ( && ) true (map isinpdf range)) then + raise (Failure "padafter: range contains pages not present in pdf"); + pad range pdf 0 + +let padbefore range pdf = + let isinpdf n = mem n (ilist 1 (Pdfpage.endpage pdf)) in + if not (fold_left ( && ) true (map isinpdf range)) then + raise (Failure "padbefore: range contains pages not present in pdf"); + pad (map pred range) pdf 1 + +let padmultiple n pdf = + let pages = Pdfpage.pages_of_pagetree pdf in + let len = length pages in + let pages_to_add = if len / n * n = len then 0 else n - (len mod n) in + if pages_to_add > 0 then + let blankpages = + many + {Pdfpage.content = []; + Pdfpage.mediabox = (select len pages).Pdfpage.mediabox; + Pdfpage.resources = Pdf.Dictionary []; + Pdfpage.rotate = (select len pages).Pdfpage.rotate; + Pdfpage.rest = (select len pages).Pdfpage.rest} + pages_to_add + in + Pdfpage.change_pages true pdf (pages @ blankpages) + else + pdf + +(* \section{Shift page data} *) +let make_mediabox (xmin, ymin, xmax, ymax) = + Pdf.Array + [Pdf.Real xmin; Pdf.Real ymin; Pdf.Real xmax; Pdf.Real ymax] + +(* Change the media box and other known boxes by the function [f] which takes +xmin, xmax, ymin, ymax as input. *) +let change_boxes f pdf page = + let names = ["/TrimBox"; "/ArtBox"; "/CropBox"; "/BleedBox"] + in let getbox n = + Pdf.lookup_direct pdf n page.Pdfpage.rest + in + let boxes = combine names (map getbox names) in + let toreplace = lose (function (_, None) -> true | _ -> false) boxes in + let toreplace = + map + (function (name, Some value) -> (name, value) | _ -> assert false) + toreplace + in + let rest' = + fold_left + (fun e (k, v) -> + let v = + make_mediabox (f (Pdf.parse_rectangle v)) + in + Pdf.replace_dict_entry e k v) + page.Pdfpage.rest + toreplace + in + {page with + Pdfpage.mediabox = + make_mediabox (f (Pdf.parse_rectangle page.Pdfpage.mediabox)); + Pdfpage.rest = rest'} + +(* The content is flipped by altering any use of [Op_cm]. But we must also +alter any /Matrix entries in pattern dictionaries for tiled and shading +patterns. In addition, shadings used by Op_sh in the main page content and in +xobjects must be altered. *) +(*let rec change_shadings pdf tr resources = + let transform_shading s = + s + in + try + let resources = + begin match Pdf.lookup_direct pdf "/Shading" resources with + | Some (Pdf.Dictionary shadings) -> + let names, nums = + split + (map + (fun (name, shading) -> + Printf.printf "Fixing up shading %s\n" name; + name, Pdf.addobj pdf (transform_shading shading)) + shadings) + in + let entries = + map2 (fun name num -> name, Pdf.Indirect num) names nums + in + Pdf.add_dict_entry resources "/Shading" (Pdf.Dictionary entries) + | _ -> resources + end + in + let process_xobject xobject = + change_shadings pdf tr xobject + in + begin match Pdf.lookup_direct pdf "/XObject" resources with + | Some (Pdf.Dictionary xobjects) -> + let names, nums = + split + (map + (fun (name, xobject) -> + Printf.printf "Looking for shadings in xobject %s\n" name; + name, Pdf.addobj pdf (process_xobject xobject)) + xobjects) + in + let entries = + map2 (fun name num -> name, Pdf.Indirect num) names nums + in + Pdf.add_dict_entry resources "/XObject" (Pdf.Dictionary entries) + | _ -> resources + end + with + _ -> resources*) + +let change_pattern_matrices pdf tr resources = + try + (*let resources = change_shadings pdf tr resources in*) + begin match Pdf.lookup_direct pdf "/Pattern" resources with + | Some (Pdf.Dictionary patterns) -> + let names, nums = + split + (map + (fun (name, p) -> + let old_pattern = Pdf.direct pdf p in + let new_pattern = + let existing_tr = Pdf.parse_matrix pdf "/Matrix" old_pattern in + let new_tr = Pdftransform.matrix_compose (Pdftransform.matrix_invert tr) existing_tr in + Pdf.add_dict_entry old_pattern "/Matrix" (Pdf.make_matrix new_tr) + in + name, Pdf.addobj pdf new_pattern) + patterns) + in + let entries = + map2 (fun name num -> name, Pdf.Indirect num) names nums + in + Pdf.add_dict_entry resources "/Pattern" (Pdf.Dictionary entries) + | _ -> resources + end + with + Pdftransform.NonInvertable -> resources + +let shift_page ?(fast=false) dx dy pdf _ page = + let transform_op = + Pdfops.Op_cm (Pdftransform.matrix_of_op (Pdftransform.Translate (dx, dy))) + in + let resources' = + change_pattern_matrices pdf (Pdftransform.mktranslate ~-.dx ~-.dy) page.Pdfpage.resources + in + Pdfpage.prepend_operators pdf [transform_op] ~fast {page with Pdfpage.resources = resources'} + +let shift_pdf ?(fast=false) dx dy pdf range = + process_pages (shift_page ~fast dx dy pdf) pdf range + +(* Change a page's media box so its minimum x and y are 0, making other +operations simpler to think about. Any shift that is done is reflected in +other boxes (clip etc.) *) +let rectify_boxes ?(fast=false) pdf page = + let minx, miny, _, _ = + Pdf.parse_rectangle page.Pdfpage.mediabox + in + let f (iminx, iminy, imaxx, imaxy) = + iminx -. minx, iminy -. miny, imaxx -. minx, imaxy -. miny + in + let page = change_boxes f pdf page in + if minx <> 0. || miny <> 0. + then shift_page ~fast (-.minx) (-.miny) pdf 0 page + else page + +(* \section{Flip pages} *) +let flip_page ?(fast=false) transform_op pdf _ page = + let minx, miny, maxx, maxy = + Pdf.parse_rectangle page.Pdfpage.mediabox + in + let tr = transform_op minx miny maxx maxy in + let resources = + change_pattern_matrices pdf tr page.Pdfpage.resources + in + Pdfpage.prepend_operators pdf [Pdfops.Op_cm tr] ~fast {page with Pdfpage.resources = resources} + +let vflip_pdf ?(fast=false) pdf range = + let transform_op _ miny _ maxy = + Pdftransform.matrix_of_op + (Pdftransform.Scale ((0., ((miny +. maxy) /. 2.)), 1., -.1.)) + in + process_pages (flip_page ~fast transform_op pdf) pdf range + +let hflip_pdf ?(fast=false) pdf range = + let transform_op minx _ maxx _ = + Pdftransform.matrix_of_op + (Pdftransform.Scale (((minx +. maxx) /. 2., 0.), -.1., 1.)) + in + process_pages (flip_page ~fast transform_op pdf) pdf range + +(* \section{Set media box} *) +let set_mediabox x y w h pdf range = + let crop_page _ page = + {page with + Pdfpage.mediabox = + (Pdf.Array + [Pdf.Real x; Pdf.Real y; + Pdf.Real (x +. w); Pdf.Real (y +. h)])} + in + process_pages crop_page pdf range + +let setBox box minx maxx miny maxy pdf range = + let set_box_page _ page = + {page with + Pdfpage.rest = + Pdf.add_dict_entry + page.Pdfpage.rest box + (Pdf.Array [Pdf.Real minx; Pdf.Real miny; Pdf.Real maxx; Pdf.Real maxy])} + in + process_pages set_box_page pdf range + +(* \section{Cropping} *) +let crop_pdf x y w h pdf range = + let crop_page _ page = + {page with + Pdfpage.rest = + (Pdf.add_dict_entry + page.Pdfpage.rest + "/CropBox" + (Pdf.Array + [Pdf.Real x; Pdf.Real y; + Pdf.Real (x +. w); Pdf.Real (y +. h)]))} + in + process_pages crop_page pdf range + +let remove_cropping_pdf pdf range = + let remove_cropping_page _ page = + {page with + Pdfpage.rest = + (Pdf.remove_dict_entry page.Pdfpage.rest "/CropBox")} + in + process_pages remove_cropping_page pdf range + +let remove_trim_pdf pdf range = + let remove_trim_page _ page = + {page with + Pdfpage.rest = + (Pdf.remove_dict_entry page.Pdfpage.rest "/TrimBox")} + in + process_pages remove_trim_page pdf range + +let remove_art_pdf pdf range = + let remove_art_page _ page = + {page with + Pdfpage.rest = + (Pdf.remove_dict_entry page.Pdfpage.rest "/ArtBox")} + in + process_pages remove_art_page pdf range + +let remove_bleed_pdf pdf range = + let remove_bleed_page _ page = + {page with + Pdfpage.rest = + (Pdf.remove_dict_entry page.Pdfpage.rest "/BleedBox")} + in + process_pages remove_bleed_page pdf range + +(* \section{Rotating pages} *) +let rotate_pdf r pdf range = + let rotate_page _ page = + {page with Pdfpage.rotate = + Pdfpage.rotation_of_int r} + in + process_pages rotate_page pdf range + +let rotate_pdf_by r pdf range = + let rotate_page_by _ page = + {page with Pdfpage.rotate = + Pdfpage.rotation_of_int ((Pdfpage.int_of_rotation page.Pdfpage.rotate + r) mod 360)} + in + process_pages rotate_page_by pdf range + +let rotate_page_contents ~fast rotpoint r pdf _ page = + let rotation_point = + match rotpoint with + | None -> + let minx, miny, maxx, maxy = Pdf.parse_rectangle page.Pdfpage.mediabox in + (minx +. maxx) /. 2., (miny +. maxy) /. 2. + | Some point -> point + in + let tr = + Pdftransform.matrix_of_op + (Pdftransform.Rotate (rotation_point, -.(rad_of_deg r))) + in let tr2 = + Pdftransform.matrix_of_op + (Pdftransform.Rotate (rotation_point, rad_of_deg r)) + in + let transform_op = Pdfops.Op_cm tr in + let resources' = change_pattern_matrices pdf tr2 page.Pdfpage.resources in + Pdfpage.prepend_operators pdf [transform_op] ~fast {page with Pdfpage.resources = resources'} + +let rotate_contents ?(fast=false) r pdf range = + process_pages (rotate_page_contents ~fast None r pdf) pdf range + +(* Return the pages from the pdf in the range, unordered. *) +let select_pages range pdf = + let pages = Pdfpage.pages_of_pagetree pdf in + option_map (function n -> try Some (select n pages) with _ -> None) range + + +(* Upright functionality *) + +(* If all pages are already upright, do nothing to save time. *) +let allupright range pdf = + let page_is_upright page = + page.Pdfpage.rotate = Pdfpage.Rotate0 + in + not (mem false (map page_is_upright (select_pages range pdf))) + +let upright_transform page = + let rotate = + Pdfpage.int_of_rotation page.Pdfpage.rotate + and cx, cy = + let minx, miny, maxx, maxy = Pdf.parse_rectangle page.Pdfpage.mediabox in + (minx +. maxx) /. 2., (miny +. maxy) /. 2. + in + Pdftransform.mkrotate (cx, cy) (rad_of_deg (~-.(float rotate))) + +let transform_boxes tr pdf page = + let f (minx, miny, maxx, maxy) = + let minx, miny = Pdftransform.transform_matrix tr (minx, miny) + and maxx, maxy = Pdftransform.transform_matrix tr (maxx, maxy) in + (minx, miny, maxx, maxy) + in + change_boxes f pdf page + +let transform_contents ?(fast=false) tr pdf page = + let transform_op = Pdfops.Op_cm tr in + let resources' = change_pattern_matrices pdf (Pdftransform.matrix_invert tr) page.Pdfpage.resources in + Pdfpage.prepend_operators pdf [transform_op] ~fast {page with Pdfpage.resources = resources'} + +let upright ?(fast=false) range pdf = + if allupright range pdf then pdf else + let upright_page _ _ page = + let tr = upright_transform page in + let page = transform_boxes tr pdf page in + let page = transform_contents ~fast tr pdf page in + rectify_boxes ~fast pdf {page with Pdfpage.rotate = Pdfpage.Rotate0} + in + process_pages (upright_page pdf) pdf range + +(* \section{Scale page data} *) +let scale_pdf ?(fast=false) sx sy pdf range = + let scale_page _ page = + let f (xmin, ymin, xmax, ymax) = + xmin *. sx, ymin *. sy, xmax *. sx, ymax *. sy + in + let page = change_boxes f pdf page + and matrix = Pdftransform.matrix_of_op (Pdftransform.Scale ((0., 0.), sx, sy)) in + let transform_op = + Pdfops.Op_cm matrix + and resources' = + change_pattern_matrices pdf (Pdftransform.matrix_invert matrix) page.Pdfpage.resources + in + Pdfpage.prepend_operators pdf ~fast [transform_op] {page with Pdfpage.resources = resources'} + in + process_pages scale_page pdf range + +(* Scale to fit page of size x * y *) +(* FIXME: Can we do this in terms of scale_contents - and then just fix up the boxes? For 1.8 *) +let scale_to_fit_pdf ?(fast=false) input_scale x y op pdf range = + let scale_page_to_fit _ page = + let matrix = + let (minx, miny, maxx, maxy) = + (* Use cropbox if available *) + Pdf.parse_rectangle + (match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with + | Some r -> r + | None -> page.Pdfpage.mediabox) + in + if maxx <= 0. || maxy <= 0. then failwith "Zero-sized pages are invalid" else + let fx = x /. maxx in let fy = y /. maxy in + let scale = fmin fx fy *. input_scale in + let trans_x = (x -. (maxx *. scale)) /. 2. + in let trans_y = (y -. (maxy *. scale)) /. 2. in + (Pdftransform.matrix_of_transform + [Pdftransform.Translate (trans_x, trans_y); + Pdftransform.Scale ((0., 0.), scale, scale)]) + in + let page = + change_boxes + (function (minx, miny, maxx, maxy) -> 0., 0., x, y (* FIXME: scale boxes properly *)) + pdf page + in + Pdfpage.prepend_operators pdf [Pdfops.Op_cm matrix] ~fast + {page with Pdfpage.resources = change_pattern_matrices pdf (Pdftransform.matrix_invert matrix) page.Pdfpage.resources} + in + process_pages scale_page_to_fit pdf range + +(* Scale contents *) +let scale_page_contents ?(fast=false) scale position pdf _ page = + let (minx, miny, maxx, maxy) as box = + (* Use cropbox if available *) + Pdf.parse_rectangle + (match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with + | Some r -> r + | None -> page.Pdfpage.mediabox) + in + let sx, sy, _ = calculate_position true 0. box Horizontal position in + let tx, ty = + match position with + | Top t -> 0., -.t + | TopLeft t -> t, -.t + | TopRight t -> 0., -.t + | Left t -> t, 0. + | BottomLeft t -> t, t + | Bottom t -> 0., t + | BottomRight t -> -.t, -.t + | Right t -> -.t, 0. + | _ -> 0., 0. (* centre it... FIXME: We will add a center position, eventually, for text and this... *) + in + let transform = + Pdftransform.matrix_of_transform + [Pdftransform.Translate (tx, ty); + Pdftransform.Scale ((sx, sy), scale, scale)] + in + let transform_op = Pdfops.Op_cm transform in + let resources' = change_pattern_matrices pdf transform page.Pdfpage.resources in + Pdfpage.prepend_operators pdf [transform_op] ~fast {page with Pdfpage.resources = resources'} + +let scale_contents ?(fast=false) position scale pdf range = + process_pages (scale_page_contents ~fast scale position pdf) pdf range + +(* \section{List annotations} *) +let get_annotation_string encoding pdf annot = + match Pdf.lookup_direct pdf "/Contents" annot with + | Some (Pdf.String s) -> encode_output encoding s + | _ -> "" + +let print_annotation encoding pdf s = + let s = get_annotation_string encoding pdf s in + flprint "------------------------------------------------------------------------\n"; + flprint s; + flprint "\n" + +let list_page_annotations encoding pdf page = + match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with + | Some (Pdf.Array annots) -> + iter (print_annotation encoding pdf) (map (Pdf.direct pdf) annots) + | _ -> () + +let list_annotations encoding pdf = + let pages = Pdfpage.pages_of_pagetree pdf in + iter (list_page_annotations encoding pdf) pages + +let get_annotations encoding pdf = + let pages = Pdfpage.pages_of_pagetree pdf in + flatten + (map2 + (fun page pagenumber -> + match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with + | Some (Pdf.Array annots) -> + let strings = + map (get_annotation_string encoding pdf) (map (Pdf.direct pdf) annots) + in + combine (many pagenumber (length strings)) strings + | _ -> []) + pages + (ilist 1 (length pages))) + +let list_annotations_more pdf = + let pages = Pdfpage.pages_of_pagetree pdf in + iter2 + (fun page pagenumber -> + iter + (function annot -> + let print_annot annot = + let annot_type_string = + match annot.Pdfannot.subtype with + | Pdfannot.Stamp -> "Stamp" + | Pdfannot.Text -> "Text" + | Pdfannot.Link -> "Link" + | Pdfannot.FreeText -> "FreeText" + | Pdfannot.Line -> "Line" + | Pdfannot.Square -> "Square" + | Pdfannot.Circle -> "Circle" + | Pdfannot.Polygon -> "Polygon" + | Pdfannot.PolyLine -> "PolyLine" + | Pdfannot.Highlight -> "Highlight" + | Pdfannot.Underline -> "Underline" + | Pdfannot.Squiggly -> "Squiggly" + | Pdfannot.StrikeOut -> "StrikeOut" + | Pdfannot.Caret -> "Caret" + | Pdfannot.Ink -> "Ink" + | Pdfannot.Popup _ -> "Popup" + | Pdfannot.FileAttachment -> "FileAttachment" + | Pdfannot.Sound -> "Sound" + | Pdfannot.Movie -> "Movie" + | Pdfannot.Widget -> "Widget" + | Pdfannot.Screen -> "Screen" + | Pdfannot.PrinterMark -> "PrinterMark" + | Pdfannot.TrapNet -> "TrapNet" + | Pdfannot.Watermark -> "Watermark" + | Pdfannot.ThreeDee -> "ThreeDee" + | Pdfannot.Unknown -> "Unknown" + in let subject = + match annot.Pdfannot.subject with + | Some s -> s + | None -> "" + in let contents = + match annot.Pdfannot.annot_contents with + | Some s -> s + | None -> "" + in + Printf.printf "Page: %i\n" pagenumber; + Printf.printf "Subtype: %s\n" annot_type_string; + Printf.printf "Subject: %s\n" subject; + Printf.printf "Contents: %s\n" contents; + in + match annot.Pdfannot.subtype with + | Pdfannot.Popup annot -> print_annot annot + | _ -> print_annot annot + ) + (Pdfannot.annotations_of_page pdf page)) + pages + (ilist 1 (length pages)); + flprint "" (* flush *) + +(* Equalise the page lengths of two PDFs by chopping or extending the first one. +*) +let equalise_lengths a b = + let a' = + if Pdfpage.endpage a < Pdfpage.endpage b then + Pdfpage.change_pages true a + (Pdfpage.pages_of_pagetree a @ + many (Pdfpage.blankpage Pdfpaper.a4) (Pdfpage.endpage b - Pdfpage.endpage a)) + else if Pdfpage.endpage a > Pdfpage.endpage b then + Pdfpage.change_pages true a + (take (Pdfpage.pages_of_pagetree a) (Pdfpage.endpage b)) + else a + in + a', b + +(* \section{Copy annotations} *) +let copy_annotations range frompdf topdf = + let frompdf, topdf = equalise_lengths frompdf topdf in + let copy_annotations_page topdf frompdf frompage topage = + match Pdf.lookup_direct frompdf "/Annots" frompage.Pdfpage.rest with + | Some ((Pdf.Array frompage_annots) as annots) -> + let objects_to_copy = Pdf.objects_referenced [] [] frompdf annots in + iter + (fun n -> + ignore (Pdf.addobj_given_num topdf (n, Pdf.lookup_obj frompdf n))) + objects_to_copy; + let topage_annots = + match Pdf.lookup_direct frompdf "/Annots" topage.Pdfpage.rest with + | Some (Pdf.Array annots) -> annots + | _ -> [] + in + let merged_dict = Pdf.Array (frompage_annots @ topage_annots) in + let topage' = + {topage with Pdfpage.rest = + Pdf.add_dict_entry topage.Pdfpage.rest "/Annots" merged_dict} + in + topdf, topage' + | Some x -> topdf, topage + | None -> topdf, topage + in + match Pdf.renumber_pdfs [frompdf; topdf] with + | [frompdf; topdf] -> + let frompdf_pages = Pdfpage.pages_of_pagetree frompdf + in let topdf_pages = Pdfpage.pages_of_pagetree topdf in + let pdf = ref topdf + and pages = ref [] + and pnum = ref 1 + and frompdf_pages = ref frompdf_pages + and topdf_pages = ref topdf_pages in + (* Go through, updating pdf and collecting new pages. *) + while not (isnull !frompdf_pages) do + let frompdf_page = hd !frompdf_pages + and topdf_page = hd !topdf_pages in + let pdf', page = + if mem !pnum range + then copy_annotations_page !pdf frompdf frompdf_page topdf_page + else !pdf, topdf_page + in + pdf := pdf'; + pages =| page; + incr pnum; + frompdf_pages := tl !frompdf_pages; + topdf_pages := tl !topdf_pages + done; + Pdfpage.change_pages true !pdf (rev !pages) + | _ -> assert false + +(* \section{N-up} *) + +(* Given a number to fit and a mediabox, return a list of transforms for the +2 pages. FIXME: Assumes mediabox (0, 0)-based. Check this for all operations for 1.8. *) +let twoup_transforms mediabox = + let width, height = + match Pdf.parse_rectangle mediabox with + xmin, ymin, xmax, ymax -> xmax -. xmin, ymax -. ymin + in + let width_exceeds_height = width > height in + let rotate = Pdftransform.Rotate ((0., 0.), rad_of_deg 90.) + in let sc = + if width_exceeds_height + then fmin (height /. width) ((width /. 2.) /. height) + else fmin (width /. height) ((height /. 2.) /. width) + in + let scale = Pdftransform.Scale ((0., 0.), sc, sc) in + let tr0, tr1 = + if width_exceeds_height then + Pdftransform.Translate (height *. sc, 0.), + Pdftransform.Translate (height *. sc *. 2., 0.) + else + Pdftransform.Translate (height *. sc, 0.), + Pdftransform.Translate (height *. sc, width *. sc) + in + let t0 = Pdftransform.matrix_of_transform [tr0; rotate; scale] + in let t1 = Pdftransform.matrix_of_transform [tr1; rotate; scale] in + [t0; t1] + +(* Combine two pages into one throughout the document. The pages have already +had their objects renumbered so as not to clash.*) +let twoup_pages pdf = function + | [] -> assert false + | (h::_) as pages -> + let resources' = + pair_reduce + (combine_pdf_resources pdf) + (map (fun p -> p.Pdfpage.resources) pages) + in + let content' = + let transform_stream contents transform = + let ops = Pdfops.parse_operators pdf resources' contents in + (* Need protect_removeme here? especially new, Q-adding protect? *) + Pdfops.stream_of_ops + ([Pdfops.Op_q] @ [Pdfops.Op_cm transform] @ ops @ [Pdfops.Op_Q]) + in + map2 + (fun p -> transform_stream p.Pdfpage.content) + pages + (take (twoup_transforms h.Pdfpage.mediabox) (length pages)) + in + {Pdfpage.mediabox = h.Pdfpage.mediabox; + Pdfpage.rotate = h.Pdfpage.rotate; + Pdfpage.content = content'; + Pdfpage.resources = resources'; + Pdfpage.rest = h.Pdfpage.rest} + +(* Main function *) +let twoup pdf = + let pdf = upright (ilist 1 (Pdfpage.endpage pdf)) pdf in + let pages = Pdfpage.pages_of_pagetree pdf in + let pagesets = splitinto 2 pages in + let renumbered = map (Pdfpage.renumber_pages pdf) pagesets in + let pages' = map (twoup_pages pdf) renumbered in + Pdfpage.change_pages true pdf pages' + +let twoup_stack_transforms mediabox = + let width, height = + match Pdf.parse_rectangle mediabox with + xmin, ymin, xmax, ymax -> xmax -. xmin, ymax -. ymin + in + let rotate = Pdftransform.Rotate ((0., 0.), rad_of_deg 90.) + in let tr0 = Pdftransform.Translate (height, 0.) + in let tr1 = Pdftransform.Translate (height, width) in + let t0 = Pdftransform.matrix_of_transform [tr0; rotate] + in let t1 = Pdftransform.matrix_of_transform [tr1; rotate] in + [t0; t1] + +let twoup_pages_stack pdf = function + | [] -> assert false + | (h::_) as pages -> + let resources = + pair_reduce + (combine_pdf_resources pdf) + (map (fun p -> p.Pdfpage.resources) pages) + in + (* Remove any CropBox *) + let rest = + Pdf.remove_dict_entry h.Pdfpage.rest "/CropBox" + in + let content' = + let transform_stream contents transform = + let ops = Pdfops.parse_operators pdf resources contents in + Pdfops.stream_of_ops + ([Pdfops.Op_q] @ [Pdfops.Op_cm transform] @ ops @ [Pdfops.Op_Q]) + in + map2 + (fun p -> transform_stream p.Pdfpage.content) + pages + (take (twoup_stack_transforms h.Pdfpage.mediabox) (length pages)) + in + {Pdfpage.mediabox = + (let width, height = + match Pdf.parse_rectangle h.Pdfpage.mediabox with + xmin, ymin, xmax, ymax -> xmax -. xmin, ymax -. ymin + in + Pdf.Array [Pdf.Real 0.; Pdf.Real 0.; Pdf.Real height; Pdf.Real (width *. 2.)]); + Pdfpage.rotate = h.Pdfpage.rotate; + Pdfpage.content = content'; + Pdfpage.resources = resources; + Pdfpage.rest = rest} + +let twoup_stack pdf = + let pdf = upright (ilist 1 (Pdfpage.endpage pdf)) pdf in + let pages = Pdfpage.pages_of_pagetree pdf in + let pagesets = splitinto 2 pages in + let renumbered = map (Pdfpage.renumber_pages pdf) pagesets in + let pages' = map (twoup_pages_stack pdf) renumbered in + Pdfpage.change_pages true pdf pages' + +(* \section{Output info} *) +let get_info raw pdf = + let infodict = + match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with + | Some infodict -> infodict + | _ -> Pdf.Dictionary [] + in + let getstring name = + match Pdf.lookup_direct pdf name infodict with + | Some (Pdf.String s) -> + if raw then s else crude_de_unicode s + | _ -> "" + in + getstring + +let get_info_utf8 pdf = + let infodict = + match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with + | Some infodict -> infodict + | _ -> Pdf.Dictionary [] + in + (function name -> + match Pdf.lookup_direct pdf name infodict with + | Some (Pdf.String s) -> Pdftext.utf8_of_pdfdocstring s + | _ -> "") + +let output_info encoding pdf = + let getstring = + match encoding with + | Raw -> get_info true pdf + | Stripped -> get_info false pdf + | UTF8 -> get_info_utf8 pdf + in + Printf.printf "Version: %i.%i\n" pdf.Pdf.major pdf.Pdf.minor; + Printf.printf "Pages: %i\n" (Pdfpage.endpage pdf); + Printf.printf "Title: %s\n" (getstring "/Title"); + Printf.printf "Author: %s\n" (getstring "/Author"); + Printf.printf "Subject: %s\n" (getstring "/Subject"); + Printf.printf "Keywords: %s\n" (getstring "/Keywords"); + Printf.printf "Creator: %s\n" (getstring "/Creator"); + Printf.printf "Producer: %s\n" (getstring "/Producer"); + Printf.printf "Created: %s\n" (getstring "/CreationDate"); + Printf.printf "Modified: %s\n" (getstring "/ModDate") + +(* \section{Blacken text} *) + +(* + \begin{verbatim} + Algorithm: Change + BT + + ET + + ...to... + + BT + Op_g 0. + + ET + + \end{verbatim} +*) +let blacktext_ops pdf resources content = + let not_text = function + | Pdfops.Op_Tj _ | Pdfops.Op_TJ _ + | Pdfops.Op_' _ | Pdfops.Op_'' (_, _, _) + | Pdfops.Op_Td (_, _) | Pdfops.Op_TD (_, _) + | Pdfops.Op_Tm _ | Pdfops.Op_T' + | Pdfops.Op_Tc _ + | Pdfops.Op_Tw _ + | Pdfops.Op_Tz _ + | Pdfops.Op_TL _ + | Pdfops.Op_Tf (_, _) + | Pdfops.Op_Tr _ + | Pdfops.Op_Ts _ -> false + | _ -> true + in let textlevel = ref 0 + in let removed = ref [] + in let operators = + Pdfops.parse_operators pdf resources content + in + let rec remove_colourops prev = function + | [] -> rev prev + | Pdfops.Op_BT::more -> + incr textlevel; + remove_colourops + (Pdfops.Op_g 0.::Pdfops.Op_BT::prev) + more + | Pdfops.Op_ET::more -> + decr textlevel; + let prev' = !removed @ Pdfops.Op_ET::prev in + removed := []; + remove_colourops prev' more + | (Pdfops.Op_G _ + | Pdfops.Op_g _ + | Pdfops.Op_RG (_, _, _) + | Pdfops.Op_rg (_, _, _) + | Pdfops.Op_k (_, _, _, _) + | Pdfops.Op_K (_, _, _, _) + | Pdfops.Op_SCN _ + | Pdfops.Op_SC _ + | Pdfops.Op_scn _ + | Pdfops.Op_sc _ + | Pdfops.Op_SCNName (_, _) + | Pdfops.Op_scnName (_, _) + | Pdfops.Op_CS _ + | Pdfops.Op_cs _ + | Pdfops.Op_sh _ + | Pdfops.Op_gs _) + as op::more -> + if !textlevel > 0 + then + begin + removed =| op; + remove_colourops prev more + end + else remove_colourops (op::prev) more + | op::more -> + if !textlevel > 0 && not_text op then removed =| op; + remove_colourops (op::prev) more + in + let operators' = remove_colourops [] operators in + [Pdfops.stream_of_ops operators'] + +(* Blacken a form xobject, writing it to the same object. *) +let process_xobject f pdf resources i = + let xobj = Pdf.lookup_obj pdf i in + match Pdf.lookup_direct pdf "/Subtype" xobj with + | None -> raise (Pdf.PDFError "No /Subtype in Xobject") + | Some (Pdf.Name "/Form") -> + Pdf.getstream xobj; + begin match xobj with + | Pdf.Stream ({contents = Pdf.Dictionary dict, Pdf.Got bytes} as rf) -> + begin match f pdf resources [Pdf.Stream rf] with + | [Pdf.Stream {contents = (Pdf.Dictionary dict', data)}] -> + let dict' = + Pdf.remove_dict_entry + (Pdf.Dictionary (mergedict dict dict')) + "/Filter" + in + rf := (dict', data) + | _ -> assert false + end + | _ -> assert false (* getstream would have complained already *) + end + | Some _ -> () + + +let process_xobjects pdf page f = + match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with + | Some (Pdf.Dictionary elts) -> + iter + (fun (k, v) -> + match v with + | Pdf.Indirect i -> process_xobject f pdf page.Pdfpage.resources i + | _ -> raise (Pdf.PDFError "blacktext")) + elts + | _ -> () + +let blacktext range pdf = + let blacktext_page _ page = + let content' = + blacktext_ops pdf page.Pdfpage.resources page.Pdfpage.content + in + process_xobjects pdf page blacktext_ops; + {page with Pdfpage.content = content'} + in + process_pages blacktext_page pdf range + +(* \section{Blacken lines} *) +let blacklines_ops pdf resources content = + let rec blacken_strokeops prev = function + | [] -> rev prev + | Pdfops.Op_CS _::t -> + blacken_strokeops (Pdfops.Op_CS "/DeviceGray"::prev) t + | (Pdfops.Op_SC _ | Pdfops.Op_SCN _ | Pdfops.Op_SCNName _ | Pdfops.Op_G _ + | Pdfops.Op_RG _ | Pdfops.Op_K _)::t -> + blacken_strokeops (Pdfops.Op_G 0.::prev) t + | h::t -> blacken_strokeops (h::prev) t + and operators = + Pdfops.parse_operators pdf resources content + in + let operators' = blacken_strokeops [] operators in + [Pdfops.stream_of_ops operators'] + +let blacklines range pdf = + let blacklines_page _ page = + let content' = + blacklines_ops pdf page.Pdfpage.resources page.Pdfpage.content + in + process_xobjects pdf page blacklines_ops; + {page with Pdfpage.content = content'} + in + process_pages blacklines_page pdf range + +(* \section{Blacken Fills} *) +let blackfills_ops pdf resources content = + let rec blacken_fillops prev = function + | [] -> rev prev + | Pdfops.Op_cs _::t -> + blacken_fillops (Pdfops.Op_cs "/DeviceGray"::prev) t + | (Pdfops.Op_sc _ | Pdfops.Op_scn _ | Pdfops.Op_scnName _ | Pdfops.Op_g _ + | Pdfops.Op_rg _ | Pdfops.Op_k _)::t -> + blacken_fillops (Pdfops.Op_g 0.::prev) t + | h::t -> blacken_fillops (h::prev) t + and operators = + Pdfops.parse_operators pdf resources content + in + let operators' = blacken_fillops [] operators in + [Pdfops.stream_of_ops operators'] + +let blackfills range pdf = + let blackfills_page _ page = + let content' = + blackfills_ops pdf page.Pdfpage.resources page.Pdfpage.content + in + process_xobjects pdf page blackfills_ops; + {page with Pdfpage.content = content'} + in + process_pages blackfills_page pdf range + +(* \section{Set a minimum line width to avoid dropout} *) +let thinlines range width pdf = + let thinpage _ page = + let operators = + Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content + in + let ctmstack = ref [ref Pdftransform.i_matrix] in + let scaleof_ctm () = + try + match Pdftransform.decompose (!(hd !ctmstack)) with + (scale, _, _, _, _, _) -> + scale + with + Failure "hd" -> 1. + in + let rec replace_operators prev = function + | [] -> rev prev + | (Pdfops.Op_w w)::more -> + (* Alter width. *) + let width' = width /. scaleof_ctm () in + let w' = + if w >= width' then Pdfops.Op_w w else Pdfops.Op_w width' + in + replace_operators (w'::prev) more + | (Pdfops.Op_cm m)::more -> + (* Update CTM *) + begin try + let top = hd !ctmstack in + top := Pdftransform.matrix_compose !top m + with + Failure "hd" -> error "Malformed file." + end; + replace_operators ((Pdfops.Op_cm m)::prev) more + | Pdfops.Op_q::more -> + (* Push stack *) + begin try + ctmstack =| ref (!(hd !ctmstack)) + with + Failure "hd" -> error "Malformed file" + end; + replace_operators (Pdfops.Op_q::prev) more + | Pdfops.Op_Q::more -> + (* Pop stack *) + begin try + ctmstack := tl !ctmstack + with + Failure "tl" -> error "Malformed file" + end; + replace_operators (Pdfops.Op_Q::prev) more + | (Pdfops.Op_gs gsname)::more -> + (* Perhaps insert [Op_w]. *) + let opw = + match Pdf.lookup_direct pdf "/ExtGState" page.Pdfpage.resources with + | None -> [] + | Some ext_state_dict -> + match Pdf.lookup_direct pdf gsname ext_state_dict with + | None -> [] + | Some gdict -> + match Pdf.lookup_direct pdf "/LW" gdict with + | Some s -> (try [Pdfops.Op_w (Pdf.getnum s)] with _ -> []) + | None -> [] + in + replace_operators (opw @ ((Pdfops.Op_gs gsname)::prev)) more + | x::more -> replace_operators (x::prev) more + in + let operators = replace_operators [] operators in + (* 2. Add an initial 'w' if width more than default width *) + let operators = + if width > 1. then (Pdfops.Op_w width)::operators else operators + in + let content' = [Pdfops.stream_of_ops operators] in + {page with Pdfpage.content = content'} + in + process_pages thinpage pdf range + +(* \section{Remove annotations} *) +let remove_annotations range pdf = + let remove_annotations_page pagenum page = + if mem pagenum range then + let rest' = + Pdf.remove_dict_entry page.Pdfpage.rest "/Annots" + in + {page with Pdfpage.rest = rest'} + else + page + in + process_pages remove_annotations_page pdf range + +(* \section{Making draft documents} *) + +(* Predicate on an xobject: true if an image xobject. *) +let isimage pdf (_, xobj) = + match Pdf.lookup_direct pdf "/Subtype" xobj with + | Some (Pdf.Name "/Image") -> true + | _ -> false + +(* Given a set of resources for a page, and the name of a resource, determine if +that name refers to an image xobject. *) +let xobject_isimage pdf resources name = + match resources with + | Pdf.Dictionary _ -> + begin match Pdf.lookup_direct pdf "/XObject" resources with + | Some xobjects -> + isimage pdf ("", Pdf.lookup_fail "xobject not there" pdf name xobjects) + | _ -> false + end + | _ -> failwith "bad resources" + +(* The subsitute for an image. *) +let substitute boxes = + if boxes then + rev + [Pdfops.Op_q; + Pdfops.Op_w 0.; + Pdfops.Op_G 0.; + Pdfops.Op_re (0., 0., 1., 1.); + Pdfops.Op_m (0., 0.); + Pdfops.Op_l (1., 1.); + Pdfops.Op_m (0., 1.); + Pdfops.Op_l (1., 0.); + Pdfops.Op_S; + Pdfops.Op_Q] + else + [] + +(* Remove references to images from a graphics stream. *) +let rec remove_images_stream boxes pdf resources prev = function + | [] -> rev prev + | (Pdfops.Op_Do name) as h::t -> + if xobject_isimage pdf resources name + then remove_images_stream boxes pdf resources (substitute boxes @ prev) t + else remove_images_stream boxes pdf resources (h::prev) t + | Pdfops.InlineImage _::t -> + remove_images_stream boxes pdf resources (substitute boxes @ prev) t + | h::t -> + remove_images_stream boxes pdf resources (h::prev) t + +let rec process_form_xobject boxes pdf form = + let form = Pdf.direct pdf form in + let page = + {Pdfpage.content = [form]; + Pdfpage.mediabox = Pdf.Null; + Pdfpage.resources = + begin match Pdf.lookup_direct pdf "/Resources" form with + | Some r -> r + | None -> Pdf.Dictionary [] + end; + Pdfpage.rotate = Pdfpage.Rotate0; + Pdfpage.rest = Pdf.Dictionary []} + in + let page', pdf = + remove_images_page boxes pdf page + in + let form' = + match form with + | Pdf.Stream {contents = (dict, _)} -> + begin match + Pdfops.stream_of_ops + (Pdfops.parse_operators pdf (Pdf.Dictionary []) page'.Pdfpage.content) + with + | Pdf.Stream {contents = (_, Pdf.Got data)} -> + let dict' = + Pdf.add_dict_entry dict "/Length" (Pdf.Integer (bytes_size data)) + in + Pdf.Stream {contents = (dict', Pdf.Got data)} + | _ -> assert false + end + | _ -> raise (Pdf.PDFError "not a stream") + in + form', pdf + +(* Remove images from a page. *) +and remove_images_page boxes pdf page = + let isform pdf xobj = + match Pdf.lookup_direct pdf "/Subtype" xobj with Some (Pdf.Name "/Form") -> true | _ -> false + in + (* Remove image xobjects and look into form ones *) + let form_xobjects = + match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with + | Some (Pdf.Dictionary elts) -> + keep (function (_, p) -> isform pdf p) elts + | _ -> [] + in + let resources', pdf = + let names, pointers = split form_xobjects in + let form_xobjects', pdf = + let pdf = ref pdf + in let outputs = ref [] in + iter + (fun p -> + let p', pdf' = process_form_xobject boxes !pdf p in + pdf := pdf'; + outputs =| p') + pointers; + rev !outputs, !pdf + in + let nums = ref [] in + iter + (fun xobj -> + let objnum = Pdf.addobj pdf xobj in + nums =| objnum) + form_xobjects'; + let newdict = + Pdf.Dictionary (combine names (map (fun x -> Pdf.Indirect x) (rev !nums))) + in + Pdf.add_dict_entry page.Pdfpage.resources "/XObject" newdict, pdf + in + let content' = + remove_images_stream boxes pdf page.Pdfpage.resources [] + (Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content) + in + {page with + Pdfpage.content = + (let stream = Pdfops.stream_of_ops content' in + Pdfcodec.encode_pdfstream pdf Pdfcodec.Flate stream; + [stream]); + Pdfpage.resources = resources'}, pdf + +(* Remove images from all pages in a document. *) +let draft boxes range pdf = + let pages = Pdfpage.pages_of_pagetree pdf in + let pagenums = indx pages in + let pdf = ref pdf + in let pages' = ref [] in + iter2 + (fun p pagenum -> + let p', pdf' = + if mem pagenum range + then remove_images_page boxes !pdf p + else p, !pdf + in + pdf := pdf'; + pages' =| p') + pages + pagenums; + Pdfpage.change_pages true !pdf (rev !pages') + +let set_version v pdf = + pdf.Pdf.minor <- v + +(* Custom Code: CSP1 - four up duplication. Alter media box and crop-box. 4-up the data. *) +let custom_csp1_page pdf _ page = + let minx, miny, maxx, maxy = + match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with + | Some r -> Pdf.parse_rectangle r + | None -> Pdf.parse_rectangle page.Pdfpage.mediabox + in + let mx0 = -.minx + in let my0 = -.miny + in let dx = maxx -. minx + in let dy = maxy -. miny in + let content = + let ops = + Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content + in + [Pdfops.Op_q; + Pdfops.Op_cm (Pdftransform.matrix_of_transform [Pdftransform.Translate (mx0, my0)])] @ ops @ [Pdfops.Op_Q] @ + [Pdfops.Op_q; + Pdfops.Op_cm (Pdftransform.matrix_of_transform [Pdftransform.Translate (mx0 +. dx, my0 +. dy)])] @ ops @ [Pdfops.Op_Q] @ + [Pdfops.Op_q; + Pdfops.Op_cm (Pdftransform.matrix_of_transform [Pdftransform.Translate (mx0, my0 +. dy)])] @ ops @ [Pdfops.Op_Q] @ + [Pdfops.Op_q; + Pdfops.Op_cm (Pdftransform.matrix_of_transform [Pdftransform.Translate (mx0 +. dx, my0)])] @ ops @ [Pdfops.Op_Q] + in + let new_mediabox = + Pdf.Array + [Pdf.Real 0.; + Pdf.Real 0.; + Pdf.Real ((maxx -. minx) *. 2.); + Pdf.Real ((maxy -. miny) *. 2.)] + in + {page with + Pdfpage.content = [Pdfops.stream_of_ops content]; + Pdfpage.mediabox = new_mediabox; + Pdfpage.rest = Pdf.add_dict_entry page.Pdfpage.rest "/CropBox" new_mediabox} + +let custom_csp1 pdf = + process_pages (custom_csp1_page pdf) pdf (ilist 1 (Pdfpage.endpage pdf)) + +let custom_csp2 f pdf = + let page = hd (Pdfpage.pages_of_pagetree pdf) in + let m_minx, m_miny, m_maxx, m_maxy = + match page.Pdfpage.mediabox with + | Pdf.Array [a; b; c; d] -> + Pdf.getnum a, Pdf.getnum b, Pdf.getnum c, Pdf.getnum d + | _ -> 0., 0., 0., 0. + in + let c_minx, c_miny, c_maxx, c_maxy = + match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with + | Some (Pdf.Array [a; b; c; d]) -> + Pdf.getnum a, Pdf.getnum b, Pdf.getnum c, Pdf.getnum d + | _ -> m_minx, m_miny, m_maxx, m_maxy + in + let x = (c_minx +. c_maxx) /. 2. + in let y = (c_miny +. c_maxy) /. 2. in + scale_contents (PosCentre (x, y)) (f /. 100.) pdf (ilist 1 (Pdfpage.endpage pdf)) + +let blank_document width height pages = + let pdf_pages = + map (fun () -> Pdfpage.blankpage (Pdfpaper.make Pdfunits.PdfPoint width height)) (many () pages) + in + let pdf, pageroot = Pdfpage.add_pagetree pdf_pages (Pdf.empty ()) in + Pdfpage.add_root pageroot [] pdf + +let blank_document_paper papersize pages = + let pdf_pages = + map (fun () -> Pdfpage.blankpage papersize) (many () pages) + in + let pdf, pageroot = Pdfpage.add_pagetree pdf_pages (Pdf.empty ()) in + Pdfpage.add_root pageroot [] pdf + diff --git a/cpdf.mli b/cpdf.mli new file mode 100644 index 0000000..b31b989 --- /dev/null +++ b/cpdf.mli @@ -0,0 +1,305 @@ +(** Coherent PDF Tools Core Routines *) +open Pdfutil + +type encoding = Raw | UTF8 | Stripped + +val parse_pagespec : Pdf.t -> string -> int list + +val string_of_pagespec : Pdf.t -> int list -> string + +val validate_pagespec : string -> bool + +val name_of_spec : bool -> Pdfmarks.t list -> Pdf.t -> int -> string -> int -> string -> int -> int -> string + +(** Debug: Print out a PDF in readable form to the terminal *) +val print_pdf_objs : Pdf.t -> unit + +(** Same, but from an input and possible password - does the minimal work to find the number of pages. *) +val endpage_io : Pdfio.input -> string option -> string option -> int + +(** Compresses all streams in the PDF document which are uncompressed, using +/FlateDecode, leaving out metadata. If the PDF is encrypted, does nothing. *) +val recompress_pdf : Pdf.t -> Pdf.t + +val decompress_pdf : Pdf.t -> Pdf.t + +(* [set_pdf_info (key, value, version)] sets the entry [key] in the /Info directory, updating +the PDF minor version to [version].*) +val set_pdf_info : (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t + +(* [set_pdf_info (key, value, version)] sets the entry [key] in the +/ViewerPreferences directory, updating the PDF minor version to [version].*) +val set_viewer_preference : (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t + +(* Set the page layout to the given name (sans slash) e.g SinglePage *) +val set_page_layout : Pdf.t -> string -> Pdf.t + +(* Set the page layout to the given name (sans slash) e.g SinglePage *) +val set_page_mode : Pdf.t -> string -> Pdf.t + +(* Expand the string "now" to a PDF date string, ignoring any other string *) +val expand_date : string -> string + +(* Given a function from page number and page to page, a document, and a list +of page numbers to apply it to, apply the function to all those pages. *) +val process_pages : (int -> Pdfpage.t -> Pdfpage.t) -> Pdf.t -> int list -> Pdf.t + +(* Same, but just iterate *) +val iter_pages : (int -> Pdfpage.t -> unit) -> Pdf.t -> int list -> unit + +(* Same, but map *) +val map_pages : (int -> Pdfpage.t -> 'a) -> Pdf.t -> int list -> 'a list + +(* Two possible error classes. *) +exception SoftError of string +exception HardError of string + +(* [presentation range t d h i dir effect_dur pdf] *) +val presentation : int list -> string option -> + float option -> bool -> bool -> int -> float -> Pdf.t -> Pdf.t + +(* [attach_file keep-version topage pdf filename] *) +val attach_file : bool -> int option -> Pdf.t -> string -> Pdf.t + +(* Remove attached files. *) +val remove_attached_files : Pdf.t -> Pdf.t + +(* List attached files. Attachment name and page number. Page 0 is document level. *) +val list_attached_files : Pdf.t -> (string * int) list + +(* [parse_bookmark_file verify pdf input] *) +val parse_bookmark_file : bool -> Pdf.t -> Pdfio.input -> Pdfmarks.t list + +(* [add_bookmarks verify input pdf] *) +val add_bookmarks : bool -> Pdfio.input -> Pdf.t -> Pdf.t + +(* [copy_id keepversion copyfrom copyto] *) +val copy_id : bool -> Pdf.t -> Pdf.t -> Pdf.t + +(* [set_metadata keepversion filename pdf] *) +val set_metadata : bool -> string -> Pdf.t -> Pdf.t + +val set_metadata_from_bytes : bool -> Pdfio.bytes -> Pdf.t -> Pdf.t + +(* Remove the metadata from a file *) +val remove_metadata : Pdf.t -> Pdf.t + +(* [combine_pages fast under over scaletofit swap equalize] *) +val combine_pages : bool -> Pdf.t -> Pdf.t -> bool -> bool -> bool -> Pdf.t + +(* [stamp scale_to_fit fast isover range over pdf] stamps the first page of [over] over each page of the PDF. *) +val stamp : bool -> bool -> bool -> int list -> Pdf.t -> Pdf.t -> Pdf.t + +(* [list_bookmarks deunicode range page_offset pdf output] *) +val list_bookmarks : encoding -> int list -> Pdf.t -> Pdfio.output -> unit + +(* Custom CSP1 *) +val custom_csp1 : Pdf.t -> Pdf.t + +(* Custom CSP2 *) +val custom_csp2 : float -> Pdf.t -> Pdf.t + +(* FIXME: Horrid - should return PDFs, write to file separtely. *) +(* [split_at_bookmarks linearize nobble level spec pdf] *) +val split_at_bookmarks : string -> bool -> (Pdf.t -> Pdf.t) -> int -> string -> Pdf.t -> unit + +(* The new one *) +val split_on_bookmarks : Pdf.t -> int -> Pdf.t list + +(* [split_pdf printf chunksize linearize nobble spec pdf] *) +val split_pdf : Pdfwrite.encryption option -> bool -> string -> int -> bool -> (Pdf.t -> Pdf.t) -> string -> Pdf.t -> unit + +(* Nobble a page, given pdf, pagenumber and page *) +val nobble_page : Pdf.t -> 'a -> Pdfpage.t -> Pdfpage.t + +(* Print page info (Mediabox etc) *) +val output_page_info : Pdf.t -> unit + +val hasbox : Pdf.t -> int -> string -> bool + +(* Print metadate to stdout *) +val get_metadata : Pdf.t -> Pdfio.bytes + +val print_metadata : Pdf.t -> unit + +(* Print font list to stdout *) +val print_fonts : Pdf.t -> unit + +val list_fonts : Pdf.t -> (int * string * string * string * string) list + +(* Possible positions for adding text and other uses. *) +type position = + | PosCentre of float * float + | PosLeft of float * float + | PosRight of float * float + | Top of float + | TopLeft of float + | TopRight of float + | Left of float + | BottomLeft of float + | Bottom of float + | BottomRight of float + | Right of float + | Diagonal + | ReverseDiagonal + +val string_of_position : position -> string + +type orientation = + | Horizontal + | Vertical + | VerticalDown + +type justification = + | LeftJustify + | CentreJustify + | RightJustify + +(* [calculate ignore_d w (xmin, ymin, xmax, ymax) shorterside pos] *) +val calculate_position : + bool -> + float -> + float * float * float * float -> + orientation -> position -> float * float * float + +(* Returns what the added text string would be *) +(*val addtext_returntext : Pdf.t -> string -> int -> string -> int -> string*) + +val metrics_howmany : unit -> int +val metrics_text : int -> string +val metrics_x : int -> float +val metrics_y : int -> float +val metrics_rot : int -> float +val metrics_baseline_adjustment : unit -> float + +(* [add_texts fontname font bates colour positino linespacing fontsize shorterside text pages pdf] *) +val addtexts : + bool -> (*metrics*) + float -> (*linewidth*) + bool -> (*outline*) + bool -> (*fast*) + string -> (*fontname*) + Pdftext.standard_font option -> (*font*) + int -> (*bates number *) + float * float * float -> (*colour*) + position -> (*position*) + float -> (*linespacing*) + float -> (*fontsize*) + bool -> (*underneath*) + string ->(*text*) + int list ->(*page range*) + orientation ->(*orientation*) + bool ->(*relative to cropbox?*) + float ->(*opacity*) + justification ->(*justification*) + bool ->(*midline adjust?*) + string ->(*filename*) + Pdf.t ->(*pdf*) + Pdf.t + +(* Remove text from the given pages. *) +val removetext : int list -> Pdf.t -> Pdf.t + +(* Modify the rotation of the page and its contents to leave the rotation at 0 with the page effectively unaltered. *) +val upright : ?fast:bool -> int list -> Pdf.t -> Pdf.t + +(* [crop_pdf x y w h pdf range] *) +val crop_pdf : float -> float -> float -> float -> Pdf.t -> int list -> Pdf.t + +(* [set_mediabox x y w h pdf range] *) +val set_mediabox : float -> float -> float -> float -> Pdf.t -> int list -> Pdf.t + +val setBox : string -> float -> float -> float -> float -> Pdf.t -> int list -> Pdf.t + +(* Remove any cropping from the given pages. *) +val remove_cropping_pdf : Pdf.t -> int list -> Pdf.t +val remove_trim_pdf : Pdf.t -> int list -> Pdf.t +val remove_bleed_pdf : Pdf.t -> int list -> Pdf.t +val remove_art_pdf : Pdf.t -> int list -> Pdf.t + +(* Change rotation to a given value 0, 90, 180, 270 on given pages. *) +val rotate_pdf : int -> Pdf.t -> int list -> Pdf.t + +(* Rotate clockwise by 0, 90, 180, 270 on given pages. *) +val rotate_pdf_by : int -> Pdf.t -> int list -> Pdf.t + +(* Rotate the contents by the given angle on the given pages. *) +val rotate_contents : ?fast:bool -> float -> Pdf.t -> int list -> Pdf.t + +(* Flip the given pages vertically *) +val vflip_pdf : ?fast:bool -> Pdf.t -> int list -> Pdf.t + +(* Flip the given pages horizontally *) +val hflip_pdf : ?fast:bool -> Pdf.t -> int list -> Pdf.t + +(* Shift a PDF in x and y (in pts) in the given pages. *) +val shift_pdf : ?fast:bool -> float -> float -> Pdf.t -> int list -> Pdf.t + +(* Scale a PDF in sx, sy in the given pages. *) +val scale_pdf : ?fast:bool -> float -> float -> Pdf.t -> int list -> Pdf.t + +(* FIXME: See .ml *) +(* [scale_to_fit_pdf input_scale x y op pdf range] *) +val scale_to_fit_pdf : ?fast:bool -> float -> float -> float -> 'a -> Pdf.t -> int list -> Pdf.t + +(* Scale the contents of a page by a given factor centred around a given point in a given range. *) +val scale_contents : ?fast:bool -> position -> float -> Pdf.t -> int list -> Pdf.t + +(* Put blank pages before the given page numbers *) +val padbefore : int list -> Pdf.t -> Pdf.t + +(* Put blank pages after the given page numbers *) +val padafter : int list -> Pdf.t -> Pdf.t + +(* Pad to a multiple of n pages *) +val padmultiple : int -> Pdf.t -> Pdf.t + +(* List the annotations to standard output *) +val list_annotations : encoding -> Pdf.t -> unit + +val list_annotations_more : Pdf.t -> unit + +val get_annotations : encoding -> Pdf.t -> (int * string) list + +(* Copy the annotations on a given set of pages from a to b yielding c. *) +val copy_annotations : int list -> Pdf.t -> Pdf.t -> Pdf.t + +(* Remove the annotations on given pages. *) +val remove_annotations : int list -> Pdf.t -> Pdf.t + +(* Two-up a PDF. *) +val twoup : Pdf.t -> Pdf.t + +(* Stack Two-up a PDF. *) +val twoup_stack : Pdf.t -> Pdf.t + +(* Output to standard output general information about a PDF. *) +(*val get_info : bool -> Pdf.t -> string -> string*) + +val get_info_utf8 : Pdf.t -> string -> string + +val output_info : encoding -> Pdf.t -> unit + +(* Make all lines in the PDF at least a certain thickness. *) +val thinlines : int list -> float -> Pdf.t -> Pdf.t + +(* Make all text on certain pages black. *) +val blacktext : int list -> Pdf.t -> Pdf.t + +(* Make all lines on certain pages black. *) +val blacklines : int list -> Pdf.t -> Pdf.t + +(* Make all fills on certain pages black. *) +val blackfills : int list -> Pdf.t -> Pdf.t + +(* Remove images from a PDF, optionally adding crossed boxes. *) +val draft : bool -> int list -> Pdf.t -> Pdf.t + +val set_version : int -> Pdf.t -> unit + +(*i val pdf_of_pages : Pdfmarks.bookmark list option -> int list -> Pdfdoc.page list -> Pdf.t -> Pdf.t i*) + +val blank_document : float -> float -> int -> Pdf.t + +val blank_document_paper : Pdfpaper.t -> int -> Pdf.t + diff --git a/cpdfcommand.ml b/cpdfcommand.ml new file mode 100644 index 0000000..26f4d50 --- /dev/null +++ b/cpdfcommand.ml @@ -0,0 +1,3400 @@ +(* cpdf command line tools} *) +let demo = false +and major_version = 1 +and minor_version = 7 +and version_date = "(7th August 2013)" + +open Pdfutil +open Pdfio + +(* Wrap up the file reading functions to exit with code 1 when an encryption +problem occurs. This happens when object streams are in an encrypted document +and so it can't be read without the right password... The existing error +handling only dealt with the case where the document couldn't be decrypted once +it had been loaded. *) +let pdfread_pdf_of_input a b c = + try Pdfread.pdf_of_input a b c with + Pdf.PDFError s when String.length s >=10 && String.sub s 0 10 = "Encryption" -> + raise (Cpdf.SoftError "Bad owner or user password when reading document") + +let pdfread_pdf_of_channel_lazy ?source b c d = + try Pdfread.pdf_of_channel_lazy ?source b c d with + Pdf.PDFError s when String.length s >=10 && String.sub s 0 10 = "Encryption" -> + raise (Cpdf.SoftError "Bad owner or user password when reading document") + +let pdfread_pdf_of_file a b c = + try Pdfread.pdf_of_file a b c with + Pdf.PDFError s when String.length s >=10 && String.sub s 0 10 = "Encryption" -> + raise (Cpdf.SoftError "Bad owner or user password when reading document") + +let optstring = function + | "" -> None + | x -> Some x + +(* To prevent problems when piping on Windows *) +let _ = + set_binary_mode_in stdin true; + set_binary_mode_out stdout true + +(* Fatal error reporting. *) +let error s = + prerr_string (s ^ "\nUse -help for help.\n"); + flush stderr; + exit 2 + +let soft_error s = + Printf.eprintf "%s\n" s; + flush stderr; + exit 1 + +let parse_pagespec pdf spec = + try Cpdf.parse_pagespec pdf spec with + Failure x -> error x + +(* Operations. *) +type op = + | CopyFont of string + | CSP1 + | CSP2 of float + | CSP3 + | CountPages + | Version + | Encrypt + | Decrypt + | StampOn of string + | StampUnder of string + | CombinePages of string + | TwoUp + | TwoUpStack + | RemoveBookmarks + | AddBookmarks of string + | AddText of string + | AddRectangle + | RemoveText + | Draft + | PadBefore + | PadAfter + | PadEvery of int + | PadMultiple of int + | Shift + | Scale + | ScaleToFit + | ScaleContents of float + | AttachFile of string list + | RemoveAttachedFiles + | ListAttachedFiles + | DumpAttachedFiles + | RemoveAnnotations + | ListAnnotations + | ListAnnotationsMore + | CopyAnnotations of string + | Merge + | Split + | SplitOnBookmarks of int + | Clean + | Info + | PageInfo + | Metadata + | SetMetadata of string + | RemoveMetadata + | Fonts + | RemoveFonts + | Compress + | Decompress + | Crop + | RemoveCrop + | CopyCropBoxToMediaBox + | CopyBox + | MediaBox + | Rotate of int + | Rotateby of int + | RotateContents of float + | Upright + | VFlip + | HFlip + | ThinLines of float + | SetAuthor of string + | SetTitle of string + | SetSubject of string + | SetKeywords of string + | SetCreate of string + | SetModify of string + | SetCreator of string + | SetProducer of string + | SetTrapped + | SetUntrapped + | SetVersion of int + | ListBookmarks + | SetPageLayout of string + | SetPageMode of string + | HideToolbar of bool + | HideMenubar of bool + | HideWindowUI of bool + | FitWindow of bool + | CenterWindow of bool + | DisplayDocTitle of bool + | Presentation + | ChangeId + | RemoveId + | CopyId of string + | BlackText + | BlackLines + | BlackFills + | ExtractImages + | ImageResolution of float + | MissingFonts + | DumpData + | UpdateInfo of string + | RemoveUnusedResources + | ExtractFontFile + | ExtractText + | PrintLinearization + +(* Inputs: filename, pagespec. *) +type input_kind = + | AlreadyInMemory of Pdf.t + | InFile of string + | StdIn + +let string_of_input_kind = function + | AlreadyInMemory _ -> "AlreadyInMemory" + | InFile s -> s + | StdIn -> "Stdin" + +type input = + input_kind * string * Pdfmerge.rotation * string * string (* input kind, range, rotation, user_pw, owner_pw *) + +type output_method = + | NoOutputSpecified + | Stdout + | File of string + +(* A list of PDFs to be output, if no output method was specified. *) +let output_pdfs : Pdf.t list ref = ref [] + +type font = + | StandardFont of Pdftext.standard_font + | OtherFont of string + +type args = + {mutable op : op option; + mutable preserve_objstm : bool; + mutable create_objstm : bool; + mutable out : output_method; + mutable inputs : input list; + mutable chunksize : int; + mutable linearize : bool; + mutable rectangle : float * float * float * float; + mutable coord : float * float; + mutable duration : float option; + mutable transition : string option; + mutable horizontal : bool; + mutable inward : bool; + mutable direction : int; + mutable effect_duration : float; + mutable font : font; + mutable fontname : string; + mutable fontsize : float; + mutable color : float * float * float; + mutable opacity : float; + mutable position : Cpdf.position; + mutable underneath : bool; + mutable linespacing : float; + mutable midline : bool; + mutable justification : Cpdf.justification; + mutable bates : int; + mutable prerotate : bool; + mutable orientation : Cpdf.orientation; + mutable relative_to_cropbox : bool; + mutable keepversion : bool; + mutable bycolumns : bool; + mutable pagerotation : int; + mutable crypt_method : string; + mutable owner : string; + mutable user : string; + mutable no_edit : bool; + mutable no_print : bool; + mutable no_copy : bool; + mutable no_annot : bool; + mutable no_forms : bool; + mutable no_extract : bool; + mutable no_assemble : bool; + mutable no_hq_print : bool; + mutable debug : bool; + mutable boxes : bool; + mutable encrypt_metadata : bool; + mutable retain_numbering : bool; + mutable remove_duplicate_fonts : bool; + mutable remove_duplicate_streams : bool; + mutable encoding : Cpdf.encoding; + mutable scale : float; + mutable copyfontpage : int; + mutable copyfontname : string option; + mutable fast : bool; + mutable dashrange : string; + mutable outline : bool; + mutable linewidth : float; + mutable path_to_ghostscript : string; + mutable frombox : string option; + mutable tobox : string option; + mutable mediabox_if_missing : bool; + mutable topage : string option; + mutable printf_format : bool; + mutable scale_stamp_to_fit : bool; + mutable keep_this_id : string option; + mutable do_ask : bool; + mutable verbose : bool; + mutable dont_overwrite_inputs : bool; + mutable dont_overwrite_existing_files : bool; + mutable makenewid : bool; + mutable ismulti : bool; + mutable uprightstamp : bool} + +(* List of all filenames in any AND stage - this is used to check that we don't +overwrite any input file when -dont-overwrite-existing-files is used. *) + +let all_inputs : string list ref = ref [] + +let args = + {op = None; + preserve_objstm = true; + create_objstm = false; + out = NoOutputSpecified; + inputs = []; + chunksize = 1; + linearize = false; + rectangle = 0., 0., 0., 0.; + coord = 0., 0.; + duration = None; + transition = None; + horizontal = true; + inward = true; + direction = 0; + effect_duration = 1.; + font = StandardFont Pdftext.TimesRoman; + fontname = "Times-Roman"; + fontsize = 12.; + color = 0., 0., 0.; + opacity = 1.; + position = Cpdf.TopLeft 100.; + underneath = false; + linespacing = 1.; + midline = false; + justification = Cpdf.LeftJustify; + bates = 0; + prerotate = false; + orientation = Cpdf.Horizontal; + relative_to_cropbox = false; + keepversion = false; + bycolumns = false; + pagerotation = 0; + crypt_method = ""; + owner = ""; + user = ""; + no_edit = false; + no_print = false; + no_copy = false; + no_annot = false; + no_forms = false; + no_extract = false; + no_assemble = false; + no_hq_print = false; + debug = false; + boxes = false; + encrypt_metadata = true; + retain_numbering = false; + remove_duplicate_fonts = false; + remove_duplicate_streams = false; + encoding = Cpdf.Stripped; + scale = 1.; + copyfontpage = 1; + copyfontname = None; + fast = false; + dashrange = "all"; + outline = false; + linewidth = 1.0; + path_to_ghostscript = ""; + frombox = None; + tobox = None; + mediabox_if_missing = false; + topage = None; + printf_format = false; + scale_stamp_to_fit = false; + keep_this_id = None; + do_ask = false; + verbose = false; + dont_overwrite_inputs = false; + dont_overwrite_existing_files = false; + makenewid = false; + ismulti = false; + uprightstamp = false} + +let reset_arguments () = + args.op <- None; + args.preserve_objstm <- true; + args.create_objstm <- false; + args.out <- NoOutputSpecified; + args.inputs <- []; + args.chunksize <- 1; + args.linearize <- false; + args.rectangle <- 0., 0., 0., 0.; + args.coord <- 0., 0.; + args.duration <- None; + args.transition <- None; + args.horizontal <- true; + args.inward <- true; + args.direction <- 0; + args.effect_duration <- 1.; + args.font <- StandardFont Pdftext.TimesRoman; + args.fontname <- "Times-Roman"; + args.fontsize <- 12.; + args.color <- 0., 0., 0.; + args.opacity <- 1.; + args.position <- Cpdf.TopLeft 100.; + args.underneath <- false; + args.linespacing <- 1.; + args.midline <- false; + args.justification <- Cpdf.LeftJustify; + args.bates <- 0; + args.prerotate <- false; + args.orientation <- Cpdf.Horizontal; + args.relative_to_cropbox <- false; + args.keepversion <- false; + args.bycolumns <- false; + args.pagerotation <- 0; + args.crypt_method <- ""; + args.owner <- ""; + args.user <- ""; + args.no_edit <- false; + args.no_print <- false; + args.no_copy <- false; + args.no_annot <- false; + args.no_forms <- false; + args.no_extract <- false; + args.no_assemble <- false; + args.no_hq_print <- false; + args.debug <- false; + args.boxes <- false; + args.encrypt_metadata <- true; + args.retain_numbering <- false; + args.remove_duplicate_fonts <- false; + args.remove_duplicate_streams <- false; + args.encoding <- Cpdf.Stripped; + args.scale <- 1.; + args.copyfontpage <- 1; + args.copyfontname <- None; + args.fast <- false; + args.dashrange <- "all"; + args.outline <- false; + args.linewidth <- 1.0; + args.path_to_ghostscript <- ""; + args.frombox <- None; + args.tobox <- None; + args.mediabox_if_missing <- false; + args.topage <- None; + args.printf_format <- false; + args.scale_stamp_to_fit <- false; + args.keep_this_id <- None; + args.makenewid <- false; + args.ismulti <- false; + args.uprightstamp <- false + (* We don't reset args.do_ask and args.verbose, because they operate on all parts of the AND-ed command line sent from cpdftk. *) + +let banlist_of_args () = + let l = ref [] in + if args.no_edit then l =| Pdfcrypt.NoEdit; + if args.no_print then l =| Pdfcrypt.NoPrint; + if args.no_copy then l =| Pdfcrypt.NoCopy; + if args.no_annot then l =| Pdfcrypt.NoAnnot; + if args.no_forms then l =| Pdfcrypt.NoForms; + if args.no_extract then l =| Pdfcrypt.NoExtract; + if args.no_assemble then l =| Pdfcrypt.NoAssemble; + if args.no_hq_print then l =| Pdfcrypt.NoHqPrint; + !l + +(* If a file is encrypted, decrypt it using the owner password or, if not +present, the user password. If the user password is used, the operation to be +performed is checked to see if it's allowable under the permissions regime. *) + +(* The bans. Each function has a list of bans. If any of these is present in the +bans list in the input file, the operation cannot proceed. Other operations +cannot proceed at all without owner password. *) +let banned banlist = function + | Fonts | Info | Metadata | PageInfo | CountPages -> false (* Always allowed *) + | Decrypt | Encrypt -> true (* Never allowed *) + | _ -> mem Pdfcrypt.NoEdit banlist + +let operation_allowed banlist = function + | None -> true (* Merge *) (* changed to allow it *) + | Some op -> not (banned banlist op) + +let rec decrypt_if_necessary (a, b, c, user_pw, owner_pw) op pdf = + if not (Pdfcrypt.is_encrypted pdf) then pdf else + match Pdfcrypt.decrypt_pdf_owner owner_pw pdf with + | Some pdf -> pdf + | _ -> + match Pdfcrypt.decrypt_pdf user_pw pdf with + | Some pdf, permissions -> + if operation_allowed permissions op + then pdf + else if args.do_ask + then decrypt_if_necessary_ask (a, b, c, user_pw, owner_pw) op pdf + else soft_error "User password cannot give permission for this operation" + | _ -> + if args.do_ask + then decrypt_if_necessary_ask (a, b, c, user_pw, owner_pw) op pdf + else soft_error "Failed to decrypt file: wrong password?" + +and decrypt_if_necessary_ask (a, b, c, user_pw, owner_pw) op pdf = + let name = match a with InFile x -> x | StdIn -> "Standard input" | AlreadyInMemory _ -> "PDF" in + flprint "The password supplied for input PDF:\n"; + flprint (" " ^ name); + flprint "\n did not work. The PDF is encrypted, so you must supply the\n"; + flprint " owner password to open it. To quit, enter a blank password\n"; + flprint "Please enter the password to use on the input PDF:\n"; + flprint (" " ^ name ^ ".\n"); + match Pervasives.read_line () with + | "" -> soft_error "Failed to decrypt file: wrong password?" + | x -> decrypt_if_necessary (a, b, c, user_pw, x) op pdf + +let nobble pdf = + if not demo then pdf else + Cpdf.process_pages (Cpdf.nobble_page pdf) pdf (ilist 1 (Pdfpage.endpage pdf)) + +(* Output Page Count *) +let output_page_count pdf = + Printf.printf "%i\n" (Pdfpage.endpage pdf) + +let setop op () = + args.op <- Some op + +let setout name = + args.out <- File name + +let setchunk c = + if c > 0 + then args.chunksize <- c + else error "invalid chunk size" + +let setlinearize () = + args.linearize <- true + +let fixdashes s = + let bufferdashes chars = + let buf = ref [] in + iter + (function '-' -> buf =@ [' '; '-'; ' '] | x -> buf =| x) + chars; + rev !buf + in + let chars = explode s in + implode (bufferdashes chars) + +let encrypt_to_collect = ref 0 + +let setmethod s = + if args.op = None then args.op <- Some Encrypt; (* Could be additional to -split *) + match s with + | "40bit" | "128bit" | "AES" | "AES256" | "AES256ISO" -> args.crypt_method <- s + | _ -> error "Unsupported encryption method" + +let setowner s = + match s with + | "PROMPT" -> + flprint "Enter owner password to use on the output PDF.\n"; + args.owner <- Pervasives.read_line () + | s -> + args.owner <- s + +let setuser s = + match s with + | "PROMPT" -> + flprint "Enter user password to use on the output PDF.\n"; + args.user <- Pervasives.read_line () + | s -> + args.user <- s + +let anon_fun s = + try + match !encrypt_to_collect with + | 3 -> setmethod s; decr encrypt_to_collect + | 2 -> setowner s; decr encrypt_to_collect + | 1 -> setuser s; decr encrypt_to_collect + | 0 -> + let before, after = cleavewhile (neq '=') (explode s) in + begin match implode before with + | "user" -> + begin match args.inputs with + | [] -> () + | (a, b, c, _, e)::more -> + args.inputs <- (a, b, c, implode (tl after), e)::more + end + | "owner" -> + begin match args.inputs with + | [] -> () + | (a, b, c, d, _)::more -> + args.inputs <- (a, b, c, d, implode (tl after))::more + end + | _ -> raise Not_found + end + | _ -> assert false + with + Not_found -> + try + ignore (String.index s '.'); + args.inputs <- (InFile s, "all", Pdfmerge.DNR, "", "")::args.inputs; + all_inputs := s::!all_inputs + with + Not_found -> + match args.inputs with + | [] -> () + | (a, _, r, d, e)::t -> + match rev (explode s) with + | 'N'::more -> + args.inputs <- (a, fixdashes (implode (rev more)), Pdfmerge.N, d, e)::t + | 'S'::more -> + args.inputs <- (a, fixdashes (implode (rev more)), Pdfmerge.S, d, e)::t + | 'E'::more -> + args.inputs <- (a, fixdashes (implode (rev more)), Pdfmerge.E, d, e)::t + | 'W'::more -> + args.inputs <- (a, fixdashes (implode (rev more)), Pdfmerge.W, d, e)::t + | 'L'::more -> + args.inputs <- (a, fixdashes (implode (rev more)), Pdfmerge.L, d, e)::t + | 'R'::more -> + args.inputs <- (a, fixdashes (implode (rev more)), Pdfmerge.R, d, e)::t + | 'D'::more -> + args.inputs <- (a, fixdashes (implode (rev more)), Pdfmerge.D, d, e)::t + | _ -> + args.inputs <- (a, fixdashes s, r, d, e)::t + +(* Unit conversions to points. *) +let mm x = ((x /. 10.) /. 2.54) *. 72. + +let cm x = (x /. 2.54) *. 72. + +let inch x = x *. 72. + +let points_of_papersize p = + let unit = Pdfpaper.unit p + and w = Pdfpaper.width p + and h = Pdfpaper.height p in + let c = Pdfunits.convert 0. unit Pdfunits.PdfPoint in + c w, c h + +let rec parse_units_again numbers papersize more = + let w, h = points_of_papersize papersize in + parse_units (h::w::numbers) more + +and parse_units numbers = function + | Pdfgenlex.LexName "a10portrait"::more -> + parse_units_again numbers Pdfpaper.a10 more + | Pdfgenlex.LexName "a9portrait"::more -> + parse_units_again numbers Pdfpaper.a9 more + | Pdfgenlex.LexName "a8portrait"::more -> + parse_units_again numbers Pdfpaper.a8 more + | Pdfgenlex.LexName "a7portrait"::more -> + parse_units_again numbers Pdfpaper.a7 more + | Pdfgenlex.LexName "a6portrait"::more -> + parse_units_again numbers Pdfpaper.a6 more + | Pdfgenlex.LexName "a5portrait"::more -> + parse_units_again numbers Pdfpaper.a5 more + | Pdfgenlex.LexName "a4portrait"::more -> + parse_units_again numbers Pdfpaper.a4 more + | Pdfgenlex.LexName "a3portrait"::more -> + parse_units_again numbers Pdfpaper.a3 more + | Pdfgenlex.LexName "a2portrait"::more -> + parse_units_again numbers Pdfpaper.a2 more + | Pdfgenlex.LexName "a1portrait"::more -> + parse_units_again numbers Pdfpaper.a1 more + | Pdfgenlex.LexName "a0portrait"::more -> + parse_units_again numbers Pdfpaper.a0 more + | Pdfgenlex.LexName "a10landscape"::more -> + parse_units_again numbers (Pdfpaper.landscape Pdfpaper.a10) more + | Pdfgenlex.LexName "a9landscape"::more -> + parse_units_again numbers (Pdfpaper.landscape Pdfpaper.a9) more + | Pdfgenlex.LexName "a8landscape"::more -> + parse_units_again numbers (Pdfpaper.landscape Pdfpaper.a8) more + | Pdfgenlex.LexName "a7landscape"::more -> + parse_units_again numbers (Pdfpaper.landscape Pdfpaper.a7) more + | Pdfgenlex.LexName "a6landscape"::more -> + parse_units_again numbers (Pdfpaper.landscape Pdfpaper.a6) more + | Pdfgenlex.LexName "a5landscape"::more -> + parse_units_again numbers (Pdfpaper.landscape Pdfpaper.a5) more + | Pdfgenlex.LexName "a4landscape"::more -> + parse_units_again numbers (Pdfpaper.landscape Pdfpaper.a4) more + | Pdfgenlex.LexName "a3landscape"::more -> + parse_units_again numbers (Pdfpaper.landscape Pdfpaper.a3) more + | Pdfgenlex.LexName "a2landscape"::more -> + parse_units_again numbers (Pdfpaper.landscape Pdfpaper.a2) more + | Pdfgenlex.LexName "a1landscape"::more -> + parse_units_again numbers (Pdfpaper.landscape Pdfpaper.a1) more + | Pdfgenlex.LexName "a0landscape"::more -> + parse_units_again numbers (Pdfpaper.landscape Pdfpaper.a0) more + | Pdfgenlex.LexName "uslegalportrait"::more -> + parse_units_again numbers Pdfpaper.uslegal more + | Pdfgenlex.LexName "usletterportrait"::more -> + parse_units_again numbers Pdfpaper.usletter more + | Pdfgenlex.LexName "uslegallandscape"::more -> + parse_units_again numbers (Pdfpaper.landscape Pdfpaper.uslegal) more + | Pdfgenlex.LexName "usletterlandscape"::more -> + parse_units_again numbers (Pdfpaper.landscape Pdfpaper.usletter) more + | Pdfgenlex.LexInt x::Pdfgenlex.LexName "mm"::more -> + parse_units ((mm <| float_of_int x)::numbers) more + | Pdfgenlex.LexReal x::Pdfgenlex.LexName "mm"::more -> + parse_units (mm x::numbers) more + | Pdfgenlex.LexInt x::Pdfgenlex.LexName "cm"::more -> + parse_units ((cm <| float_of_int x)::numbers) more + | Pdfgenlex.LexReal x::Pdfgenlex.LexName "cm"::more -> + parse_units (cm x::numbers) more + | Pdfgenlex.LexInt x::Pdfgenlex.LexName "in"::more -> + parse_units ((inch <| float_of_int x)::numbers) more + | Pdfgenlex.LexReal x::Pdfgenlex.LexName "in"::more -> + parse_units (inch x::numbers) more + | Pdfgenlex.LexInt x::more -> + parse_units (float_of_int x::numbers) more + | Pdfgenlex.LexReal x::more -> + parse_units (x::numbers) more + | Pdfgenlex.LexName "pt"::more -> + parse_units numbers more + | _ -> rev numbers + +let rec space_units_inner = function + | [] -> [] + | 'm'::'m'::t -> ' '::'m'::'m'::' '::space_units_inner t + | 'c'::'m'::t -> ' '::'c'::'m'::' '::space_units_inner t + | 'i'::'n'::t -> ' '::'i'::'n'::' '::space_units_inner t + | 'p'::'t'::t -> ' '::'p'::'t'::' '::space_units_inner t + | h::t -> h::space_units_inner t + +let space_units s = + implode (space_units_inner (explode s)) + +let parse_units_string s = + parse_units [] (Pdfgenlex.lex_string <| space_units s) + +let parse_rectangle s = + try + match parse_units_string s with + | [x; y; w; h] -> x, y, w, h + | _ -> error "Bad rectangle specification" + with + _ -> error "Bad rectangle specification" + +let parse_coordinate s = + try + match parse_units_string s with + | [dx; dy] -> dx, dy + | _ -> error "Bad coordinate specification" + with + _ -> error "Bad coordinate specification" + +let parse_single_number s = + try + match parse_units_string s with + | [x] -> x + | _ -> error "Bad number Argument" + with + _ -> error "Bad number argument" + +(* Setting operations *) +let setcrop s = + setop Crop (); + args.rectangle <- parse_rectangle s + +let setmediabox s = + setop MediaBox (); + args.rectangle <- parse_rectangle s + +let setrectangle s = + setop AddRectangle (); + args.coord <- parse_coordinate s + +let setrotate i = + if i = 0 || i = 90 || i = 180 || i = 270 + then setop (Rotate i) () + else error "bad rotation" + +let setrotateby i = + if i = 0 || i = 90 || i = 180 || i = 270 + then setop (Rotateby i) () + else error "bad rotation" + +let setrotatecontents f = + setop (RotateContents f) () + +let setauthor s = setop (SetAuthor s) () +let settitle s = setop (SetTitle s) () +let setsubject s = setop (SetSubject s) () +let setkeywords s = setop (SetKeywords s) () +let setcreate s = setop (SetCreate s) () +let setmodify s = setop (SetModify s) () +let setcreator s = setop (SetCreator s) () +let setproducer s = setop (SetProducer s) () +let setmetadata s = setop (SetMetadata s) () +let setversion i = setop (SetVersion i) () +let setpagelayout s = setop (SetPageLayout s) () +let setpagemode s = setop (SetPageMode s) () + +let hidetoolbar b = + try setop (HideToolbar (bool_of_string b)) () with + _ -> failwith "HideToolBar: must use true or false" + +let hidemenubar b = + try setop (HideMenubar (bool_of_string b)) () with + _ -> failwith "HideMenuBar: must use true or false" + +let hidewindowui b = + try setop (HideWindowUI (bool_of_string b)) () with + _ -> failwith "HideWindowUI: must use true or false" + +let fitwindow b = + try setop (FitWindow (bool_of_string b)) () with + _ -> failwith "FitWindow: must use true or false" + +let centerwindow b = + try setop (CenterWindow (bool_of_string b)) () with + _ -> failwith "CenterWindow: must use true or false" + +let displaydoctitle b = + try setop (DisplayDocTitle (bool_of_string b)) () with + _ -> failwith "DisplayDocTitle: must use true or false" + +let setsplitbookmarks i = setop (SplitOnBookmarks i) () +let setstdout () = args.out <- Stdout +let setstdin () = args.inputs <- [StdIn, "all", Pdfmerge.DNR, "", ""] +let settrans s = args.transition <- Some s +let setduration f = args.duration <- Some f +let setvertical () = args.horizontal <- false +let setoutward () = args.inward <- false +let setdirection i = + args.direction <- + match i with + | 0 | 90 | 180 | 270 | 315 -> i + | _ -> error "Bad direction" +let seteffectduration f = args.effect_duration <- f +let setcopyid s = setop (CopyId s) () +let setthinlines s = setop (ThinLines (parse_single_number s)) () + +let setcopyannotations s = setop (CopyAnnotations s) () + +let setshift s = + setop Shift (); + args.coord <- parse_coordinate s + +let setscale s = + setop Scale (); + args.coord <- parse_coordinate s + +let setscaletofit s = + setop ScaleToFit (); + args.coord <- parse_coordinate s + +let setattachfile s = + match args.op with + | Some (AttachFile t) -> + args.op <- Some (AttachFile (s::t)) + | _ -> + setop (AttachFile [s]) () + +let setfont f = + args.font <- + begin match Pdftext.standard_font_of_name ("/" ^ f) with + | Some x -> StandardFont x + | None -> OtherFont f + end; + args.fontname <- f + +let setfontsize f = + if f > 0. + then args.fontsize <-f + else error "Negative font size specified" + +let setaddtext s = + setop (AddText s) () + +let setcolor s = + let r, g, b = + match String.lowercase s with + | "white" -> 1., 1., 1. + | "black" -> 0., 0., 0. + | "red" -> 1., 0., 0. + | "green" -> 0., 1., 0. + | "blue" -> 0., 0., 1. + | _ -> + let getnum = function + | Pdfgenlex.LexInt i -> float i + | Pdfgenlex.LexReal f -> f + | _ -> error "Bad color" + in + match Pdfgenlex.lex_string s with + | [a;b;c] -> getnum a, getnum b, getnum c + | _ -> error "Bad color" + in + args.color <- r, g, b + +let setopacity o = + args.opacity <- o + +let setaddbookmarks s = + setop (AddBookmarks s) () + +let setstampon f = + setop (StampOn f) () + +let setstampunder f = + setop (StampUnder f) () + +let setstamponmulti f = + setop (StampOn f) (); + args.ismulti <- true + +let setstampundermulti f = + setop (StampUnder f) (); + args.ismulti <- true + +let setcombinepages f = + setop (CombinePages f) () + +let setposcenter s = + let x, y = parse_coordinate s in + args.position <- Cpdf.PosCentre (x, y) + +let setposleft s = + let x, y = parse_coordinate s in + args.position <- Cpdf.PosLeft (x, y) + +let setposright s = + let x, y = parse_coordinate s in + args.position <- Cpdf.PosRight (x, y) + +let settop n = + args.position <- Cpdf.Top (parse_single_number n); + args.justification <- Cpdf.CentreJustify + +let settopleft n = + args.position <- Cpdf.TopLeft (parse_single_number n); + args.justification <- Cpdf.LeftJustify + +let settopright n = + args.position <- Cpdf.TopRight (parse_single_number n); + args.justification <- Cpdf.RightJustify + +let setleft n = + args.position <- Cpdf.Left (parse_single_number n); + args.justification <- Cpdf.LeftJustify + +let setbottomleft n = + args.position <- Cpdf.BottomLeft (parse_single_number n); + args.justification <- Cpdf.LeftJustify + +let setbottom n = + args.position <- Cpdf.Bottom (parse_single_number n); + args.justification <- Cpdf.CentreJustify + +let setbottomright n = + args.position <- Cpdf.BottomRight (parse_single_number n); + args.justification <- Cpdf.RightJustify + +let setright n = + args.position <- Cpdf.Right (parse_single_number n); + args.justification <- Cpdf.RightJustify + +let setdiagonal n = + args.position <- Cpdf.Diagonal; + args.justification <- Cpdf.CentreJustify + +let setreversediagonal n = + args.position <- Cpdf.ReverseDiagonal; + args.justification <- Cpdf.CentreJustify + +(* FIXME: We will add a center option to text positioning, which can be used for this too *) +let setcenter n = + args.position <- Cpdf.Diagonal; + args.justification <- Cpdf.CentreJustify + +let setbates n = + args.bates <- n + +let setkeepversion () = + args.keepversion <- true + +let setbycolumns () = + args.bycolumns <- true + +let setpagerotation r = + match r with + | 90 | 270 -> args.pagerotation <- r + | _ -> error "Bad Page rotation. Try 90 or 270." + +let set_no_edit () = + args.no_edit <- true + +let set_no_print () = + args.no_print <- true + +let set_no_copy () = + args.no_copy <- true + +let set_no_annot () = + args.no_annot <- true + +let set_no_forms () = + args.no_forms <- true + +let set_no_extract () = + args.no_extract <- true + +let set_no_assemble () = + args.no_assemble <- true + +let set_no_hq_print () = + args.no_hq_print <- true + +let set_input s = + args.inputs <- (InFile s, "all", Pdfmerge.DNR, "", "")::args.inputs; + all_inputs := s::!all_inputs + +let set_input_dir s = + let names = sort compare (leafnames_of_dir s) in + (* We don't need to set all_inputs here, since pdftk doesn't used -idir *) + args.inputs <- + (rev (map (fun n -> (InFile (s ^ slash ^ n), "all", Pdfmerge.DNR, "", "")) names)) @ args.inputs + +let setdebug () = + set Pdfread.read_debug; + set Pdfwrite.write_debug; + set Pdfcrypt.crypt_debug; + args.debug <- true + +let setboxes () = + args.boxes <- true + +let set_no_encrypt_metadata () = + args.encrypt_metadata <- false + +let set_retain_numbering () = + args.retain_numbering <- true + +let set_remove_duplicate_fonts () = + args.remove_duplicate_fonts <- true + +let setencoding enc () = + args.encoding <- enc + +let setlinespacing f = + args.linespacing <- f + +let setmidline () = + args.midline <- true + +let setscaletofitscale f = + args.scale <- f + +let setscalecontents f = + args.op <- Some (ScaleContents f); + args.position <- Cpdf.Diagonal (* Will be center *) + +(* Parsing the control file *) +let rec getuntilendquote prev = function + | [] -> implode (rev prev), [] + | '"'::t -> implode (rev prev), t + | '\\'::'"'::t -> getuntilendquote ('"'::prev) t + | h::t -> getuntilendquote (h::prev) t + +let rec getarg prev = function + | [] -> implode (rev prev), [] + | h::t -> + if Pdf.is_whitespace h + then implode (rev prev), t + else getarg (h::prev) t + +let rec parse_chars args = function + | [] -> rev args + | h::more when Pdf.is_whitespace h -> + parse_chars args more + | '"'::more -> + let this, rest = getuntilendquote [] more in + parse_chars (this::args) rest + | h::t -> + let this, rest = getarg [] (h::t) in + parse_chars (this::args) rest + +let control_args = ref [] + +let parse_control_file name = + (parse_chars [] + (charlist_of_bytes (Pdfio.bytes_of_input_channel (open_in_bin name)))) + +let setencryptcollect () = + encrypt_to_collect := 3 + +let setcopyfont s = + args.op <- Some (CopyFont s) + +let setfontpage i = + args.copyfontpage <- i + +let setcopyfontname s = + args.copyfontname <- Some s + +let setpadevery i = + args.op <- Some (PadEvery i) + +let setpadmultiple i = + args.op <- Some (PadMultiple i) + +let setfast () = + args.fast <- true + +let setcsp2 f = + args.op <- Some (CSP2 f) + +let setextractimages () = + args.op <- Some ExtractImages + +(* Explicitly add a range. Parse it and replace the top input file with the range. *) +let setrange spec = + args.dashrange <- spec + +let setoutline () = + args.outline <- true + +let setlinewidth l = + args.linewidth <- l + +let setunderneath () = + args.underneath <- true + +let setimageresolution f = + args.op <- Some (ImageResolution f) + +let setgspath p = + args.path_to_ghostscript <- p + +let setvertical () = + args.orientation <- Cpdf.Vertical + +let setverticaldown () = + args.orientation <- Cpdf.VerticalDown + +let setfrombox s = + args.op <- Some CopyBox; + args.frombox <- Some s + +let settobox s = + args.tobox <- Some s + +let setmediaboxifmissing () = + args.mediabox_if_missing <- true + +let setrelativetocropbox () = + args.relative_to_cropbox <- true + +let setprerotate () = + args.prerotate <- true + +let setflatkids () = + Pdfpage.flat_pagetrees := true + +let settopage s = + args.topage <- Some s + +let setprintfformat () = + args.printf_format <- true + +let setscalestamptofit () = + args.scale_stamp_to_fit <- true + +let setkeepthisid () = + match args.inputs with + | (InFile s, _, _, _, _)::_ -> args.keep_this_id <- Some s + | _ -> () + +let setupdateinfo s = + args.op <- Some (UpdateInfo s) + +let setdoask () = + args.do_ask <- true + +let setverbose () = + args.verbose <- true + +let promptinputs () = + flprint "Please enter a filename for an input PDF:\n"; + set_input (Pervasives.read_line ()) + +let promptinputpasswords () = + flprint "Please enter the open password to use on the input PDF:\n "; + match args.inputs with + | (InFile s, b, c, d, _)::more -> + flprint s; + flprint ".\n It can be empty, or have a maximum of 32 characters:\n"; + let pw = Pervasives.read_line () in + args.inputs <- (InFile s, b, c, d, pw)::more + | _ -> () + +let promptoutput () = + flprint "Please enter a name for the output:\n"; + args.out <- File (Pervasives.read_line ()) + +let setdontoverwriteexistingfiles () = + args.dont_overwrite_existing_files <- true + +let setdontoverwriteinputs () = + args.dont_overwrite_inputs <- true + +let setmakenewid () = + args.makenewid <- true + +let setuprightstamp () = + args.uprightstamp <- true + +let setjustifyleft () = + args.justification <- Cpdf.LeftJustify + +let setjustifyright () = + args.justification <- Cpdf.RightJustify + +let setjustifycenter () = + args.justification <- Cpdf.CentreJustify + +let setremoveduplicatestreams () = + args.remove_duplicate_streams <- true + +let setnopreserveobjstm () = + args.preserve_objstm <- false + +let setcreateobjstm () = + args.create_objstm <- true + +let setstdinuser u = + match args.inputs with + | (StdIn, x, y, _, o)::t -> args.inputs <- (StdIn, x, y, u, o)::t + | _ -> error "-stdin-user: must follow -stdin" + +let setstdinowner o = + match args.inputs with + | (StdIn, x, y, u, _)::t -> args.inputs <- (StdIn, x, y, u, o)::t + | _ -> error "-stdin-user: must follow -stdin" + +(* Parse a control file, make an argv, and then make Arg parse it. *) +let rec make_control_argv_and_parse filename = + control_args := !control_args @ parse_control_file filename + +and specs = + [("-version", + Arg.Unit (setop Version), + " Print the cpdf version number"); + ("-o", + Arg.String setout, + " Set the output file, if appropriate"); + ("-i", + Arg.String set_input, + " Add an input file"); + ("-idir", + Arg.String set_input_dir, + " Add a directory of files"); + ("-stdin", + Arg.Unit setstdin, + " Read input from standard input"); + ("-stdin-owner", + Arg.String setstdinowner, + " Owner password for -stdin"); + ("-stdin-user", + Arg.String setstdinuser, + " User password for -stdin"); + ("-stdout", + Arg.Unit setstdout, + " Send result to standard output"); + ("-range", + Arg.String setrange, + " Explicitly add a range"); + ("-change-id", + Arg.Unit (setop ChangeId), + " Change the file's /ID tag"); + ("-no-preserve-objstm", + Arg.Unit setnopreserveobjstm, + " Don't preserve object streams"); + ("-create-objstm", + Arg.Unit setcreateobjstm, + " Create object streams anew"); + ("-keep-version", + Arg.Unit setkeepversion, + " Don't change the version number"); + ("-l", + Arg.Unit setlinearize, + " Linearize output files where possible"); + ("-raw", + Arg.Unit (setencoding Cpdf.Raw), + " Do not process text"); + ("-stripped", + Arg.Unit (setencoding Cpdf.Stripped), + " Process text by simple stripping to ASCII"); + ("-utf8", + Arg.Unit (setencoding Cpdf.UTF8), + " Process text by conversion to UTF8 Unicode"); + ("-fast", + Arg.Unit setfast, + " Speed over correctness with malformed documents"); + ("-control", + Arg.String make_control_argv_and_parse, + " Use a control file"); + ("-merge", + Arg.Unit (setop Merge), + " Merge a number of files into one"); + ("-retain-numbering", + Arg.Unit set_retain_numbering, + " Don't renumber pages when merging"); + ("-remove-duplicate-fonts", + Arg.Unit set_remove_duplicate_fonts, + " Remove duplicate fonts when merging"); + ("-split", + Arg.Unit (setop Split), + " Split a file into individual pages"); + ("-chunk", + Arg.Int setchunk, + " Set chunk size for -split (default 1)"); + ("-split-bookmarks", + Arg.Int setsplitbookmarks, + " Split a file at bookmarks at a given level"); + ("-scale-page", + Arg.String setscale, + " -scale-page \"sx sy\" scales by (sx, sy)"); + ("-scale-to-fit", + Arg.String setscaletofit, + " -scale-to-fit \"x y\" scales to page size (x, y)"); + ("-scale-contents", + Arg.Float setscalecontents, + " Scale Contents by the given factor"); + ("-center", + Arg.Float setcenter, + " Scale contents around center"); + ("-scale-to-fit-scale", + Arg.Float setscaletofitscale, + " -scale-to-fit-scale (1.0 = 100%)"); + ("-shift", + Arg.String setshift, + " -shift \"dx dy\" shifts the chosen pages"); + ("-rotate", + Arg.Int setrotate, + " Set rotation of pages to 0, 90, 180, 270"); + ("-rotateby", + Arg.Int setrotateby, + " Rotate pages by 90, 180 or 270 degrees"); + ("-rotate-contents", + Arg.Float setrotatecontents, + " Rotate contents of pages"); + ("-upright", + Arg.Unit (setop Upright), + " Make pages upright"); + ("-hflip", + Arg.Unit (setop HFlip), + " Flip pages horizontally"); + ("-vflip", + Arg.Unit (setop VFlip), + " Flip pages vertically"); + ("-crop", + Arg.String setcrop, + " Crop specified pages"); + ("-remove-crop", + Arg.Unit (setop RemoveCrop), + " Remove cropping on specified pages"); + ("-copy-cropbox-to-mediabox", + Arg.Unit (setop CopyCropBoxToMediaBox), + ""); (* Undocumented now, since /frombox, /tobox now used *) + ("-frombox", Arg.String setfrombox, " Set box to copy from"); + ("-tobox", Arg.String settobox, " Set box to copy to"); + ("-mediabox-if-missing", Arg.Unit setmediaboxifmissing, " If copy from box missing, substitute media box"); + ("-mediabox", + Arg.String setmediabox, + " Set media box on specified pages"); + ("-encrypt", + Arg.Unit setencryptcollect, + " Encrypt a document"); + ("-decrypt", + Arg.Unit (setop Decrypt), + " Decrypt a file"); + ("-no-edit", Arg.Unit set_no_edit, " No edits"); + ("-no-print", Arg.Unit set_no_print, " No printing"); + ("-no-copy", Arg.Unit set_no_copy, " No copying"); + ("-no-annot", Arg.Unit set_no_annot, " No annotations"); + ("-no-forms", Arg.Unit set_no_forms, " No forms"); + ("-no-extract", Arg.Unit set_no_extract, " No extracting"); + ("-no-assemble", Arg.Unit set_no_assemble, " No assembling"); + ("-no-hq-print", Arg.Unit set_no_hq_print, " No high quality printing"); + ("-no-encrypt-metadata", + Arg.Unit set_no_encrypt_metadata, + " Don't encrypt metadata (AES only)"); + ("-decompress", + Arg.Unit (setop Decompress), + " Decompress"); + ("-compress", + Arg.Unit (setop Compress), + " Compress streams, leaving metadata alone"); + ("-remove-duplicate-streams", + Arg.Unit setremoveduplicatestreams, + ""); + ("-list-bookmarks", + Arg.Unit (setop ListBookmarks), + " List Bookmarks"); + ("-remove-bookmarks", + Arg.Unit (setop RemoveBookmarks), + " Remove bookmarks from a file"); + ("-add-bookmarks", + Arg.String setaddbookmarks, + " Add bookmarks from the given file"); + ("-presentation", + Arg.Unit (setop Presentation), + " Make a presentation"); + ("-trans", + Arg.String settrans, + " Set the transition method for -presentation"); + ("-duration", + Arg.Float setduration, + " Set the display duration for -presentation"); + ("-vertical", + Arg.Unit setvertical, + " Set dimension for Split and Blinds styles"); + ("-outward", + Arg.Unit setoutward, + " Set direction for Split and Box styles"); + ("-direction", + Arg.Int setdirection, + " Set direction for Wipe and Glitter styles"); + ("-effect-duration", + Arg.Float seteffectduration, + " Set the effect duration in seconds"); + ("-stamp-on", + Arg.String setstampon, + " Stamp a file on some pages of another"); + ("-stamp-under", + Arg.String setstampunder, + " Stamp a file under some pages of another"); + ("-combine-pages", + Arg.String setcombinepages, + " Combine two files by merging individual pages"); + ("-add-text", + Arg.String setaddtext, + " Superimpose text on the given range of pages"); + ("-remove-text", + Arg.Unit (setop RemoveText), + " Remove text previously added by cpdf"); + ("-add-rectangle", + Arg.String setrectangle, + ""); + ("-bates", + Arg.Int setbates, + " Set the base bates number"); + ("-font", + Arg.String setfont, + " Set the font"); + ("-font-size", + Arg.Float setfontsize, + " Set the font size"); + ("-color", + Arg.String setcolor, + " Set the color"); + ("-opacity", + Arg.Float setopacity, + " Set the text opacity"); + ("-outline", + Arg.Unit setoutline, + " Use outline mode for text"); + ("-linewidth", + Arg.Float setlinewidth, + " Set line width for outline text"); + ("-pos-center", + Arg.String setposcenter, + " Set position relative to center of baseline"); + ("-pos-left", + Arg.String setposleft, + " Set position relative to left of baseline"); + ("-pos-right", + Arg.String setposright, + " Set position relative to right of baseline"); + ("-top", + Arg.String settop, + " Set position relative to center top of page"); + ("-topleft", + Arg.String settopleft, + " Set position relative to top left of page"); + ("-topright", + Arg.String settopright, + " Set position relative to top right of page"); + ("-left", + Arg.String setleft, + " Set position relative to center left of page"); + ("-bottomleft", + Arg.String setbottomleft, + " Set position relative to bottom left of page"); + ("-bottom", + Arg.String setbottom, + " Set position relative to center bottom of page"); + ("-bottomright", + Arg.String setbottomright, + " Set position relative to bottom right of page"); + ("-right", + Arg.String setright, + " Set position relative to center right of page"); + ("-diagonal", + Arg.Unit setdiagonal, + " Place text diagonally across page"); + ("-reverse-diagonal", + Arg.Unit setreversediagonal, + " Place text diagonally across page from top left"); + ("-justify-left", + Arg.Unit setjustifyleft, + " Justify multiline text left"); + ("-justify-right", + Arg.Unit setjustifyright, + " Justify multiline text right"); + ("-justify-center", + Arg.Unit setjustifycenter, + " Justify multiline text centre"); + ("-underneath", + Arg.Unit setunderneath, + " Text stamp is underneath content"); + ("-line-spacing", + Arg.Float setlinespacing, + " Line spacing (1 is normal)"); + ("-midline", + Arg.Unit setmidline, + " Adjust text to midline rather than baseline"); + ("-relative-to-cropbox", + Arg.Unit setrelativetocropbox, + " Add text relative to Crop Box not Media Box"); + ("-prerotate", + Arg.Unit setprerotate, + " Calls -upright on pages before adding text"); + ("-twoup", + Arg.Unit (setop TwoUp), + " Put 2 pages onto one"); + ("-twoup-stack", + Arg.Unit (setop TwoUpStack), + " Stack 2 pages onto one twice the size"); + ("-pad-before", + Arg.Unit (setop PadBefore), + " Add a blank page before the given pages"); + ("-pad-after", + Arg.Unit (setop PadAfter), + " Add a blank page after the given pages"); + ("-pad-every", + Arg.Int setpadevery, + " Add a blank page after every n pages"); + ("-pad-multiple", + Arg.Int setpadmultiple, + " Pad the document to a multiple of n pages"); + ("-list-annotations", + Arg.Unit (setop ListAnnotations), + " List annotations"); + ("-copy-annotations", + Arg.String setcopyannotations, + " Copy annotations from given file"); + ("-remove-annotations", + Arg.Unit (setop RemoveAnnotations), + " Remove annotations"); + ("-list-fonts", + Arg.Unit (setop Fonts), + " Output font list"); + ("-info", + Arg.Unit (setop Info), + " Output file information"); + ("-page-info", + Arg.Unit (setop PageInfo), + " Output file information"); + ("-set-author", + Arg.String setauthor, + " Set Author"); + ("-set-title", + Arg.String settitle, + " Set Title"); + ("-set-subject", + Arg.String setsubject, + " Set Subject"); + ("-set-keywords", + Arg.String setkeywords, + " Set Keywords"); + ("-set-create", + Arg.String setcreate, + " Set Creation date"); + ("-set-modify", + Arg.String setmodify, + " Set Modification date"); + ("-set-creator", + Arg.String setcreator, + " Set Creator"); + ("-set-producer", + Arg.String setproducer, + " Set Producer"); + ("-set-trapped", + Arg.Unit (setop SetTrapped), + " Mark as trapped"); + ("-set-untrapped", + Arg.Unit (setop SetUntrapped), + " Mark as not trapped"); + ("-set-page-layout", + Arg.String setpagelayout, + " Set page layout upon document opening"); + ("-set-page-mode", + Arg.String setpagemode, + " Set page mode upon document opening"); + ("-set-metadata", + Arg.String setmetadata, + " Set metadata to the contents of a file"); + ("-print-metadata", + Arg.Unit (setop Metadata), + " Output metadata information"); + ("-remove-metadata", + Arg.Unit (setop RemoveMetadata), + " Remove document metadata"); + ("-hide-toolbar", + Arg.String hidetoolbar, + " Hide the viewer's toolbar"); + ("-hide-menubar", + Arg.String hidemenubar, + " Hide the viewer's menubar"); + ("-hide-window-ui", + Arg.String hidewindowui, + " Hide the viewer's scroll bars etc."); + ("-fit-window", + Arg.String fitwindow, + " Resize document's window to fit size of page"); + ("-center-window", + Arg.String centerwindow, + " Position window in the center of screen"); + ("-display-doc-title", + Arg.String displaydoctitle, + " Display document's title in the title bar"); + ("-pages", + Arg.Unit (setop CountPages), + " Count pages"); + ("-list-attached-files", + Arg.Unit (setop ListAttachedFiles), + " List attached files"); + ("-dump-attachments", + Arg.Unit (setop DumpAttachedFiles), + ""); + ("-attach-file", + Arg.String setattachfile, + " Attach a file"); + ("-to-page", + Arg.String settopage, + " Attach file to given page instead of document"); + ("-remove-files", + Arg.Unit (setop RemoveAttachedFiles), + " Remove embedded attached document-level files"); + ("-image-resolution", + Arg.Float setimageresolution, + " List images under a given dpi"); + ("-copy-font", + Arg.String setcopyfont, + " Copy a named font"); + ("-copy-font-page", + Arg.Int setfontpage, + " Set the page a copied font is drawn from"); + ("-remove-fonts", + Arg.Unit (setop RemoveFonts), + " Remove embedded fonts"); + ("-copy-font-name", + Arg.String setcopyfontname, + " Set the name of the font to copy"); + ("-missing-fonts", + Arg.Unit (setop MissingFonts), + " List missing fonts"); + ("-remove-id", + Arg.Unit (setop RemoveId), + " Remove the file's /ID tag"); + ("-draft", + Arg.Unit (setop Draft), + " Remove images from the file"); + ("-boxes", + Arg.Unit setboxes, + " Add crossed boxes to -draft option"); + ("-blacktext", + Arg.Unit (setop BlackText), + " Blacken document text"); + ("-blacklines", + Arg.Unit (setop BlackLines), + " Blacken lines in document"); + ("-blackfills", + Arg.Unit (setop BlackFills), + " Blacken fills in document"); + ("-thinlines", + Arg.String setthinlines, + " Set minimum line thickness to the given width"); + ("-clean", + Arg.Unit (setop Clean), + " Garbage-collect a file"); + ("-set-version", + Arg.Int setversion, + " Set PDF version number"); + ("-copy-id-from", + Arg.String setcopyid, + " Copy one file's ID tag to another"); + (* These items are for cpdftk *) + ("-update-info", Arg.String setupdateinfo, ""); + ("-printf-format", Arg.Unit setprintfformat, ""); + ("-scale-stamp-to-fit", Arg.Unit setscalestamptofit, ""); + ("-dump-data", Arg.Unit (setop DumpData), ""); + ("-keep-this-id", Arg.Unit setkeepthisid, ""); + ("-do-ask", Arg.Unit setdoask, ""); + ("-verbose", Arg.Unit setverbose, ""); + ("-prompt-inputs", Arg.Unit promptinputs, ""); + ("-prompt-input-passwords", Arg.Unit promptinputpasswords, ""); + ("-prompt-output", Arg.Unit promptoutput, ""); + ("-dont-overwrite-existing-files", Arg.Unit setdontoverwriteexistingfiles, ""); + ("-dont-overwrite-inputs", Arg.Unit setdontoverwriteinputs, ""); + ("-make-new-id", Arg.Unit setmakenewid, ""); + ("-upright-stamp", Arg.Unit setuprightstamp, ""); + ("-remove-unused-resources", Arg.Unit (setop RemoveUnusedResources), ""); + ("-stamp-under-multi", Arg.String setstampundermulti, ""); + ("-stamp-on-multi", Arg.String setstamponmulti, ""); + ("-list-annotations-more", Arg.Unit (setop ListAnnotationsMore), ""); + (*These items are undocumented *) + ("-extract-fontfile", Arg.Unit (setop ExtractFontFile), ""); + ("-extract-images", Arg.Unit setextractimages, ""); + (*("-output-graph", Arg.Unit (setoutputgraph), "");*) + ("-csp1", Arg.Unit (setop CSP1), ""); + ("-csp2", Arg.Float setcsp2, ""); + ("-csp3", Arg.Unit (setop CSP3), ""); + ("-text-vertical", Arg.Unit setvertical, ""); + ("-text-vertical-down", Arg.Unit setverticaldown, ""); + ("-flat-kids", Arg.Unit setflatkids, ""); + ("-gs", Arg.String setgspath, ""); + ("-debug", Arg.Unit setdebug, ""); + ("-fix-prince", Arg.Unit (setop RemoveUnusedResources), ""); + ("-extract-text", Arg.Unit (setop ExtractText), ""); + ("-print-linearization", Arg.Unit (setop PrintLinearization), "")] + +and usage_msg = +"Syntax: cpdf [-o ] \n\n\ +This is a copyrighted, commercial program, and may NOT be freely copied.\n\n\ +Version " ^ string_of_int major_version ^ "." ^ string_of_int minor_version ^ " " ^ version_date ^ "\n\n\ +To buy, visit http://www.coherentpdf.com/\n\n\ +Input names are distinguished by containing a '.' and may be\n\ +followed by a page range specification, for instance \"1,2,3\"\n\ +or \"1-6,9-end\" or \"even\" or \"odd\" or \"reverse\".\n\nOperations (See \ +manual for full details):\n" + +(* Reading and writing *) +let rec writing_ok outname = + if args.dont_overwrite_inputs && mem outname !all_inputs then + error ("Error: The output filename: " ^ outname ^"\n is the same as an input filename.\n"); + if args.dont_overwrite_existing_files && Sys.file_exists outname then + begin + flprint ("Output file: " ^ outname ^ " already exists. Overwrite? (y/n)\n"); + match explode (Pervasives.read_line ()) with + | ('y' | 'Y')::_ -> outname + | _ -> + flprint "Enter a name for the output:\n"; + writing_ok (Pervasives.read_line ()) + end + else + outname + +let write_pdf mk_id pdf = + if args.create_objstm && not args.keepversion + then pdf.Pdf.minor <- max pdf.Pdf.minor 5; + let mk_id = args.makenewid || mk_id in + match args.out with + | NoOutputSpecified -> + output_pdfs =| pdf + | File outname -> + let outname = writing_ok outname in + let pdf = Cpdf.recompress_pdf <| nobble pdf in + Pdf.remove_unreferenced pdf; + Pdfwrite.pdf_to_file_options + ~preserve_objstm:args.preserve_objstm + ~generate_objstm:args.create_objstm + args.linearize None mk_id pdf outname + | Stdout -> + let pdf = Cpdf.recompress_pdf <| nobble pdf in + Pdf.remove_unreferenced pdf; + Pdfwrite.pdf_to_channel + ~preserve_objstm:args.preserve_objstm + ~generate_objstm:args.create_objstm + args.linearize None mk_id pdf stdout; + flush stdout (*r For Windows *) + +let pdf_of_stdin user_pw owner_pw = + let user_pw = Some user_pw + and owner_pw = if owner_pw = "" then None else Some owner_pw + in + let o, bytes = Pdfio.input_output_of_bytes 16384 in + try + while true do o.Pdfio.output_char (input_char stdin) done; + Pdf.empty () + with + End_of_file -> + let i = Pdfio.input_of_bytes (Pdfio.extract_bytes_from_input_output o bytes) in + pdfread_pdf_of_input user_pw owner_pw i + +let get_single_pdf op read_lazy = + match args.inputs with + | (InFile inname, _, _, u, o) as input::_ -> + let pdf = + if read_lazy then + pdfread_pdf_of_channel_lazy (optstring u) (optstring o) (open_in_bin inname) + else + pdfread_pdf_of_file (optstring u) (optstring o) inname + in + decrypt_if_necessary input op pdf + | (StdIn, _, _, u, o) as input::_ -> + decrypt_if_necessary input op (pdf_of_stdin u o) + | (AlreadyInMemory pdf, _, _, _, _)::_ -> pdf + | _ -> + raise (Arg.Bad "cpdf: No input specified.\n") + +let get_pagespec () = + match args.inputs with + | (_, ps, _, _, _)::_ -> ps + | _ -> error "get_pagespec" + +let filenames = null_hash () + +(* This now memoizes on the name of the file to make sure we only load each +file once *) +let get_pdf_from_input_kind ((_, _, _, u, o) as input) op = function + | AlreadyInMemory pdf -> pdf + | InFile s -> + begin try Hashtbl.find filenames s with + Not_found -> + let pdf = decrypt_if_necessary input op (pdfread_pdf_of_file (optstring u) (optstring o) s) in + Hashtbl.add filenames s pdf; pdf + end + | StdIn -> + decrypt_if_necessary input op (pdf_of_stdin u o) + +(* Copy a font from [frompdf] with name [fontname] on page [fontpage] to [pdf] on all pages in [range] *) +let copy_font frompdf fontname fontpage range pdf = + match Pdf.renumber_pdfs [frompdf; pdf] with + | [] | [_] | _::_::_::_ -> assert false + | [frompdf; pdf] -> + (* 1. Get fontpage *) + let frompdf_pages = Pdfpage.pages_of_pagetree frompdf in + let frompdf_page = + try select fontpage frompdf_pages with + Not_found -> failwith "copy_font: Page not found in input pdf" + in + (* 2. Extract font *) + let fonts = + match Pdf.lookup_direct frompdf "/Font" frompdf_page.Pdfpage.resources with + | Some f -> f + | None -> failwith "copy_font: font not found" + in + let fromfont = + match Pdf.lookup_direct frompdf fontname fonts with + | Some f -> f + | None -> failwith "copy_font: font not found" + in + let basefontname = + match Pdf.lookup_direct frompdf "/BaseFont" fromfont with + | Some (Pdf.Name n) -> n + | _ -> "/CopyFontAddedNoName" + in + (* 3. Get all objects forming font (except main /Font one) *) + let objnumbers = Pdf.objects_referenced [] [] frompdf fromfont in + (* 4. Copy them to from frompdf to pdf. *) + iter (function objnum -> Pdf.addobj_given_num pdf (objnum, Pdf.lookup_obj frompdf objnum)) objnumbers; + (* 5. Get pages from pdf *) + let pdf_pages = Pdfpage.pages_of_pagetree pdf in + (* 6. Add the font to pages in range *) + let pages' = + map + (function (page, pagenum) -> + if mem pagenum range then + let font = + match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with + | Some f -> f + | None -> Pdf.Dictionary [] + in + let font' = + match font with + | (Pdf.Dictionary _) as d -> + Pdf.add_dict_entry d basefontname fromfont + | _ -> failwith "copy_font: error" + in + let resources' = + Pdf.add_dict_entry page.Pdfpage.resources "/Font" font' + in + {page with + Pdfpage.resources = resources'} + else page) + (combine pdf_pages (indx pdf_pages)); + in + (* 7. Put the pages back into the pdf, and return *) + let pdf, root = Pdfpage.add_pagetree pages' pdf in + Pdfpage.add_root root [] pdf + +(* Extract Images. *) +let pnm_to_channel_24 channel w h s = + let white () = output_char channel ' ' + and newline () = output_char channel '\n' + and output_string = Pervasives.output_string channel in + output_string "P6"; + white (); + output_string (string_of_int w); + white (); + output_string (string_of_int h); + white (); + output_string "255"; + newline (); + let pos = ref 0 in + for y = 1 to h do + for x = 1 to w * 3 do + output_byte channel (bget s !pos); + incr pos + done + done + +let null_device = + match Sys.os_type with + | "Win32" -> "nul" + | _ -> "/dev/null" + +(* cpdf -extract-images in.pdf 2-5 -o img%%% (FIXME: Add output spec. Document png stuff.) *) +let write_stream name stream = + let fh = open_out_bin name in + for x = 0 to bytes_size stream - 1 do + output_byte fh (bget stream x) + done; + close_out fh + +let write_image pdf resources name image = + match Pdfimage.get_image_24bpp pdf resources image with + | Pdfimage.JPEG (stream, _) -> write_stream (name ^ ".jpg") stream + | Pdfimage.JPEG2000 (stream, _) -> write_stream (name ^ ".jpx") stream + | Pdfimage.JBIG2 (stream, _) -> write_stream (name ^ ".jbig2") stream + | Pdfimage.Raw (w, h, Pdfimage.BPP24, stream) -> + let fh = open_out_bin (name ^ ".pnm") in + pnm_to_channel_24 fh w h stream; + close_out fh; + (* If pnmtopng is present, convert the pnm to a PNG. *) + begin match + Sys.command ("pnmtopng -gamma 0.45 -quiet " ^ "\"" ^ name ^ ".pnm\"" ^ "> \"" ^ name ^ ".png\" 2>" ^ null_device) + with + | 0 -> Sys.remove (name ^ ".pnm") + | _ -> () + end + | _ -> () + +(* FIXME: Doesn't cope with images within form xobjects *) +let extract_images pdf range stem = + let pdf_pages = Pdfpage.pages_of_pagetree pdf in + let pages = + option_map + (function (i, pdf_pages) -> if mem i range then Some pdf_pages else None) + (combine (indx pdf_pages) pdf_pages) + in + iter + (function page -> + let xobjects = + match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with + | Some (Pdf.Dictionary elts) -> map snd elts + | _ -> [] + in + let images = + keep (fun o -> Pdf.lookup_direct pdf "/Subtype" o = Some (Pdf.Name "/Image")) xobjects + in + if images <> [] then + (let names = + map + (function n -> let r = Cpdf.name_of_spec false [] (*FIXME *) pdf 0 stem n "" 0 0 in (*i flprint r; flprint "\n"; i*) r) + (ilist 1 (length images)) + in + iter2 (write_image pdf page.Pdfpage.resources) names images)) + pages + +let getencryption pdf = + match Pdfread.what_encryption pdf with + | None | Some Pdfwrite.AlreadyEncrypted -> "Not encrypted" + | Some Pdfwrite.PDF40bit -> "40bit" + | Some Pdfwrite.PDF128bit -> "128bit" + | Some (Pdfwrite.AES128bit true) -> "128bit AES, Metadata encrypted" + | Some (Pdfwrite.AES128bit false) -> "128bit AES, Metadata not encrypted" + | Some (Pdfwrite.AES256bit true) -> "256bit AES, Metadata encrypted" + | Some (Pdfwrite.AES256bit false) -> "256bit AES, Metadata not encrypted" + | Some (Pdfwrite.AES256bitISO true) -> "256bit AES ISO, Metadata encrypted" + | Some (Pdfwrite.AES256bitISO false) -> "256bit AES ISO, Metadata not encrypted" + +let string_of_permission = function + | Pdfcrypt.NoEdit -> "No edit" + | Pdfcrypt.NoPrint -> "No print" + | Pdfcrypt.NoCopy -> "No copy" + | Pdfcrypt.NoAnnot -> "No Annotate" + | Pdfcrypt.NoForms -> "No edit forms" + | Pdfcrypt.NoExtract -> "No extract" + | Pdfcrypt.NoAssemble -> "No assemble" + | Pdfcrypt.NoHqPrint -> "No high-quality print" + +let getpermissions pdf = + fold_left + (fun x y -> if x = "" then x ^ y else x ^ ", " ^ y) + "" + (map string_of_permission (Pdfread.permissions pdf)) + +(* If a cropbox exists, make it the mediabox. If not, change nothing. *) +let copy_cropbox_to_mediabox pdf range = + Cpdf.process_pages + (fun _ page -> + match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with + | Some pdfobject -> {page with Pdfpage.mediabox = Pdf.direct pdf pdfobject} + | None -> page) + pdf + range + +(* Image resolution *) +type xobj = + | Image of int * int (* width, height *) + | Form of Pdftransform.transform_matrix * Pdf.pdfobject * Pdf.pdfobject (* Will add actual data later. *) + +(* Given a page and a list of (pagenum, name, thing) *) +let rec image_resolution_page pdf page pagenum dpi (images : (int * string * xobj) list) = + try + let pageops = Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content + and transform = ref [ref Pdftransform.i_matrix] in + iter + (function + | Pdfops.Op_cm matrix -> + begin match !transform with + | [] -> raise (Failure "no transform") + | _ -> (hd !transform) := Pdftransform.matrix_compose !(hd !transform) matrix + end + | Pdfops.Op_Do xobject -> + let trans (x, y) = + match !transform with + | [] -> raise (Failure "no transform") + | _ -> Pdftransform.transform_matrix !(hd !transform) (x, y) + in + let o = trans (0., 0.) + and x = trans (1., 0.) + and y = trans (0., 1.) + in + (*i Printf.printf "o = %f, %f, x = %f, %f, y = %f, %f\n" (fst o) (snd o) (fst x) (snd x) (fst y) (snd y); i*) + let rec lookup_image k = function + | [] -> assert false + | (_, a, _) as h::_ when a = k -> h + | _::t -> lookup_image k t + in + begin match lookup_image xobject images with + | (pagenum, name, Form (xobj_matrix, content, resources)) -> + let content = + (* Add in matrix etc. *) + let total_matrix = Pdftransform.matrix_compose xobj_matrix !(hd !transform) in + let ops = + Pdfops.Op_cm total_matrix:: + Pdfops.parse_operators pdf resources [content] + in + Pdfops.stream_of_ops ops + in + let page = + {Pdfpage.content = [content]; + Pdfpage.mediabox = Pdf.Null; + Pdfpage.resources = resources; + Pdfpage.rotate = Pdfpage.Rotate0; + Pdfpage.rest = Pdf.Dictionary []} + in + let newpdf = Pdfpage.change_pages true pdf [page] in + image_resolution newpdf [pagenum] dpi + | (pagenum, name, Image (w, h)) -> + let lx = Pdfunits.convert 0. Pdfunits.PdfPoint Pdfunits.Inch (distance_between o x) + and ly = Pdfunits.convert 0. Pdfunits.PdfPoint Pdfunits.Inch (distance_between o y) in + let wdpi = float w /. lx + and hdpi = float h /. ly in + if wdpi < dpi || hdpi < dpi then + Printf.printf "%i, %s, %i, %i, %f, %f\n" pagenum xobject w h wdpi hdpi + (*i else + Printf.printf "S %i, %s, %i, %i, %f, %f\n" pagenum xobject (int_of_float w) (int_of_float h) wdpi hdpi i*) + end + | Pdfops.Op_q -> + begin match !transform with + | [] -> raise (Failure "Unbalanced q/Q ops") + | h::t -> + let h' = ref Pdftransform.i_matrix in + h' := !h; + transform := h'::h::t + end + | Pdfops.Op_Q -> + begin match !transform with + | [] -> raise (Failure "Unbalanced q/Q ops") + | _ -> transform := tl !transform + end + | _ -> ()) + pageops + with + e -> Printf.printf "Error %s\n" (Printexc.to_string e); flprint "\n" + +and image_resolution pdf range dpi = + let images = ref [] in + Cpdf.iter_pages + (fun pagenum page -> + (* 1. Get all image names and their native resolutions from resources as string * int * int *) + match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with + | Some (Pdf.Dictionary xobjects) -> + iter + (function (name, xobject) -> + match Pdf.lookup_direct pdf "/Subtype" xobject with + | Some (Pdf.Name "/Image") -> + let width = + match Pdf.lookup_direct pdf "/Width" xobject with + | Some x -> Pdf.getnum x + | None -> 1. + and height = + match Pdf.lookup_direct pdf "/Height" xobject with + | Some x -> Pdf.getnum x + | None -> 1. + in + images := (pagenum, name, Image (int_of_float width, int_of_float height))::!images + | Some (Pdf.Name "/Form") -> + let resources = + match Pdf.lookup_direct pdf "/Resources" xobject with + | None -> page.Pdfpage.resources (* Inherit from page or form above. *) + | Some r -> r + and contents = + xobject + and matrix = + match Pdf.lookup_direct pdf "/Matrix" xobject with + | Some (Pdf.Array [a; b; c; d; e; f]) -> + {Pdftransform.a = Pdf.getnum a; Pdftransform.b = Pdf.getnum b; Pdftransform.c = Pdf.getnum c; + Pdftransform.d = Pdf.getnum d; Pdftransform.e = Pdf.getnum e; Pdftransform.f = Pdf.getnum f} + | _ -> Pdftransform.i_matrix + in + images := (pagenum, name, Form (matrix, contents, resources))::!images + | _ -> () + ) + xobjects + | _ -> ()) + pdf + range; + (* Now, split into differing pages, and call [image_resolution_page] on each one *) + let pagesplits = + map + (function (a, _, _)::_ as ls -> (a, ls) | _ -> assert false) + (collate (fun (a, _, _) (b, _, _) -> compare a b) (rev !images)) + and pages = + Pdfpage.pages_of_pagetree pdf + in + iter + (function (pagenum, images) -> + let page = select pagenum pages in + image_resolution_page pdf page pagenum dpi images) + pagesplits + +(* Missing Fonts *) + +let is_missing pdf dict = + match Pdf.lookup_direct pdf "/FontDescriptor" dict with + | None -> true + | Some d -> + match Pdf.lookup_direct pdf "/FontFile" d with + | Some _ -> false + | None -> + match Pdf.lookup_direct pdf "/FontFile2" d with + | Some _ -> false + | None -> + match Pdf.lookup_direct pdf "/FontFile3" d with + | Some _ -> false + | None -> true + +let missing_font pdf page (name, dict) = + if is_missing pdf dict then + let subtype = + match Pdf.lookup_direct pdf "/Subtype" dict with + | Some (Pdf.Name n) -> n + | _ -> "" + and basefont = + match Pdf.lookup_direct pdf "/BaseFont" dict with + | Some (Pdf.Name n) -> n + | _ -> "" + and encoding = + match Pdf.lookup_direct pdf "/Encoding" dict with + | Some (Pdf.Name n) -> n + | _ -> "" + in + if Pdftext.standard_font_of_name basefont <> None then () else + Printf.printf "%i, %s, %s, %s, %s\n" page name subtype basefont encoding + +let missing_fonts pdf range = + Cpdf.iter_pages + (fun num page -> + match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with + | Some (Pdf.Dictionary fontdict) -> + (* Extract descendant fonts *) + let name_dict_pairs = + flatten + (map + (fun (name, dict) -> + match Pdf.lookup_direct pdf "/DescendantFonts" dict with + | Some (Pdf.Array desc_fonts) -> map (fun d -> name, d) desc_fonts + | _ -> [(name, dict)]) + fontdict) + in + iter (missing_font pdf num) name_dict_pairs + | _ -> ()) + pdf + range + +let lines_of_channel c = + let ls = ref [] in + try + while true do ls =| input_line c done; [] + with + End_of_file -> rev !ls + +(* parse the results of the standard error part of gs -sDEVICE=bbox *) +let parse_whiteboxes filename = + try + let fh = open_in filename in + let lines = lines_of_channel fh in + let lines = + keep + (function l -> + match explode l with + | '%'::'%'::_ -> true | _ -> false) + lines + in + let hires_lines = drop_odds lines in + let result = + map + (fun line -> + let line = implode (drop (explode line) 20) in + match Pdfgenlex.lex_string line with + | [a; b; c; d] -> + let getfloat = function + | Pdfgenlex.LexInt i -> float i + | Pdfgenlex.LexReal f -> f + | _ -> raise (Failure "parse_whiteboxes") + in + getfloat a, getfloat b, getfloat c, getfloat d + | x -> raise (Failure ("bad lex on line " ^ line ^ "made tokens " ^ Pdfgenlex.string_of_tokens x))) + hires_lines + in + close_in fh; + result + with + e -> + Printf.eprintf "%s\n" ("parse_whiteboxes " ^ Printexc.to_string e); + raise (Failure "") + +(* Make start, end pairs from a sortedrange *) +let rec startends_of_range_inner pairs ls = + match pairs, ls with + | _, [] -> rev pairs + | [], (h::t) -> + startends_of_range_inner [(h, h)] t + | ((s, e)::ps), (h::t) when h = e + 1 -> + startends_of_range_inner ((s, h)::ps) t + | ps, (h::t) -> + startends_of_range_inner ((h, h)::ps) t + +let startends_of_range x = + startends_of_range_inner [] x + +(* Calculating margins *) +let calculate_margins filename pdf (s, e) = + (* Call ghostscript *) + let gscall = + args.path_to_ghostscript ^ + " -dSAFER -dNOPAUSE -dBATCH -sDEVICE=bbox -r1200" + ^ " -dFirstPage=" + ^ string_of_int s + ^ " -dLastPage=" ^ string_of_int e + ^ " \"" ^ filename ^ "\"" ^ " > waste.txt 2> margins.txt" + in + match Sys.command gscall with + | 0 -> + (* Parse white boxes *) + let whiteboxes = + parse_whiteboxes "margins.txt" + (* Get media boxes *) + and mediaboxes = + take' (e - s + 1) + (drop' (s - 1) + (map + (function page -> + (* Prefer the crop box *) + match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with + | Some pdfobject -> Pdf.parse_rectangle (Pdf.direct pdf pdfobject) + | None -> Pdf.parse_rectangle page.Pdfpage.mediabox) + (Pdfpage.pages_of_pagetree pdf))) + in + iter2 + (fun (m_minx, m_miny, m_maxx, m_maxy) (w_minx, w_miny, w_maxx, w_maxy) -> + if w_minx = 0. && w_miny = 0. && w_maxx = 0. && w_maxy = 0. then + Printf.printf "100, 100, 100, 100\n" + else + let topmargin = ((m_maxy -. w_maxy) /. (m_maxy -. m_miny)) *. 100. + and bottommargin = ((w_miny -. m_miny) /. (m_maxy -. m_miny)) *. 100. + and leftmargin = ((w_minx -. m_minx) /. (m_maxx -. m_minx)) *. 100. + and rightmargin = ((m_maxx -. w_maxx) /. (m_maxx -. m_minx)) *. 100. in + Printf.printf "%f, %f, %f, %f\n" leftmargin rightmargin topmargin bottommargin) + mediaboxes + whiteboxes; + (* Clean up temp files *) + Sys.remove "margins.txt"; + Sys.remove "waste.txt" + | _ -> Printf.eprintf "Call to ghostscript failed." + +let calculate_margins filename pdf range = + iter (calculate_margins filename pdf) (startends_of_range (sort compare range)) + +(* copy the contents of the box f to the box t. If mediabox_if_missing is set, +the contents of the mediabox will be used if the from fox is not available. If +mediabox_is_missing is false, the page is unaltered. *) +let copy_box f t mediabox_if_missing pdf range = + Cpdf.process_pages + (fun _ page -> + if f = "/MediaBox" then + {page with Pdfpage.rest = + (Pdf.add_dict_entry page.Pdfpage.rest t (page.Pdfpage.mediabox))} + else + match Pdf.lookup_direct pdf f page.Pdfpage.rest with + | Some pdfobject -> + if t = "/MediaBox" + then {page with + Pdfpage.mediabox = Pdf.direct pdf pdfobject} + else {page with Pdfpage.rest = + (Pdf.add_dict_entry page.Pdfpage.rest t (Pdf.direct pdf pdfobject))} + | None -> + if mediabox_if_missing + then {page with Pdfpage.rest = Pdf.add_dict_entry page.Pdfpage.rest t page.Pdfpage.mediabox} + else page) + pdf + range + +(* Remove Embedded fonts. This is done by removing the Font Descriptor. *) +let remove_fontdescriptor pdf = function + | Pdf.Dictionary d as font -> + begin match lookup "/Type" d with + | Some (Pdf.Name "/Font") -> + (match Pdf.lookup_direct pdf "/FontDescriptor" font with + | Some fontdes -> + let fontdescriptor' = + Pdf.remove_dict_entry + (Pdf.remove_dict_entry + (Pdf.remove_dict_entry fontdes "/FontFile") + "/FontFile2") + "/FontFile3" + in + Pdf.add_dict_entry font "/FontDescriptor" (Pdf.Indirect (Pdf.addobj pdf fontdescriptor')) + | _ -> font) + | _ -> font + end + | x -> x + +let remove_fonts pdf = + Pdf.objiter (fun k v -> ignore (Pdf.addobj_given_num pdf (k, remove_fontdescriptor pdf v))) pdf; + pdf + +let dump_attachment out pdf (_, embeddedfile) = + match Pdf.lookup_direct pdf "/F" embeddedfile with + | Some (Pdf.String s) -> + let efdata = + begin match Pdf.lookup_direct pdf "/EF" embeddedfile with + | Some d -> + let stream = + match Pdf.lookup_direct pdf "/F" d with + | Some s -> s + | None -> error "Bad embedded file stream" + in + Pdfcodec.decode_pdfstream_until_unknown pdf stream; + begin match stream with Pdf.Stream {contents = (_, Pdf.Got b)} -> b | _ -> error "Bad embedded file stream" end + | _ -> error "Bad embedded file stream" + end + in + let filename = if out = "" then s else out ^ "/" ^ s in + (*i Printf.printf "writing to %s\n" filename; i*) + let fh = open_out_bin filename in + for x = 0 to bytes_size efdata - 1 do output_byte fh (bget efdata x) done; + close_out fh + | _ -> () + +let dump_attached_document pdf out = + let root = Pdf.lookup_obj pdf pdf.Pdf.root in + let names = + match Pdf.lookup_direct pdf "/Names" root with Some n -> n | _ -> Pdf.Dictionary [] + in + match Pdf.lookup_direct pdf "/EmbeddedFiles" names with + | Some x -> + iter (dump_attachment out pdf) (Pdf.contents_of_nametree pdf x) + | None -> () + +let dump_attached_page pdf out page = + let annots = + match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with + | Some (Pdf.Array l) -> l + | _ -> [] + in + let efannots = + keep + (fun annot -> + match Pdf.lookup_direct pdf "/Subtype" annot with + | Some (Pdf.Name "/FileAttachment") -> true + | _ -> false) + annots + in + let fsannots = option_map (Pdf.lookup_direct pdf "/FS") efannots in + iter (dump_attachment out pdf) (map (fun x -> 0, x) fsannots) + +(* Dump both document-level and page-level attached files to file, using their file names *) +let dump_attached_files pdf out = + try + dump_attached_document pdf out; + iter (dump_attached_page pdf out) (Pdfpage.pages_of_pagetree pdf) + with + _ -> error "Couldn't dump attached files" + +(* Prerotate a pdf *) +let prerotate_pdf pdf r = + let setto angle = Cpdf.rotate_pdf angle pdf (ilist 1 (Pdfpage.endpage pdf)) + and setby angle = Cpdf.rotate_pdf_by angle pdf (ilist 1 (Pdfpage.endpage pdf)) in + match r with + | Pdfmerge.DNR -> pdf + | Pdfmerge.N -> setto 0 + | Pdfmerge.S -> setto 180 + | Pdfmerge.E -> setto 90 + | Pdfmerge.W -> setto 270 + | Pdfmerge.L -> setby ~-90 + | Pdfmerge.R -> setby 90 + | Pdfmerge.D -> setby 180 + +(* Convert from unicode or PDFDocencoded to ASCII string with HTML entities in it. *) +let html_of_unicode s = + implode + (flatten + (map + (function 60 -> explode "<" + | 62 -> explode ">" + | 38 -> explode "&" + | 34 -> explode """ + | x when x >= 0x20 && x <= 0x7e -> [char_of_int x] + | x -> ['&';'#'] @ explode (string_of_int x) @ [';']) + (Pdftext.codepoints_of_pdfdocstring s))) + +(* Convert from HTML entities to a PDF string which is unicode-encoded (if there are any non-ASCII chars, or PDFDocEncoded if there aren't) . *) +let unicode_of_html s = + let rec codepoints_of_html ps = function + | '&'::'l'::'t'::';'::r -> codepoints_of_html (60::ps) r + | '&'::'g'::'t'::';'::r -> codepoints_of_html (62::ps) r + | '&'::'a'::'m'::'p'::';'::r -> codepoints_of_html (38::ps) r + | '&'::'q'::'u'::'o'::'t'::';'::r -> codepoints_of_html (34::ps) r + | '&'::'#'::r -> + begin match cleavewhile (function '0'..'9' -> true | _ -> false) r with + | [], r -> codepoints_of_html ps r + | cs, (';'::r) -> + let i = try int_of_string (implode cs) with _ -> error "bad HTML literal in update_info" in + codepoints_of_html (i::ps) r + | _ -> error "bad HTML literal in update_info 2" + end + | x::r when int_of_char x >= 0x20 && int_of_char x <= 0x7e -> codepoints_of_html (int_of_char x::ps) r + | _::r -> codepoints_of_html ps r + | [] -> rev ps + in + Pdftext.pdfdocstring_of_codepoints (codepoints_of_html [] (explode s)) + +let dump_data pdf out = + let channel = + match out with + | NoOutputSpecified -> stdout + | Stdout -> stdout + | File f -> open_out_bin f + in + let prs s = Pervasives.output_string channel s in + (* 1. Info keys *) + begin match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with + | Some (Pdf.Dictionary d) -> + iter + (function (name, pdfobj) -> + match pdfobj with + | Pdf.String s -> + begin match s with "" -> () | _ -> + begin match explode name with + | [] -> () + | h::t -> prs (Printf.sprintf "InfoKey: %s\nInfoValue: %s\n" (implode t) (html_of_unicode s)) + end + end + | _ -> ()) + d + | _ -> flprint "Warning: no info dictionary found\n"; () + end; + let hex s = + fold_left ( ^ ) "" (map (Printf.sprintf "%02x") (map int_of_char (explode s))) + in + (* 2. IDs *) + begin match Pdf.lookup_direct pdf "/ID" pdf.Pdf.trailerdict with + | Some (Pdf.Array [Pdf.String s; Pdf.String t]) -> prs (Printf.sprintf "PdfID0: %s\nPdfID1: %s\n" (hex s) (hex t)) + | _ -> () + end; + (* 3. No of pages *) + prs (Printf.sprintf "NumberOfPages: %i\n" (Pdfpage.endpage pdf)); + (* 4. Outlines *) + iter + (function m -> + prs (Printf.sprintf "BookmarkTitle: %s\n" (html_of_unicode m.Pdfmarks.text)); + prs (Printf.sprintf "BookmarkLevel: %i\n" (m.Pdfmarks.level + 1)); + prs (Printf.sprintf "BookmarkPageNumber: %i\n" (Pdfpage.pagenumber_of_target pdf m.Pdfmarks.target))) + (Pdfmarks.read_bookmarks pdf); + (* 5. Close and finish *) + match out with File _ -> close_out channel | _ -> flush stdout + +(* Parse and update info *) +let update_info pdf source = + let channel = + match source with + | "use-stdin" -> stdin + | x -> open_in_bin x + in + let rec read_lines prev channel = + try read_lines (input_line channel::prev) channel with End_of_file -> rev prev + in + let lines = read_lines [] channel in + let kvpairs = + map + (function l -> let k, v = cleavewhile (neq ':') (explode l) in implode k, implode (tail_no_fail (tail_no_fail v))) + lines + in + (*i iter + (function (k, v) -> Printf.printf "(%s,%s)\n" k v) + kvpairs; i*) + (* Split into 1) info keys / values 2) PdfIDs, Bookmarks *) + let infolines = + keep (function (("InfoKey" | "InfoValue"), _) -> true | _ -> false) kvpairs; + and pdfidlines = + keep (function (("PdfID0" | "PdfID1"), _) -> true | _ -> false) kvpairs + and bookmarklines = + keep (function (("BookmarkTitle" | "BookmarkLevel" | "BookmarkPageNumber"), _) -> true | _ -> false) kvpairs + in + (* 1. Add/Replace info keys *) + let kvpairs = + map + (function [(_, k); (_, v)] -> k, v | _ -> error "Mismatched info Key/Value pairs") + (splitinto 2 infolines) + in + let pdf = + {pdf with Pdf.trailerdict = + Pdf.add_dict_entry pdf.Pdf.trailerdict "/Info" + (Pdf.Dictionary + (fold_left + (fun d (k, v) -> add k v d) + (match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with | Some (Pdf.Dictionary d) -> d | _ -> []) + (map (function (k, v) -> "/" ^ k, Pdf.String (unicode_of_html v)) kvpairs)))} + in + (* 2. Add/Replace PDF Id *) + let pdf = + let unhex s = + match Pdfread.lex_hexstring (Pdfio.input_of_string ("<" ^ s ^ ">")) with + | Pdfgenlex.LexString s -> s + | _ -> error "PDFId wrongly formed in update_info file" + in + match pdfidlines with + | ["PdfID0", a; "PdfID1", b] -> + {pdf with Pdf.trailerdict = + Pdf.add_dict_entry pdf.Pdf.trailerdict "/ID" (Pdf.Array [Pdf.String (unhex a); Pdf.String (unhex b)])} + | _ -> pdf + in + (* 3. Replace Bookmarks *) + let marks = + map + (function + | [("BookmarkTitle", a); ("BookmarkLevel", b); ("BookmarkPageNumber", c)] -> + {Pdfmarks.level = int_of_string b - 1; + Pdfmarks.text = unicode_of_html a; + Pdfmarks.target = Pdfpage.target_of_pagenumber pdf (int_of_string c); + Pdfmarks.isopen = false} + | _ -> error "Bookmark entries malformed in update_info file") + (splitinto 3 bookmarklines) + in + let pdf = Pdfmarks.add_bookmarks marks pdf in + begin match source with "use-stdin" -> () | _ -> close_in channel end; + pdf + +(* If pages in stamp < pages in main, extend stamp by repeating its last page. If pages in stamp more, chop stamp *) +let equalize_pages_extend main stamp = + let length_stamp = Pdfpage.endpage stamp + in let length_main = Pdfpage.endpage main + in let extend_lastpage lastpage page len = + Pdfpage.change_pages true page (Pdfpage.pages_of_pagetree page @ (many lastpage len)) + in let chop pdf n = + Pdfpage.change_pages true pdf (take (Pdfpage.pages_of_pagetree pdf) n) + in + if length_stamp > length_main + then chop stamp length_main + else extend_lastpage (last (Pdfpage.pages_of_pagetree stamp)) stamp (length_main - length_stamp) + +let remove_unused_resources_page pdf n page = + let xobjects, all_names = + match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with + | Some (Pdf.Dictionary d) -> Pdf.Dictionary d, map fst d + | _ -> Pdf.Dictionary [], [] + in + let names_to_keep = + option_map + (function Pdfops.Op_Do n -> Some n | _ -> None) + (Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content) + in + let names_to_remove = lose (mem' names_to_keep) all_names in + let xobjdict = fold_left (Pdf.remove_dict_entry) xobjects names_to_remove in + {page with Pdfpage.resources = Pdf.add_dict_entry page.Pdfpage.resources "/XObject" xobjdict} + +let remove_unused_resources pdf = + Cpdf.process_pages (remove_unused_resources_page pdf) pdf (ilist 1 (Pdfpage.endpage pdf)) + +let extract_page_text pdf _ page = + let text_extractor = ref None in + fold_left ( ^ ) "" + (map + (function + | Pdfops.Op_Tf (fontname, _) -> + let fontdict = + match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with + | None -> raise (Pdf.PDFError "Missing /Font in text extraction") + | Some d -> + match Pdf.lookup_direct pdf fontname d with + | None -> raise (Pdf.PDFError "Missing font in text extraction") + | Some d -> d + in + text_extractor := Some (Pdftext.text_extractor_of_font pdf fontdict); + "" + | Pdfops.Op_Tj text when !text_extractor <> None -> + Pdftext.utf8_of_codepoints + (Pdftext.codepoints_of_text (unopt !text_extractor) text) + | Pdfops.Op_TJ (Pdf.Array objs) when !text_extractor <> None -> + fold_left ( ^ ) "" + (option_map + (function + | Pdf.String text -> + Some + (Pdftext.utf8_of_codepoints + (Pdftext.codepoints_of_text (unopt !text_extractor) text)) + | _ -> None) + objs) + | _ -> "") + (Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content)) + +(* For each page, extract all the ops with text in them, and concatenate it all together *) +let extract_text pdf range = + fold_left ( ^ ) "" + (Cpdf.map_pages (extract_page_text pdf) pdf range) + +(* Extracts font to font.dat in CWD. *) +let extract_fontfile pagenumber fontname pdf = + let resources = (select pagenumber (Pdfpage.pages_of_pagetree pdf)).Pdfpage.resources in + match Pdf.lookup_direct pdf "/Font" resources with + | None -> failwith "extract_fontfile: font not found" + | Some fonts -> + let fontobj = Pdf.lookup_fail "no /Fonts" pdf fontname fonts in + let font = Pdftext.read_font pdf fontobj in + match font with + | Pdftext.CIDKeyedFont (_, {Pdftext.cid_fontdescriptor = {Pdftext.fontfile = Some fontfile}}, _) + | Pdftext.SimpleFont {Pdftext.fontdescriptor = Some {Pdftext.fontfile = Some fontfile}} -> + begin let objnum = + match fontfile with + | Pdftext.FontFile i -> i + | Pdftext.FontFile2 i -> i + | Pdftext.FontFile3 i -> i + in + match Pdf.lookup_obj pdf objnum with + | Pdf.Stream s as obj -> + Pdfcodec.decode_pdfstream pdf obj; + begin match s with + | {contents = (_, Pdf.Got bytes)} -> + let fh = open_out_bin "font.dat" in + for x = 0 to bytes_size bytes - 1 do output_byte fh (bget bytes x) done; + close_out fh; + (* Now try to read using Pdfcff module *) + (*let font = Pdftruetype.to_type3 pdf font in*) + (*let extractor = Pdftext.text_extractor_of_font pdf fontobj in*) + (*flprint "glyph names for incodes 0,1,2,3..."; + iter print_string (Pdftext.glyphnames_of_text extractor "\000\001\002\003\004\005\006\007"); + flprint "\n";*) + () + | _ -> failwith "extract_fontfile" + end + | _ -> failwith "extract_fontfile" + end + | _ -> failwith "unsupported or unfound font" + +let addrectangle (w, h) color position relative_to_cropbox underneath range pdf = + let addrectangle_page _ page = + page + in + Cpdf.process_pages addrectangle_page pdf range + +(* \section{Main function} *) +let go () = + match args.op with + | Some Version -> + flprint + ("cpdf Version " ^ string_of_int major_version ^ "." ^ string_of_int minor_version ^ " " ^ version_date ^ "\n") + | None | Some Merge -> + (* 14th Sept 2010 - changed to write_pdf false pdf because we're not supposed to change it.*) + begin match args.out, args.inputs with + | _, (_::_ as inputs) -> + let write_pdf x pdf = + match args.keep_this_id with + | None -> write_pdf x pdf + | Some s -> + (* get the ID from the file with name 's', and copy to pdf *) + let namewiths = + keep (function (InFile s', _, _, _, _) when s' = s -> true | _ -> false) inputs + in + match namewiths with + | (namewiths, _, _, _, _) as input::t -> + let spdf = get_pdf_from_input_kind input (Some Decrypt) namewiths in + write_pdf x (Cpdf.copy_id true spdf pdf) + | _ -> write_pdf x pdf + in + let names, ranges, rotations, _, _ = split5 inputs in + let pdfs = map2 (fun i -> get_pdf_from_input_kind i (Some Decrypt)) inputs names in + (* If at least one file had object streams and args.preserve_objstm is true, set -objstm-create *) + if args.preserve_objstm then + iter + (fun pdf -> + if Hashtbl.length pdf.Pdf.objects.Pdf.object_stream_ids > 0 + then args.create_objstm <- true) + pdfs; + begin match pdfs with + | [pdf] -> + (* FIXME Here, if the output file is different from the + input file, and we're just extracting pages, might we use a + lazy read? *) + (*Pdfwrite.debug_whole_pdf pdf;*) + if hd ranges <> "all" || hd rotations <> Pdfmerge.DNR || !Pdfpage.flat_pagetrees then + let pdf = if hd rotations <> Pdfmerge.DNR then prerotate_pdf pdf (hd rotations) else pdf in + let range = parse_pagespec pdf (hd ranges) in + let newpdf = Pdfpage.pdf_of_pages ~retain_numbering:args.retain_numbering pdf range in + write_pdf false newpdf + else + write_pdf false pdf + | _ -> + (* If args.keep_this_id is set, change the ID to the one from the kept one *) + let rangenums = map2 parse_pagespec pdfs ranges in + let outpdf = + Pdfmerge.merge_pdfs ~rotations args.retain_numbering args.remove_duplicate_fonts + (map string_of_input_kind names) pdfs rangenums + in + write_pdf false outpdf + end + | _ -> + match args.op with + | Some Merge -> + error "Merge: Must specify one output and at least one input" + | None -> + error "Must specify one output and at least one input" + | _ -> assert false + end + | Some RemoveUnusedResources -> + begin match args.inputs, args.out with + | _::_, _ -> + let pdf = get_single_pdf (Some RemoveUnusedResources) false in + let outpdf = remove_unused_resources pdf in + write_pdf true outpdf + | _ -> error "RemoveUnusedResources: bad command line" + end + | Some (CopyFont fromfile) -> + begin match args.inputs, args.out with + | (_, pagespec, _, u, o)::_, _ -> + let pdf = get_single_pdf (Some (CopyFont fromfile)) false + and frompdf = pdfread_pdf_of_file (optstring u) (optstring o) fromfile in + let range = parse_pagespec pdf pagespec in + let copyfontname = + match args.copyfontname with + | Some x -> x + | None -> failwith "copy_font: no font name given" + in + let outpdf = copy_font frompdf copyfontname args.copyfontpage range pdf in + write_pdf true outpdf + | _ -> error "copyfont: bad command line" + end + | Some RemoveFonts -> + begin match args.inputs, args.out with + | (_, pagespec, _, _, _)::_, _ -> + let pdf = get_single_pdf (Some RemoveFonts) false in + write_pdf true (remove_fonts pdf) + | _ -> error "remove fonts: bad command line" + end + | Some ExtractFontFile -> + (*Graphics.open_graph " 1600x1050";*) + begin match args.inputs, args.out with + | (_, pagespec, _, u, o)::_, _ -> + let pdf = get_single_pdf (Some ExtractFontFile) false in + let page = args.copyfontpage + and name = + match args.copyfontname with + | Some x -> x + | None -> failwith "extract fontfile: no font name given" + in + extract_fontfile page name pdf + | _ -> error "extract fontfile: bad command line" + end + | Some CountPages -> + let pdf, inname, input = + match args.inputs with + | (InFile inname, _, _, u, o) as input::_ -> pdfread_pdf_of_channel_lazy (optstring u) (optstring o) (open_in_bin inname), inname, input + | (StdIn, _, _, u, o) as input::_ -> pdf_of_stdin u o, "", input + | (AlreadyInMemory pdf, _, _, _, _) as input::_ -> pdf, "", input + | _ -> raise (Arg.Bad "cpdf: No input specified.\n") + in + let pdf = decrypt_if_necessary input (Some Info) pdf in + output_page_count pdf + | Some Clean -> + begin match args.out with + | (File _ | Stdout) -> + let pdf' = get_single_pdf (Some Clean) false in + write_pdf false pdf' + | _ -> error "Clean: No output specified" + end + | Some Info -> + (* Change as of 17th Sept 08 - now presents the pdf undecrypted so that + encryption info can be read out *) + let pdf, inname, input = + match args.inputs with + | (InFile inname, _, _, u, o) as input::_ -> + pdfread_pdf_of_channel_lazy (optstring u) (optstring o) (open_in_bin inname), inname, input + | (StdIn, _, _, u, o) as input::_ -> pdf_of_stdin u o, "", input + | (AlreadyInMemory pdf, _, _, _, _) as input::_ -> pdf, "", input + | _ -> raise (Arg.Bad "cpdf: No input specified.\n") + in + Printf.printf "Encryption: %s\n" (getencryption pdf); + Printf.printf "Permissions: %s\n" (getpermissions pdf); + if inname <> "" then + Printf.printf "Linearized: %b\n" (Pdfread.is_linearized (Pdfio.input_of_channel (open_in_bin inname))); + let pdf = decrypt_if_necessary input (Some Info) pdf in + Cpdf.output_info args.encoding pdf + | Some PageInfo -> + Cpdf.output_page_info (get_single_pdf (Some PageInfo) true) + | Some Metadata -> + Cpdf.print_metadata (get_single_pdf (Some Metadata) true) + | Some Fonts -> + Cpdf.print_fonts (get_single_pdf (Some Fonts) true) + | Some ListBookmarks -> + begin match args.inputs, args.out with + | (_, pagespec, _, _, _)::_, _ -> + let pdf = get_single_pdf args.op true in + let range = parse_pagespec pdf pagespec in + Cpdf.list_bookmarks args.encoding range pdf (Pdfio.output_of_channel stdout); + flush stdout + | _ -> error "list-bookmarks: bad command line" + end + | Some Crop -> + begin match args.inputs, args.out with + | (_, pagespec, _, _, _)::_, _ -> + let x, y, w, h = args.rectangle in + let pdf = get_single_pdf (Some Crop) false in + let range = parse_pagespec pdf pagespec in + let pdf = Cpdf.crop_pdf x y w h pdf range in + write_pdf false pdf + | _ -> error "crop: bad command line" + end + + | Some MediaBox -> + begin match args.inputs, args.out with + | (_, pagespec, _, _, _)::_, _ -> + let x, y, w, h = args.rectangle in + let pdf = get_single_pdf (Some MediaBox) false in + let range = parse_pagespec pdf pagespec in + let pdf = Cpdf.set_mediabox x y w h pdf range in + write_pdf false pdf + | _ -> error "set media box: bad command line" + end + | Some CopyBox -> + begin match args.inputs, args.out with + | (_, pagespec, _, _, _)::_, _ -> + let pdf = get_single_pdf (Some CopyBox) false in + let range = parse_pagespec pdf pagespec in + let f, t = + begin match args.frombox, args.tobox with + | Some f, Some t -> f, t + | _ -> error "Copy box: no tobox or no frombox specified" + end + in + let pdf = copy_box f t args.mediabox_if_missing pdf range in + write_pdf false pdf + | _ -> error "Copy Box: bad command line" + end + | Some Decompress -> + let pdf = get_single_pdf (Some Decompress) true in + Pdf.iter_stream + (function stream -> + try Pdfcodec.decode_pdfstream_until_unknown pdf stream with + e -> Printf.eprintf "Decode failure: %s. Carrying on...\n" (Printexc.to_string e); ()) + pdf; + begin match args.out with + | NoOutputSpecified -> + error "no output specified" + | File outname -> + let outname = writing_ok outname in + Pdfwrite.pdf_to_file_options args.linearize None args.makenewid pdf outname + | Stdout -> + Pdfwrite.pdf_to_channel args.linearize None args.makenewid pdf stdout; + flush stdout + end + | Some Compress -> + let pdf = get_single_pdf (Some Compress) false in + if args.remove_duplicate_streams then + Pdfmerge.remove_duplicate_fonts pdf; (* Will eventually do more! *) + write_pdf false (Cpdf.recompress_pdf pdf) + | Some RemoveCrop -> + begin match args.inputs, args.out with + | (_, pagespec, _, _, _)::_, _ -> + let pdf = get_single_pdf (Some RemoveCrop) false in + let range = parse_pagespec pdf pagespec in + let pdf = Cpdf.remove_cropping_pdf pdf range in + write_pdf false pdf + | _ -> error "remove-crop: bad command line" + end + | Some CopyCropBoxToMediaBox -> + begin match args.inputs, args.out with + | (_, pagespec, _, _, _)::_, _ -> + let pdf = get_single_pdf (Some CopyCropBoxToMediaBox) false in + let range = parse_pagespec pdf pagespec in + let pdf = copy_cropbox_to_mediabox pdf range in + write_pdf false pdf + | _ -> error "remove-crop: bad command line" + end + | Some (Rotate _) | Some (Rotateby _) -> + begin match args.inputs, args.out with + | (_, pagespec, _, _, _)::_, _ -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf pagespec in + let rotate = + match args.op with + | Some (Rotate i) -> Cpdf.rotate_pdf i + | Some (Rotateby i) -> Cpdf.rotate_pdf_by i + | _ -> assert false + in + let pdf = rotate pdf range in + write_pdf false pdf + | _ -> error "rotate: bad command line" + end + | Some (RotateContents a) -> + begin match args.inputs, args.out with + | (_, pagespec, _, _, _)::_, _ -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf pagespec in + let pdf = Cpdf.rotate_contents ~fast:args.fast a pdf range in + write_pdf false pdf + | _ -> error "rotate-contents: bad command line" + end + | Some Upright -> + begin match args.inputs, args.out with + | (_, pagespec, _, _, _)::_, _ -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf pagespec in + let pdf = Cpdf.upright ~fast:args.fast range pdf in + write_pdf false pdf + | _ -> error "rotate-contents: bad command line" + end + | Some ((VFlip | HFlip) as flip) -> + begin match args.inputs, args.out with + | (_, pagespec, _, _, _)::_, _ -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf pagespec in + let pdf = + if flip = VFlip + then Cpdf.vflip_pdf ~fast:args.fast pdf range + else Cpdf.hflip_pdf ~fast:args.fast pdf range + in + write_pdf false pdf + | _ -> error "flip: bad command line" + end + | Some ((SetAuthor _ | SetTitle _ | SetSubject _ | SetKeywords _ + | SetCreate _ | SetModify _ | SetCreator _ | SetProducer _ + | SetTrapped | SetUntrapped) as op) -> + let key, value, version = + let f s = if args.encoding <> Cpdf.Raw then Pdftext.pdfdocstring_of_utf8 s else s in + match op with + | SetAuthor s -> "/Author", Pdf.String (f s), 0 + | SetTitle s -> "/Title", Pdf.String (f s), 1 + | SetSubject s -> "/Subject", Pdf.String (f s), 1 + | SetKeywords s -> "/Keywords", Pdf.String (f s), 1 + | SetCreate s -> "/CreationDate", Pdf.String (Cpdf.expand_date s), 0 + | SetModify s -> "/ModDate", Pdf.String (Cpdf.expand_date s), 0 + | SetCreator s -> "/Creator", Pdf.String (f s), 0 + | SetProducer s -> "/Producer", Pdf.String (f s), 0 + | SetTrapped -> "/Trapped", Pdf.Boolean true, 3 + | SetUntrapped -> "/Trapped", Pdf.Boolean false, 3 + | _ -> assert false + in + let pdf = get_single_pdf args.op false in + let version = if args.keepversion then pdf.Pdf.minor else version in + write_pdf false (Cpdf.set_pdf_info (key, value, version) pdf) + | Some ((HideToolbar _ | HideMenubar _ | HideWindowUI _ + | FitWindow _ | CenterWindow _ | DisplayDocTitle _) as op) -> + begin match args.out with + | _ -> + let key, value, version = + match op with + | HideToolbar s -> "/HideToolbar", Pdf.Boolean s, 0 + | HideMenubar s -> "/HideMenubar", Pdf.Boolean s, 0 + | HideWindowUI s -> "/HideWindowUI", Pdf.Boolean s, 0 + | FitWindow s -> "/FitWindow", Pdf.Boolean s, 0 + | CenterWindow s -> "/CenterWindow", Pdf.Boolean s, 0 + | DisplayDocTitle s -> "/DisplayDocTitle", Pdf.Boolean s, 4 + | _ -> assert false + in + let pdf = get_single_pdf args.op false in + let version = if args.keepversion then pdf.Pdf.minor else version in + write_pdf false (Cpdf.set_viewer_preference (key, value, version) pdf) + end + | Some (SetMetadata metadata_file) -> + write_pdf false (Cpdf.set_metadata args.keepversion metadata_file (get_single_pdf args.op false)) + | Some (SetVersion v) -> + let pdf = get_single_pdf args.op false in + write_pdf false {pdf with Pdf.minor = v} + | Some (SetPageLayout s) -> + write_pdf false (Cpdf.set_page_layout (get_single_pdf args.op false) s) + | Some (SetPageMode s) -> + write_pdf false (Cpdf.set_page_mode (get_single_pdf args.op false) s) + | Some Split -> + begin match args.inputs, args.out with + | [(f, ranges, _, _, _)], File output_spec -> + let pdf = get_single_pdf args.op true + and filename = + match f with + | InFile n -> n + | _ -> "" + in + let enc = + match args.crypt_method with + | "" -> None + | _ -> + Some + {Pdfwrite.encryption_method = + (match args.crypt_method with + | "40bit" -> Pdfwrite.PDF40bit + | "128bit" -> Pdfwrite.PDF128bit + | "AES" -> Pdfwrite.AES128bit args.encrypt_metadata + | "AES256" -> Pdfwrite.AES256bit args.encrypt_metadata + | "AES256ISO" -> Pdfwrite.AES256bitISO args.encrypt_metadata + | _ -> assert false (* Pre-checked *)); + Pdfwrite.owner_password = args.owner; + Pdfwrite.user_password = args.user; + Pdfwrite.permissions = banlist_of_args ()} + in + Cpdf.split_pdf enc args.printf_format filename args.chunksize args.linearize nobble output_spec pdf + | _, Stdout -> error "Can't split to standard output" + | _, NoOutputSpecified -> error "Split: No output format specified" + | _ -> error "Split: bad parameters" + end + | Some Presentation -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + let pdf' = + Cpdf.presentation + range + args.transition args.duration args.horizontal + args.inward args.direction args.effect_duration pdf + in + pdf.Pdf.minor <- if args.keepversion then pdf.Pdf.minor else max pdf.Pdf.minor 1; + write_pdf false pdf' + | Some ChangeId -> + begin match args.inputs, args.out with + | [(k, _, _, _, _) as input], File s -> + let pdf = get_pdf_from_input_kind input args.op k in + let s = writing_ok s in + Pdfwrite.pdf_to_file_options + ~preserve_objstm:args.preserve_objstm + ~generate_objstm:args.create_objstm + args.linearize None true pdf s + | [(k, _, _, _, _) as input], Stdout -> + let pdf = get_pdf_from_input_kind input args.op k in + Pdfwrite.pdf_to_channel + ~preserve_objstm:args.preserve_objstm + ~generate_objstm:args.create_objstm + args.linearize None true pdf stdout; + flush stdout + | _ -> error "ChangeId: exactly one input file and output file required." + end + | Some RemoveId -> + let pdf = get_single_pdf args.op false in + pdf.Pdf.trailerdict <- Pdf.remove_dict_entry pdf.Pdf.trailerdict "/ID"; + write_pdf false pdf + | Some (CopyId getfrom) -> + begin match args.inputs with + | [(k, _, _, u, o) as input] -> + let pdf = + Cpdf.copy_id + args.keepversion + (pdfread_pdf_of_file (optstring u) (optstring o) getfrom) + (get_pdf_from_input_kind input args.op k) + in + write_pdf false pdf + | _ -> error "copy-id: No input file specified" + end + | Some (ThinLines w) -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + write_pdf false (Cpdf.thinlines range w pdf) + | Some BlackText -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + write_pdf false (Cpdf.blacktext range pdf) + | Some BlackLines -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + write_pdf false (Cpdf.blacklines range pdf) + | Some BlackFills -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + write_pdf false (Cpdf.blackfills range pdf) + | Some RemoveAnnotations -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + write_pdf false (Cpdf.remove_annotations range pdf) + | Some (CopyAnnotations getfrom) -> + begin match args.inputs with + | [(k, _, _, u, o) as input] -> + let input_pdf = get_pdf_from_input_kind input args.op k in + let range = parse_pagespec input_pdf (get_pagespec ()) in + let pdf = + Cpdf.copy_annotations + range + (pdfread_pdf_of_file (optstring u) (optstring o) getfrom) + input_pdf + in + write_pdf false pdf + | _ -> error "copy-annotations: No input file specified" + end + | Some ListAnnotations -> + Cpdf.list_annotations args.encoding (get_single_pdf args.op true) + | Some ListAnnotationsMore -> + Cpdf.list_annotations_more (get_single_pdf args.op true) + | Some Shift -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + begin match args.coord with (dx, dy) -> + write_pdf false (Cpdf.shift_pdf ~fast:args.fast dx dy pdf range) + end + | Some Scale -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + begin match args.coord with (sx, sy) -> + write_pdf false (Cpdf.scale_pdf ~fast:args.fast sx sy pdf range) + end + | Some ScaleToFit -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + let x, y = args.coord + and scale = args.scale in + write_pdf false (Cpdf.scale_to_fit_pdf ~fast:args.fast scale x y args.op pdf range) + | Some (ScaleContents scale) -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + write_pdf false (Cpdf.scale_contents ~fast:args.fast args.position scale pdf range) + | Some ListAttachedFiles -> + let pdf = get_single_pdf args.op false in + let names, numbers = split (Cpdf.list_attached_files pdf) in + iter2 (Printf.printf "%i %s\n") numbers names; + flprint "" + | Some DumpAttachedFiles -> + let pdf = get_single_pdf args.op false in + begin match args.out with + | NoOutputSpecified -> dump_attached_files pdf "" + | File n -> dump_attached_files pdf n + | Stdout -> error "Can't dump attachments to stdout" + end + | Some RemoveAttachedFiles -> + write_pdf false (Cpdf.remove_attached_files (get_single_pdf args.op false)) + | Some (AttachFile files) -> + begin match args.inputs with + | [(k, _, _, _, _) as input] -> + let pdf = get_pdf_from_input_kind input args.op k in + (* Convert from string to int *) + let topage = + try + match args.topage with + | None -> None + | Some "end" -> Some (Pdfpage.endpage pdf) + | Some s -> Some (int_of_string s) + with _ -> error "Bad -to-page" + in + let pdf = fold_left (Cpdf.attach_file args.keepversion topage) pdf (rev files) in + write_pdf false pdf + | _ -> error "attach file: No input file specified" + end + | Some (SplitOnBookmarks level) -> + begin match args.out with + | File output_spec -> + let pdf = get_single_pdf args.op false + and filename = + match args.inputs with + | [(InFile f, _, _, _, _)] -> f + | _ -> "" + in + Cpdf.split_at_bookmarks filename args.linearize nobble level output_spec pdf + | Stdout -> error "Can't split to standard output" + | NoOutputSpecified -> error "Split: No output format specified" + end + | Some PadBefore -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + write_pdf false (Cpdf.padbefore range pdf) + | Some PadAfter -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + write_pdf false (Cpdf.padafter range pdf) + | Some (PadEvery n) -> + let pdf = get_single_pdf args.op false in + let range = + match keep (function m -> m mod n = 0) (ilist 1 (Pdfpage.endpage pdf)) with + | [] -> [] + | l -> if last l = Pdfpage.endpage pdf then all_but_last l else l + in + write_pdf false (Cpdf.padafter range pdf) + | Some (PadMultiple n) -> + let pdf = get_single_pdf args.op false in + write_pdf false (Cpdf.padmultiple n pdf) + | Some Draft -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + write_pdf false (Cpdf.draft args.boxes range pdf) + | Some (AddText text) -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + let font = + match args.font with + | StandardFont f -> Some f + | OtherFont f -> None (* it's in fontname *) + in + let pdf = + if args.prerotate then Cpdf.upright ~fast:args.fast range pdf else pdf + and filename = + match args.inputs with + | (InFile inname, _, _, _, _)::_ -> inname + | _ -> "" + in + write_pdf false + (Cpdf.addtexts + false args.linewidth args.outline args.fast args.fontname font args.bates + args.color args.position args.linespacing args.fontsize + args.underneath text range args.orientation args.relative_to_cropbox args.opacity + args.justification args.midline filename pdf) + | Some RemoveText -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + write_pdf false (Cpdf.removetext range pdf) + | Some AddRectangle -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec pdf (get_pagespec ()) in + write_pdf false + (addrectangle + args.coord args.color args.position args.relative_to_cropbox args.underneath range pdf) + | Some (AddBookmarks file) -> + write_pdf false + (Cpdf.add_bookmarks true (Pdfio.input_of_channel (open_in_bin file)) + (get_single_pdf args.op false)) + | Some RemoveBookmarks -> + write_pdf false (Pdfmarks.remove_bookmarks (get_single_pdf args.op false)) + | Some TwoUp -> + write_pdf false (Cpdf.twoup (get_single_pdf args.op false)) + | Some TwoUpStack -> + write_pdf false (Cpdf.twoup_stack (get_single_pdf args.op false)) + | Some (StampOn over) -> + let overpdf = + match over with + | "stamp_use_stdin" -> pdf_of_stdin "" "" + | x -> pdfread_pdf_of_file None None x + in + let overpdf = if args.uprightstamp then Cpdf.upright ~fast:args.fast (ilist 1 (Pdfpage.endpage overpdf)) overpdf else overpdf in + let pdf = get_single_pdf args.op false in + let marks = Pdfmarks.read_bookmarks pdf in + let range = parse_pagespec pdf (get_pagespec ()) in + let pdf = + if args.ismulti + then + let overpdf = equalize_pages_extend pdf overpdf in + Cpdf.combine_pages args.fast pdf overpdf true false false + else + Cpdf.stamp args.fast args.scale_stamp_to_fit true range overpdf pdf + in + write_pdf false (Pdfmarks.add_bookmarks marks pdf) + | Some (StampUnder under) -> + let underpdf = + match under with + | "stamp_use_stdin" -> pdf_of_stdin "" "" + | x -> pdfread_pdf_of_file None None x + in + let underpdf = if args.uprightstamp then Cpdf.upright ~fast:args.fast (ilist 1 (Pdfpage.endpage underpdf)) underpdf else underpdf in + let pdf = get_single_pdf args.op false in + let marks = Pdfmarks.read_bookmarks pdf in + let range = parse_pagespec pdf (get_pagespec ()) in + let pdf = + if args.ismulti + then + let underpdf = equalize_pages_extend pdf underpdf in + Cpdf.combine_pages args.fast pdf underpdf true true false + else Cpdf.stamp args.fast args.scale_stamp_to_fit false range underpdf pdf + in + write_pdf false (Pdfmarks.add_bookmarks marks pdf) + | Some (CombinePages over) -> + write_pdf false + (Cpdf.combine_pages args.fast (get_single_pdf args.op false) (pdfread_pdf_of_file None None over) false false true) + | Some Encrypt -> + let pdf = get_single_pdf args.op false in + let pdf = Cpdf.recompress_pdf <| nobble pdf + and encryption = + {Pdfwrite.encryption_method = + (match args.crypt_method with + | "40bit" -> Pdfwrite.PDF40bit + | "128bit" -> Pdfwrite.PDF128bit + | "AES" -> Pdfwrite.AES128bit args.encrypt_metadata + | "AES256" -> Pdfwrite.AES256bit args.encrypt_metadata + | "AES256ISO" -> Pdfwrite.AES256bitISO args.encrypt_metadata + | _ -> assert false (* Pre-checked *)); + Pdfwrite.owner_password = args.owner; + Pdfwrite.user_password = args.user; + Pdfwrite.permissions = banlist_of_args ()} + in + Pdf.remove_unreferenced pdf; + if not args.keepversion then + begin + let newversion = + match args.crypt_method with + "40bit" -> 1 | "128bit" -> 4 | "AES" -> 6 | "AES256" | "AES256ISO" -> 7 | _ -> 0 + in + let newversion = if args.create_objstm then 5 else newversion in + pdf.Pdf.minor <- max pdf.Pdf.minor newversion + end; + begin match args.out with + | NoOutputSpecified -> + error "encrypt: no output specified" + | File outname -> + let outname = writing_ok outname in + Pdfwrite.pdf_to_file_options + ~preserve_objstm:args.preserve_objstm + ~generate_objstm:args.create_objstm + args.linearize (Some encryption) args.makenewid pdf outname + | Stdout -> + Pdfwrite.pdf_to_channel + ~preserve_objstm:args.preserve_objstm + ~generate_objstm:args.create_objstm + args.linearize (Some encryption) args.makenewid pdf stdout; + flush stdout; + end + | Some Decrypt -> + write_pdf false (get_single_pdf args.op false) + | Some RemoveMetadata -> + write_pdf false (Cpdf.remove_metadata (get_single_pdf args.op false)) + | Some ExtractImages -> + let output_spec = + begin match args.out with + | File output_spec -> output_spec + | _ -> "" + end + in + let pdf = get_single_pdf args.op true in + let range = parse_pagespec pdf (get_pagespec ()) in + extract_images pdf range output_spec + | Some (ImageResolution f) -> + let pdf = get_single_pdf args.op true in + let range = parse_pagespec pdf (get_pagespec ()) in + image_resolution pdf range f + | Some MissingFonts -> + let pdf = get_single_pdf args.op true in + let range = parse_pagespec pdf (get_pagespec ()) in + missing_fonts pdf range + | Some CSP1 -> + write_pdf false (Cpdf.custom_csp1 (get_single_pdf (Some CSP1) false)) + | Some (CSP2 f) -> + write_pdf false (Cpdf.custom_csp2 f (get_single_pdf (Some (CSP2 f)) false)) + | Some CSP3 -> + begin match args.inputs with + | [InFile s, _, _, _, _] -> + let pdf = get_single_pdf args.op true in + let range = parse_pagespec pdf (get_pagespec ()) in + calculate_margins s pdf range + | _ -> + Printf.eprintf "CSP3: Too many input files or input not a file" + end + | Some DumpData -> + let pdf = get_single_pdf args.op true in + dump_data pdf args.out + | Some (UpdateInfo source) -> + let pdf = get_single_pdf args.op false in + write_pdf false (update_info pdf source) + | Some ExtractText -> + let pdf = get_single_pdf args.op true in + let range = parse_pagespec pdf (get_pagespec ()) in + let text = extract_text pdf range in + begin match args.out with + | File filename -> + let fh = open_out_bin filename in + output_string fh text; + close_out fh + | NoOutputSpecified | Stdout -> + print_string text; + print_newline () + end + | Some PrintLinearization -> + begin match args.inputs with + | (InFile inname, _, _, u, o)::_ -> + Pdfread.print_linearization (Pdfio.input_of_channel (open_in_bin inname)) + | _ -> raise (Arg.Bad "-print-linearization: supply a single file name") + end + +let parse_argv () = + Arg.parse_argv ~current:(ref 0) + +let align_specs s = + Arg.align s + +(* Main function. *) +let go_withargv argv = + if demo then + flprint "This demo is for evaluation only. http://www.coherentpdf.com/\n"; + try + (* Split the arguments into sets either side of ANDs *) + let sets = + let args = + (map (fun l -> "cpdf"::l) (split_around (eq "AND") (tl (Array.to_list argv)))) + in + match args with + | [] -> [] + | _ -> combine (map Array.of_list args) (map (eq (length args)) (ilist 1 (length args))) + in + iter + (fun (s, islast) -> + (*Printf.printf "AND:%b, %s\n" islast (Array.fold_left (fun x y -> x ^ " " ^ y) "" s); + flprint "\n";*) + reset_arguments (); + parse_argv () s (align_specs specs) anon_fun usage_msg; + parse_argv () (Array.of_list ("cpdf"::!control_args)) (align_specs specs) anon_fun usage_msg; + let addrange pdf = AlreadyInMemory pdf, args.dashrange, Pdfmerge.DNR, "", "" in + args.inputs <- rev (map addrange !output_pdfs) @ rev args.inputs; + output_pdfs := []; + go ()) + sets; + flush stdout (*r for Windows *) + with + | Arg.Bad s -> + let s' = + let chars = explode s in + implode (takewhile (neq '\n') chars) ^ " Use -help for help.\n\n" + in + prerr_string s'; + flush stderr; + exit 2 + | Arg.Help _ -> + Arg.usage (align_specs specs) usage_msg; + flush stderr (*r for Windows *) + | Sys_error s as e -> + prerr_string (s ^ "\n\n"); + flush stderr; + if args.debug then raise e else exit 2 + | Pdf.PDFError s as e -> + prerr_string + ("cpdf encountered an error. Technical details follow:\n\n" ^ s ^ "\n\n"); + flush stderr; + if args.debug then raise e else exit 2 + | Cpdf.SoftError s -> soft_error s + | Cpdf.HardError s -> error s + | e -> + prerr_string + ("cpdf encountered an unexpected error. Technical Details follow:\n" ^ + Printexc.to_string e ^ "\n\n"); + flush stderr; + if args.debug then raise e else exit 2 + +let go () = go_withargv Sys.argv + diff --git a/cpdfcommand.mli b/cpdfcommand.mli new file mode 100644 index 0000000..526661f --- /dev/null +++ b/cpdfcommand.mli @@ -0,0 +1,7 @@ +(* cpdfcommand.mli *) +val go : unit -> unit + +val go_withargv : string array -> unit + +val demo : bool + diff --git a/cpdfcommandrun.ml b/cpdfcommandrun.ml new file mode 100644 index 0000000..2a43a43 --- /dev/null +++ b/cpdfcommandrun.ml @@ -0,0 +1,2 @@ +let _ = Cpdfcommand.go () + diff --git a/cpdfstrftime.ml b/cpdfstrftime.ml new file mode 100644 index 0000000..03cd8fa --- /dev/null +++ b/cpdfstrftime.ml @@ -0,0 +1,93 @@ +(* C-Style strftime *) +open Pdfutil + +let strf_A t = + match t.Unix.tm_wday with + | 0 -> "Sunday" | 1 -> "Monday" | 2 -> "Tuesday" + | 3 -> "Wednesday" | 4 -> "Thursday" | 5 -> "Friday" + | 6 -> "Saturday" + | _ -> "strf_AFail" + +let strf_a t = + String.sub (strf_A t) 0 3 + +let strf_B t = + match t.Unix.tm_mon with + | 0 -> "January" | 1 -> "February" | 2 -> "March" | 3 -> "April" + | 4 -> "May" | 5 -> "June" | 6 -> "July" | 7 -> "August" + | 8 -> "September" | 9 -> "October" | 10 -> "November" + | 11 -> "December" | _ -> "strf_Bfail" + +let strf_b t = + String.sub (strf_B t) 0 3 + +let strf_d t = + let s = string_of_int t.Unix.tm_mday in + if String.length s = 1 then "0" ^ s else s + +let strf_e t = + let s = string_of_int t.Unix.tm_mday in + if String.length s = 1 then " " ^ s else s + +let strf_H t = + let s = string_of_int t.Unix.tm_hour in + if String.length s = 1 then "0" ^ s else s + +let strf_I t = + let s = string_of_int (t.Unix.tm_hour mod 12) in + if String.length s = 1 then "0" ^ s else s + +let strf_j t = + let s = string_of_int t.Unix.tm_yday in + match String.length s with + | 1 -> "00" ^ s + | 2 -> "0" ^ s + | _ -> s + +let strf_m t = + let s = string_of_int (t.Unix.tm_mon + 1) in + if String.length s = 1 then "0" ^ s else s + +let strf_M t = + let s = string_of_int t.Unix.tm_min in + if String.length s = 1 then "0" ^ s else s + +let strf_p t = + if t.Unix.tm_hour >= 12 then "p.m" else "a.m" + +let strf_S t = + let s = string_of_int t.Unix.tm_sec in + if String.length s = 1 then "0" ^ s else s + +let strf_T t = + strf_H t ^ ":" ^ strf_M t ^ ":" ^ strf_S t + +let strf_u t = + match t.Unix.tm_wday with + | 0 -> "7" + | n -> string_of_int (n + 1) + +let strf_w t = + string_of_int t.Unix.tm_wday + +let strf_Y t = + string_of_int (t.Unix.tm_year + 1900) + +let strf_percent _ = "%" + +let strftime_pairs = + ["%a", strf_a; "%A", strf_A; "%b", strf_b; "%B", strf_B; + "%d", strf_d; "%e", strf_e; "%H", strf_H; + "%I", strf_I; "%j", strf_j; "%m", strf_m; "%M", strf_M; + "%p", strf_p; "%S", strf_S; "%T", strf_T; "%u", strf_u; + "%w", strf_w; "%Y", strf_Y; "%%", strf_percent] + +let strftime text = + let time = Unix.localtime (Unix.gettimeofday ()) in + let text = ref text in + iter + (fun (search, replace_fun) -> + text := string_replace_all search (replace_fun time) !text) + strftime_pairs; + !text + diff --git a/cpdfstrftime.mli b/cpdfstrftime.mli new file mode 100644 index 0000000..e9a366a --- /dev/null +++ b/cpdfstrftime.mli @@ -0,0 +1,2 @@ +(* C-style strftime *) +val strftime : string -> string