mirror of
https://github.com/johnwhitington/cpdf-source.git
synced 2025-02-20 05:40:41 +01:00
more
This commit is contained in:
parent
3f6cc82efa
commit
37f900042e
2
Makefile
2
Makefile
@ -1,7 +1,7 @@
|
|||||||
# Build the cpdf command line tools and top level
|
# Build the cpdf command line tools and top level
|
||||||
MODS = cpdfyojson cpdfxmlm \
|
MODS = cpdfyojson cpdfxmlm \
|
||||||
cpdfunicodedata cpdferror cpdfjson cpdfstrftime cpdfcoord cpdfattach \
|
cpdfunicodedata cpdferror cpdfjson cpdfstrftime cpdfcoord cpdfattach \
|
||||||
cpdfpagespec cpdfposition cpdf cpdfpresent cpdffont cpdftype \
|
cpdfpagespec cpdfposition cpdfpresent cpdfmetadata cpdf cpdffont cpdftype \
|
||||||
cpdftexttopdf cpdftoc cpdfpad cpdfocg cpdfsqueeze cpdfcommand
|
cpdftexttopdf cpdftoc cpdfpad cpdfocg cpdfsqueeze cpdfcommand
|
||||||
|
|
||||||
SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml
|
SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml
|
||||||
|
675
cpdf.ml
675
cpdf.ml
@ -10,67 +10,11 @@ type color =
|
|||||||
|
|
||||||
let debug = ref false
|
let debug = ref false
|
||||||
|
|
||||||
let xmp_template =
|
|
||||||
{|<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?>
|
|
||||||
|
|
||||||
<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'
|
|
||||||
xmlns:iX='http://ns.adobe.com/iX/1.0/'>
|
|
||||||
|
|
||||||
<rdf:Description about=''
|
|
||||||
xmlns='http://ns.adobe.com/pdf/1.3/'
|
|
||||||
xmlns:pdf='http://ns.adobe.com/pdf/1.3/'>
|
|
||||||
<pdf:CreationDate>CREATEDATE</pdf:CreationDate>
|
|
||||||
<pdf:ModDate>MODDATE</pdf:ModDate>
|
|
||||||
<pdf:Producer>PRODUCER</pdf:Producer>
|
|
||||||
<pdf:Creator>CREATOR</pdf:Creator>
|
|
||||||
<pdf:Title>TITLE</pdf:Title>
|
|
||||||
<pdf:Subject>SUBJECT</pdf:Subject>
|
|
||||||
<pdf:Author>AUTHOR</pdf:Author>
|
|
||||||
<pdf:Keywords>KEYWORDS</pdf:Keywords>
|
|
||||||
<pdf:Trapped>TRAPPED</pdf:Trapped>
|
|
||||||
</rdf:Description>
|
|
||||||
|
|
||||||
<rdf:Description about=''
|
|
||||||
xmlns='http://ns.adobe.com/xap/1.0/'
|
|
||||||
xmlns:xap='http://ns.adobe.com/xap/1.0/'>
|
|
||||||
<xap:CreateDate>CREATEDATE</xap:CreateDate>
|
|
||||||
<xap:CreatorTool>CREATOR</xap:CreatorTool>
|
|
||||||
<xap:ModifyDate>MODDATE</xap:ModifyDate>
|
|
||||||
<xap:MetadataDate>METADATADATE</xap:MetadataDate>
|
|
||||||
</rdf:Description>
|
|
||||||
|
|
||||||
<rdf:Description about=''
|
|
||||||
xmlns='http://purl.org/dc/elements/1.1/'
|
|
||||||
xmlns:dc='http://purl.org/dc/elements/1.1/'>
|
|
||||||
<dc:title>TITLE</dc:title>
|
|
||||||
</rdf:Description>
|
|
||||||
|
|
||||||
</rdf:RDF>
|
|
||||||
|
|
||||||
<?xpacket end='r'?>|}
|
|
||||||
|
|
||||||
type encoding =
|
|
||||||
| Raw
|
|
||||||
| UTF8
|
|
||||||
| Stripped
|
|
||||||
|
|
||||||
(* Just strip everything which isn't 7 bit ASCII *)
|
|
||||||
let crude_de_unicode s =
|
|
||||||
implode (map char_of_int (lose (fun x -> x > 127) (Pdftext.codepoints_of_pdfdocstring s)))
|
|
||||||
|
|
||||||
let encode_output enc s =
|
|
||||||
match enc with
|
|
||||||
| Raw -> s
|
|
||||||
| UTF8 -> Pdftext.utf8_of_pdfdocstring s
|
|
||||||
| Stripped -> crude_de_unicode s
|
|
||||||
|
|
||||||
(* Get the number of pages in file. Doesn't need decryption. *)
|
(* Get the number of pages in file. Doesn't need decryption. *)
|
||||||
let endpage_io ?revision i user_pw owner_pw =
|
let endpage_io ?revision i user_pw owner_pw =
|
||||||
let pdf = Pdfread.pdf_of_input_lazy ?revision user_pw owner_pw i in
|
let pdf = Pdfread.pdf_of_input_lazy ?revision user_pw owner_pw i in
|
||||||
Pdfpage.endpage pdf
|
Pdfpage.endpage pdf
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
let print_pdf_objs pdf =
|
let print_pdf_objs pdf =
|
||||||
Printf.printf "Trailerdict: %s\n" (Pdfwrite.string_of_pdf pdf.Pdf.trailerdict);
|
Printf.printf "Trailerdict: %s\n" (Pdfwrite.string_of_pdf pdf.Pdf.trailerdict);
|
||||||
Printf.printf "Root: %i\n" pdf.Pdf.root;
|
Printf.printf "Root: %i\n" pdf.Pdf.root;
|
||||||
@ -103,14 +47,6 @@ let rec process_text time text m =
|
|||||||
| [] -> Cpdfstrftime.strftime ~time text
|
| [] -> Cpdfstrftime.strftime ~time text
|
||||||
| (s, r)::t -> process_text time (string_replace_all_lazy s r text) t
|
| (s, r)::t -> process_text time (string_replace_all_lazy s r text) t
|
||||||
|
|
||||||
let expand_date = function
|
|
||||||
| "now" ->
|
|
||||||
begin match Sys.getenv_opt "CPDF_REPRODUCIBLE_DATES" with
|
|
||||||
| Some "true" -> Cpdfstrftime.strftime ~time:Cpdfstrftime.dummy "D:%Y%m%d%H%M%S"
|
|
||||||
| _ -> Cpdfstrftime.strftime "D:%Y%m%d%H%M%S"
|
|
||||||
end
|
|
||||||
| x -> x
|
|
||||||
|
|
||||||
(* For uses of process_pages which don't need to deal with matrices, this
|
(* For uses of process_pages which don't need to deal with matrices, this
|
||||||
function transforms into one which returns the identity matrix *)
|
function transforms into one which returns the identity matrix *)
|
||||||
let ppstub f n p = (f n p, n, Pdftransform.i_matrix)
|
let ppstub f n p = (f n p, n, Pdftransform.i_matrix)
|
||||||
@ -205,17 +141,6 @@ let combine_pdf_resources pdf a b =
|
|||||||
(Pdf.Dictionary [])
|
(Pdf.Dictionary [])
|
||||||
(unknown_keys_a @ unknown_keys_b @ combined_known_entries)
|
(unknown_keys_a @ unknown_keys_b @ combined_known_entries)
|
||||||
|
|
||||||
(* \section{Copy an /ID from one file to another} *)
|
|
||||||
let copy_id keepversion copyfrom copyto =
|
|
||||||
match Pdf.lookup_direct copyfrom "/ID" copyfrom.Pdf.trailerdict with
|
|
||||||
| None -> copyto (* error "Source PDF file has no /ID entry to copy from" *)
|
|
||||||
| Some id ->
|
|
||||||
copyto.Pdf.trailerdict <-
|
|
||||||
Pdf.add_dict_entry copyto.Pdf.trailerdict "/ID" id;
|
|
||||||
copyto.Pdf.minor <-
|
|
||||||
if keepversion then copyto.Pdf.minor else max copyto.Pdf.minor 1;
|
|
||||||
copyto
|
|
||||||
|
|
||||||
(* \section{Remove bookmarks} *)
|
(* \section{Remove bookmarks} *)
|
||||||
|
|
||||||
(* \section{Add bookmarks} *)
|
(* \section{Add bookmarks} *)
|
||||||
@ -377,155 +302,6 @@ let add_bookmarks ~json verify input pdf =
|
|||||||
(*iter (fun b -> flprint (Pdfmarks.string_of_bookmark b); flprint "\n") parsed;*)
|
(*iter (fun b -> flprint (Pdfmarks.string_of_bookmark b); flprint "\n") parsed;*)
|
||||||
Pdfmarks.add_bookmarks parsed pdf
|
Pdfmarks.add_bookmarks parsed pdf
|
||||||
|
|
||||||
(* \section{Set page mode} *)
|
|
||||||
let set_page_mode pdf s =
|
|
||||||
match s with
|
|
||||||
| "UseNone" | "UseOutlines" | "UseThumbs"
|
|
||||||
| "FullScreen" | "UseOC" | "UseAttachments" ->
|
|
||||||
begin match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
|
|
||||||
| Some catalog ->
|
|
||||||
let catalog' =
|
|
||||||
Pdf.add_dict_entry catalog "/PageMode" (Pdf.Name ("/" ^ s))
|
|
||||||
in
|
|
||||||
let catalognum = Pdf.addobj pdf catalog' in
|
|
||||||
let trailerdict' =
|
|
||||||
Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum)
|
|
||||||
in
|
|
||||||
{pdf with
|
|
||||||
Pdf.root = catalognum;
|
|
||||||
Pdf.trailerdict = trailerdict'}
|
|
||||||
| None -> error "bad root"
|
|
||||||
end
|
|
||||||
| _ -> error "Unknown page mode"
|
|
||||||
|
|
||||||
(* Set open action *)
|
|
||||||
let set_open_action pdf fit pagenumber =
|
|
||||||
if pagenumber > Pdfpage.endpage pdf || pagenumber < 0 then
|
|
||||||
raise (error "set_open_action: invalid page number")
|
|
||||||
else
|
|
||||||
let pageobjectnumber = select pagenumber (Pdf.page_reference_numbers pdf) in
|
|
||||||
let destination =
|
|
||||||
if fit then
|
|
||||||
Pdf.Array [Pdf.Indirect pageobjectnumber; Pdf.Name "/Fit"]
|
|
||||||
else
|
|
||||||
Pdf.Array [Pdf.Indirect pageobjectnumber; Pdf.Name "/XYZ"; Pdf.Null; Pdf.Null; Pdf.Null]
|
|
||||||
in
|
|
||||||
let open_action =
|
|
||||||
Pdf.Dictionary [("/D", destination); ("/S", Pdf.Name "/GoTo")]
|
|
||||||
in
|
|
||||||
match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
|
|
||||||
| Some catalog ->
|
|
||||||
let catalog' =
|
|
||||||
Pdf.add_dict_entry catalog "/OpenAction" open_action
|
|
||||||
in
|
|
||||||
let catalognum = Pdf.addobj pdf catalog' in
|
|
||||||
let trailerdict' =
|
|
||||||
Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum)
|
|
||||||
in
|
|
||||||
{pdf with Pdf.root = catalognum; Pdf.trailerdict = trailerdict'}
|
|
||||||
| None -> error "bad root"
|
|
||||||
|
|
||||||
(* \section{Set viewer preferences} *)
|
|
||||||
let set_viewer_preference (key, value, version) pdf =
|
|
||||||
match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
|
|
||||||
| Some catalog ->
|
|
||||||
let viewer_preferences =
|
|
||||||
match Pdf.lookup_direct pdf "/ViewerPreferences" catalog with
|
|
||||||
| Some d -> d
|
|
||||||
| None -> Pdf.Dictionary []
|
|
||||||
in
|
|
||||||
let viewer_preferences' =
|
|
||||||
Pdf.add_dict_entry viewer_preferences key value
|
|
||||||
in
|
|
||||||
let catalog' =
|
|
||||||
Pdf.add_dict_entry catalog "/ViewerPreferences" viewer_preferences'
|
|
||||||
in
|
|
||||||
let catalognum = Pdf.addobj pdf catalog' in
|
|
||||||
let trailerdict' =
|
|
||||||
Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum)
|
|
||||||
in
|
|
||||||
{pdf with
|
|
||||||
Pdf.minor = max pdf.Pdf.minor version;
|
|
||||||
Pdf.root = catalognum;
|
|
||||||
Pdf.trailerdict = trailerdict'}
|
|
||||||
| None -> error "bad root"
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(* \section{Set page layout} *)
|
|
||||||
let set_page_layout pdf s =
|
|
||||||
match s with
|
|
||||||
| "SinglePage" | "OneColumn" | "TwoColumnLeft"
|
|
||||||
| "TwoColumnRight" | "TwoPageLeft" | "TwoPageRight" ->
|
|
||||||
begin match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
|
|
||||||
| Some catalog ->
|
|
||||||
let catalog' =
|
|
||||||
Pdf.add_dict_entry catalog "/PageLayout" (Pdf.Name ("/" ^ s))
|
|
||||||
in
|
|
||||||
let catalognum = Pdf.addobj pdf catalog' in
|
|
||||||
let trailerdict' =
|
|
||||||
Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum)
|
|
||||||
in
|
|
||||||
{pdf with
|
|
||||||
Pdf.root = catalognum;
|
|
||||||
Pdf.trailerdict = trailerdict'}
|
|
||||||
| None -> error "bad root"
|
|
||||||
end
|
|
||||||
| _ -> error "Unknown page layout"
|
|
||||||
|
|
||||||
(* \section{Set or replace metadata} *)
|
|
||||||
let set_metadata_from_bytes keepversion data pdf =
|
|
||||||
let metadata_stream =
|
|
||||||
Pdf.Stream
|
|
||||||
{contents =
|
|
||||||
(Pdf.Dictionary
|
|
||||||
["/Length", Pdf.Integer (bytes_size data);
|
|
||||||
"/Type", Pdf.Name "/Metadata";
|
|
||||||
"/Subtype", Pdf.Name "/XML"],
|
|
||||||
Pdf.Got data)}
|
|
||||||
in
|
|
||||||
let objnum = Pdf.addobj pdf metadata_stream in
|
|
||||||
let document_catalog =
|
|
||||||
match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
|
|
||||||
| Some s -> s
|
|
||||||
| None -> error "Malformed PDF: No root."
|
|
||||||
in
|
|
||||||
let document_catalog' =
|
|
||||||
Pdf.add_dict_entry document_catalog "/Metadata" (Pdf.Indirect objnum)
|
|
||||||
in
|
|
||||||
let rootnum = Pdf.addobj pdf document_catalog' in
|
|
||||||
let trailerdict =
|
|
||||||
Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootnum)
|
|
||||||
in
|
|
||||||
{pdf with
|
|
||||||
Pdf.trailerdict = trailerdict;
|
|
||||||
Pdf.root = rootnum;
|
|
||||||
Pdf.minor =
|
|
||||||
if keepversion then pdf.Pdf.minor else max 4 pdf.Pdf.minor}
|
|
||||||
|
|
||||||
let set_metadata keepversion filename pdf =
|
|
||||||
let ch = open_in_bin filename in
|
|
||||||
let data = mkbytes (in_channel_length ch) in
|
|
||||||
for x = 0 to bytes_size data - 1 do
|
|
||||||
bset data x (input_byte ch)
|
|
||||||
done;
|
|
||||||
set_metadata_from_bytes keepversion data pdf
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(* \section{Remove metadata} *)
|
|
||||||
let remove_metadata pdf =
|
|
||||||
match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
|
|
||||||
| None -> error "malformed file"
|
|
||||||
| Some root ->
|
|
||||||
let root' = Pdf.remove_dict_entry root "/Metadata" in
|
|
||||||
let rootnum = Pdf.addobj pdf root' in
|
|
||||||
{pdf with
|
|
||||||
Pdf.trailerdict =
|
|
||||||
Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootnum);
|
|
||||||
Pdf.root =
|
|
||||||
rootnum}
|
|
||||||
|
|
||||||
(* List bookmarks *)
|
(* List bookmarks *)
|
||||||
let output_string_of_target pdf fastrefnums x =
|
let output_string_of_target pdf fastrefnums x =
|
||||||
match Pdfdest.pdfobject_of_destination x with
|
match Pdfdest.pdfobject_of_destination x with
|
||||||
@ -584,9 +360,9 @@ let list_bookmarks ~json encoding range pdf output =
|
|||||||
replace q bs q (replace nl bs n (replace bs bs bs codepoints))
|
replace q bs q (replace nl bs n (replace bs bs bs codepoints))
|
||||||
in
|
in
|
||||||
match encoding with
|
match encoding with
|
||||||
| UTF8 -> Pdftext.utf8_of_codepoints escaped
|
| Cpdfmetadata.UTF8 -> Pdftext.utf8_of_codepoints escaped
|
||||||
| Stripped -> process_stripped escaped
|
| Cpdfmetadata.Stripped -> process_stripped escaped
|
||||||
| Raw -> s
|
| Cpdfmetadata.Raw -> s
|
||||||
in
|
in
|
||||||
let bookmarks = Pdfmarks.read_bookmarks pdf in
|
let bookmarks = Pdfmarks.read_bookmarks pdf in
|
||||||
let refnums = Pdf.page_reference_numbers pdf in
|
let refnums = Pdf.page_reference_numbers pdf in
|
||||||
@ -719,27 +495,6 @@ let hasbox pdf page boxname =
|
|||||||
| _ -> false
|
| _ -> false
|
||||||
|
|
||||||
|
|
||||||
(* Print metadata *)
|
|
||||||
let get_metadata pdf =
|
|
||||||
match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
|
|
||||||
| None -> error "malformed file"
|
|
||||||
| Some root ->
|
|
||||||
match Pdf.lookup_direct pdf "/Metadata" root with
|
|
||||||
| Some ((Pdf.Stream _) as s) ->
|
|
||||||
Pdfcodec.decode_pdfstream pdf s;
|
|
||||||
begin match s with
|
|
||||||
| Pdf.Stream {contents = (_, Pdf.Got data)} -> Some data
|
|
||||||
| _ -> assert false
|
|
||||||
end
|
|
||||||
| _ -> None
|
|
||||||
|
|
||||||
let print_metadata pdf =
|
|
||||||
match get_metadata pdf with
|
|
||||||
None -> ()
|
|
||||||
| Some data ->
|
|
||||||
for x = 0 to bytes_size data - 1 do
|
|
||||||
Printf.printf "%c" (char_of_int (bget data x))
|
|
||||||
done
|
|
||||||
|
|
||||||
(* List fonts *)
|
(* List fonts *)
|
||||||
let list_font pdf page (name, dict) =
|
let list_font pdf page (name, dict) =
|
||||||
@ -1728,7 +1483,7 @@ let stamp relative_to_cropbox position topline midline fast scale_to_fit isover
|
|||||||
let merged =
|
let merged =
|
||||||
{merged with Pdf.saved_encryption = pdf.Pdf.saved_encryption}
|
{merged with Pdf.saved_encryption = pdf.Pdf.saved_encryption}
|
||||||
in
|
in
|
||||||
let merged = copy_id true pdf merged in
|
let merged = Cpdfmetadata.copy_id true pdf merged in
|
||||||
let merged_pages = Pdfpage.pages_of_pagetree merged in
|
let merged_pages = Pdfpage.pages_of_pagetree merged in
|
||||||
let under_pages, over_page =
|
let under_pages, over_page =
|
||||||
all_but_last merged_pages, last merged_pages
|
all_but_last merged_pages, last merged_pages
|
||||||
@ -1819,7 +1574,7 @@ let stamp_as_xobject pdf range over =
|
|||||||
let merged =
|
let merged =
|
||||||
{merged with Pdf.saved_encryption = pdf.Pdf.saved_encryption}
|
{merged with Pdf.saved_encryption = pdf.Pdf.saved_encryption}
|
||||||
in
|
in
|
||||||
let merged = copy_id true pdf merged in
|
let merged = Cpdfmetadata.copy_id true pdf merged in
|
||||||
let merged_pages = Pdfpage.pages_of_pagetree merged in
|
let merged_pages = Pdfpage.pages_of_pagetree merged in
|
||||||
let under_pages, over_page =
|
let under_pages, over_page =
|
||||||
all_but_last merged_pages, last merged_pages
|
all_but_last merged_pages, last merged_pages
|
||||||
@ -2185,7 +1940,7 @@ let scale_contents ?(fast=false) position scale pdf range =
|
|||||||
(* \section{List annotations} *)
|
(* \section{List annotations} *)
|
||||||
let get_annotation_string encoding pdf annot =
|
let get_annotation_string encoding pdf annot =
|
||||||
match Pdf.lookup_direct pdf "/Contents" annot with
|
match Pdf.lookup_direct pdf "/Contents" annot with
|
||||||
| Some (Pdf.String s) -> encode_output encoding s
|
| Some (Pdf.String s) -> Cpdfmetadata.encode_output encoding s
|
||||||
| _ -> ""
|
| _ -> ""
|
||||||
|
|
||||||
let print_annotation encoding pdf num s =
|
let print_annotation encoding pdf num s =
|
||||||
@ -2696,415 +2451,6 @@ let twoup fast pdf =
|
|||||||
let pdf = upright all (rotate_pdf ~-90 pdf all) in
|
let pdf = upright all (rotate_pdf ~-90 pdf all) in
|
||||||
scale_to_fit_pdf ~fast Cpdfposition.Diagonal 1. (many (width, height) endpage) () pdf all
|
scale_to_fit_pdf ~fast Cpdfposition.Diagonal 1. (many (width, height) endpage) () pdf all
|
||||||
|
|
||||||
(* \section{Output info} *)
|
|
||||||
let get_info raw pdf =
|
|
||||||
let infodict =
|
|
||||||
match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with
|
|
||||||
| Some infodict -> infodict
|
|
||||||
| _ -> Pdf.Dictionary []
|
|
||||||
in
|
|
||||||
let getstring name =
|
|
||||||
match Pdf.lookup_direct pdf name infodict with
|
|
||||||
| Some (Pdf.String s) ->
|
|
||||||
if raw then s else crude_de_unicode s
|
|
||||||
| Some (Pdf.Boolean false) -> "False"
|
|
||||||
| Some (Pdf.Boolean true) -> "True"
|
|
||||||
| _ -> if name = "/Trapped" then "False" else ""
|
|
||||||
in
|
|
||||||
getstring
|
|
||||||
|
|
||||||
let get_info_utf8 pdf =
|
|
||||||
let infodict =
|
|
||||||
match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with
|
|
||||||
| Some infodict -> infodict
|
|
||||||
| _ -> Pdf.Dictionary []
|
|
||||||
in
|
|
||||||
(function name ->
|
|
||||||
match Pdf.lookup_direct pdf name infodict with
|
|
||||||
| Some (Pdf.String s) -> Pdftext.utf8_of_pdfdocstring s
|
|
||||||
| Some (Pdf.Boolean false) -> "False"
|
|
||||||
| Some (Pdf.Boolean true) -> "True"
|
|
||||||
| _ -> if name = "/Trapped" then "False" else "")
|
|
||||||
|
|
||||||
let getstring encoding pdf =
|
|
||||||
match encoding with
|
|
||||||
| Raw -> get_info true pdf
|
|
||||||
| Stripped -> get_info false pdf
|
|
||||||
| UTF8 -> get_info_utf8 pdf
|
|
||||||
|
|
||||||
let output_info encoding pdf =
|
|
||||||
let getstring = getstring encoding pdf in
|
|
||||||
Printf.printf "Version: %i.%i\n" pdf.Pdf.major pdf.Pdf.minor;
|
|
||||||
Printf.printf "Pages: %i\n" (Pdfpage.endpage pdf);
|
|
||||||
Printf.printf "Title: %s\n" (getstring "/Title");
|
|
||||||
Printf.printf "Author: %s\n" (getstring "/Author");
|
|
||||||
Printf.printf "Subject: %s\n" (getstring "/Subject");
|
|
||||||
Printf.printf "Keywords: %s\n" (getstring "/Keywords");
|
|
||||||
Printf.printf "Creator: %s\n" (getstring "/Creator");
|
|
||||||
Printf.printf "Producer: %s\n" (getstring "/Producer");
|
|
||||||
Printf.printf "Created: %s\n" (getstring "/CreationDate");
|
|
||||||
Printf.printf "Modified: %s\n" (getstring "/ModDate");
|
|
||||||
Printf.printf "Trapped: %s\n" (getstring "/Trapped")
|
|
||||||
|
|
||||||
type xmltree =
|
|
||||||
E of Cpdfxmlm.tag * xmltree list
|
|
||||||
| D of string
|
|
||||||
|
|
||||||
let xmltree_of_bytes b =
|
|
||||||
let i = Cpdfxmlm.make_input (`String (0, string_of_bytes b)) in
|
|
||||||
let el tag childs = E (tag, childs)
|
|
||||||
and data d = D d in
|
|
||||||
Cpdfxmlm.input_doc_tree ~el ~data i
|
|
||||||
|
|
||||||
let bytes_of_xmltree t =
|
|
||||||
let buf = Buffer.create 1024 in
|
|
||||||
let o = Cpdfxmlm.make_output (`Buffer buf) in
|
|
||||||
let frag = function
|
|
||||||
E (tag, childs) -> `El (tag, childs)
|
|
||||||
| D d -> `Data d
|
|
||||||
in
|
|
||||||
Cpdfxmlm.output_doc_tree frag o t;
|
|
||||||
bytes_of_string (Buffer.contents buf)
|
|
||||||
|
|
||||||
let rec string_of_xmltree = function
|
|
||||||
D d ->
|
|
||||||
Printf.sprintf "DATA {%s}" d
|
|
||||||
| E (tag, trees) ->
|
|
||||||
Printf.sprintf "ELT (%s, %s)"
|
|
||||||
(string_of_tag tag)
|
|
||||||
(string_of_xmltrees trees)
|
|
||||||
|
|
||||||
and string_of_tag ((n, n'), attributes) =
|
|
||||||
Printf.sprintf
|
|
||||||
"NAME |%s| |%s|, ATTRIBUTES {%s}" n n'
|
|
||||||
(string_of_attributes attributes)
|
|
||||||
|
|
||||||
and string_of_attribute ((n, n'), str) =
|
|
||||||
Printf.sprintf "ATTRNAME |%s| |%s|, STR {%s}" n n' str
|
|
||||||
|
|
||||||
and string_of_attributes attrs =
|
|
||||||
fold_left
|
|
||||||
(fun a b -> a ^ " " ^ b) "" (map string_of_attribute attrs)
|
|
||||||
|
|
||||||
and string_of_xmltrees trees =
|
|
||||||
fold_left
|
|
||||||
(fun a b -> a ^ " " ^ b) "" (map string_of_xmltree trees)
|
|
||||||
|
|
||||||
let adobe = "http://ns.adobe.com/pdf/1.3/"
|
|
||||||
|
|
||||||
let xmp = "http://ns.adobe.com/xap/1.0/"
|
|
||||||
|
|
||||||
let dc = "http://purl.org/dc/elements/1.1/"
|
|
||||||
|
|
||||||
let rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
|
||||||
|
|
||||||
let combine_with_spaces strs =
|
|
||||||
String.trim
|
|
||||||
(fold_left (fun x y -> x ^ (if x <> "" then ", " else "") ^ y) "" strs)
|
|
||||||
|
|
||||||
(* Collect all <li> elements inside a seq, bag, or alt. Combine with commas. If
|
|
||||||
none found, return empty string instead. *)
|
|
||||||
let collect_list_items = function
|
|
||||||
E (((n, n'), _), elts) when
|
|
||||||
n = rdf && (n' = "Alt" || n' = "Seq" || n' = "Bag")
|
|
||||||
->
|
|
||||||
combine_with_spaces
|
|
||||||
(option_map
|
|
||||||
(function
|
|
||||||
E (((n, n'), _), [D d]) when n = rdf && n' = "li" ->
|
|
||||||
Some d
|
|
||||||
| _ -> None)
|
|
||||||
elts)
|
|
||||||
| _ -> ""
|
|
||||||
|
|
||||||
let collect_list_items_all all =
|
|
||||||
match keep (function E _ -> true | _ -> false) all with
|
|
||||||
h::_ -> Some (collect_list_items h)
|
|
||||||
| [] -> None
|
|
||||||
|
|
||||||
let rec get_data_for namespace name = function
|
|
||||||
D _ -> None
|
|
||||||
| E (((n, n'), _), [D d]) when n = namespace && n' = name ->
|
|
||||||
Some d
|
|
||||||
| E (((n, n'), _), e) when n = namespace && n' = name ->
|
|
||||||
collect_list_items_all e
|
|
||||||
| E (_, l) ->
|
|
||||||
match option_map (get_data_for namespace name) l with
|
|
||||||
x :: _ -> Some x
|
|
||||||
| _ -> None
|
|
||||||
|
|
||||||
let output_xmp_info encoding pdf =
|
|
||||||
let print_out tree title namespace name =
|
|
||||||
match get_data_for namespace name tree with
|
|
||||||
None -> ()
|
|
||||||
| Some data ->
|
|
||||||
Printf.printf "%s: " title;
|
|
||||||
print_endline data
|
|
||||||
in
|
|
||||||
match get_metadata pdf with
|
|
||||||
None -> ()
|
|
||||||
| Some metadata ->
|
|
||||||
try
|
|
||||||
let dtd, tree = xmltree_of_bytes metadata in
|
|
||||||
print_out tree "XMP pdf:Keywords" adobe "Keywords";
|
|
||||||
print_out tree "XMP pdf:Producer" adobe "Producer";
|
|
||||||
print_out tree "XMP pdf:Trapped" adobe "Trapped";
|
|
||||||
print_out tree "XMP pdf:Title" adobe "Title";
|
|
||||||
print_out tree "XMP pdf:Creator" adobe "Creator";
|
|
||||||
print_out tree "XMP pdf:Subject" adobe "Subject";
|
|
||||||
print_out tree "XMP pdf:Author" adobe "Author";
|
|
||||||
print_out tree "XMP pdf:CreationDate" adobe "CreationDate";
|
|
||||||
print_out tree "XMP pdf:ModDate" adobe "ModDate";
|
|
||||||
print_out tree "XMP xmp:CreateDate" xmp "CreateDate";
|
|
||||||
print_out tree "XMP xmp:CreatorTool" xmp "CreatorTool";
|
|
||||||
print_out tree "XMP xmp:MetadataDate" xmp "MetadataDate";
|
|
||||||
print_out tree "XMP xmp:ModifyDate" xmp "ModifyDate";
|
|
||||||
print_out tree "XMP dc:title" dc "title";
|
|
||||||
print_out tree "XMP dc:creator" dc "creator";
|
|
||||||
print_out tree "XMP dc:subject" dc "subject";
|
|
||||||
print_out tree "XMP dc:description" dc "description"
|
|
||||||
with
|
|
||||||
_ -> ()
|
|
||||||
|
|
||||||
(* Get XMP info equivalent of an old metadata field *)
|
|
||||||
let check = function
|
|
||||||
"/Title" -> [(adobe, "Title"); (dc, "title")]
|
|
||||||
| "/Author" -> [(adobe, "Author"); (dc, "creator")]
|
|
||||||
| "/Subject" -> [(adobe, "Subject"); (dc, "subject")]
|
|
||||||
| "/Keywords" -> [(adobe, "Keywords")]
|
|
||||||
| "/Creator" -> [(adobe, "Creator"); (xmp, "CreatorTool")]
|
|
||||||
| "/Producer" -> [(adobe, "Producer")]
|
|
||||||
| "/CreationDate" -> [(adobe, "CreationDate"); (xmp, "CreateDate")]
|
|
||||||
| "/ModDate" -> [(adobe, "ModificationDate"); (xmp, "ModifyDate")]
|
|
||||||
| _ -> failwith "Cpdf.check_name not known"
|
|
||||||
|
|
||||||
let get_xmp_info pdf name =
|
|
||||||
let tocheck = check name in
|
|
||||||
match get_metadata pdf with
|
|
||||||
None -> ""
|
|
||||||
| Some metadata ->
|
|
||||||
try
|
|
||||||
let _, tree = xmltree_of_bytes metadata in
|
|
||||||
let results = map (fun (kind, key) -> match get_data_for kind key tree with Some x -> x | None -> "") tocheck in
|
|
||||||
match lose (eq "") results with
|
|
||||||
x::_ -> x
|
|
||||||
| [] -> ""
|
|
||||||
with
|
|
||||||
_ -> ""
|
|
||||||
|
|
||||||
(* Set XMP info *)
|
|
||||||
let rec set_xml_field kind fieldname value = function
|
|
||||||
D data -> D data
|
|
||||||
| E (((n, n'), m), _ (*[D _]*)) when n = kind && n' = fieldname -> (* Replace anything inside, including nothing i.e <tag/> *)
|
|
||||||
E (((n, n'), m), [D value])
|
|
||||||
| E (x, ts) -> E (x, map (set_xml_field kind fieldname value) ts)
|
|
||||||
|
|
||||||
let set_pdf_info_xml kind fieldname value xmldata pdf =
|
|
||||||
let dtd, tree = xmltree_of_bytes xmldata in
|
|
||||||
let str =
|
|
||||||
match value with
|
|
||||||
Pdf.String s -> s
|
|
||||||
| Pdf.Boolean true -> "True"
|
|
||||||
| Pdf.Boolean false -> "False"
|
|
||||||
| _ -> failwith "set_pdf_info_xml: not a string"
|
|
||||||
in
|
|
||||||
let newtree = set_xml_field kind fieldname str tree in
|
|
||||||
bytes_of_xmltree (dtd, newtree)
|
|
||||||
|
|
||||||
let set_pdf_info_xml_many changes value xmldata pdf =
|
|
||||||
let xmldata = ref xmldata in
|
|
||||||
iter
|
|
||||||
(fun (kind, fieldname) ->
|
|
||||||
xmldata := set_pdf_info_xml kind fieldname value !xmldata pdf)
|
|
||||||
changes;
|
|
||||||
!xmldata
|
|
||||||
|
|
||||||
|
|
||||||
(* \section{Set an entry in the /Info dictionary} *)
|
|
||||||
|
|
||||||
(* We must parse the date to get its components, then use strftime to build the
|
|
||||||
* new string in XMP format *)
|
|
||||||
|
|
||||||
type date =
|
|
||||||
{mutable year : int;
|
|
||||||
mutable month : int; (* 1 - 12 *)
|
|
||||||
mutable day : int; (* 1 - 31 *)
|
|
||||||
mutable hour : int; (* 0 - 23 *)
|
|
||||||
mutable minute : int; (* 0 - 59 *)
|
|
||||||
mutable second : int; (* 0 - 59 *)
|
|
||||||
mutable ut_relationship : int; (* -1, 0, +1 *)
|
|
||||||
mutable offset_hours : int; (* 0 - 59 *)
|
|
||||||
mutable offset_minutes : int (* 0 - 59 *)}
|
|
||||||
|
|
||||||
let default_date () =
|
|
||||||
{year = 0;
|
|
||||||
month = 1;
|
|
||||||
day = 1;
|
|
||||||
hour = 0;
|
|
||||||
minute = 0;
|
|
||||||
second = 0;
|
|
||||||
ut_relationship = 0;
|
|
||||||
offset_hours = 0;
|
|
||||||
offset_minutes = 0}
|
|
||||||
|
|
||||||
(* XMP date format is YYYY-MM-DDThh:mm:ssTZD *)
|
|
||||||
let make_xmp_date_from_components d =
|
|
||||||
let tzd =
|
|
||||||
if d.ut_relationship = 0 && d.offset_hours = 0 && d.offset_minutes = 0 then "Z" else
|
|
||||||
(if d.ut_relationship >=0 then "+" else "-") ^
|
|
||||||
Printf.sprintf "%02i" d.offset_hours ^
|
|
||||||
":" ^
|
|
||||||
Printf.sprintf "%02i" d.offset_minutes
|
|
||||||
in
|
|
||||||
Cpdfstrftime.strftime
|
|
||||||
~time:{Cpdfstrftime._tm_sec = d.second;
|
|
||||||
Cpdfstrftime._tm_min = d.minute;
|
|
||||||
Cpdfstrftime._tm_hour = d.hour;
|
|
||||||
Cpdfstrftime._tm_mday = d.day;
|
|
||||||
Cpdfstrftime._tm_mon = d.month - 1;
|
|
||||||
Cpdfstrftime._tm_year = d.year - 1900;
|
|
||||||
Cpdfstrftime._tm_wday = 0;
|
|
||||||
Cpdfstrftime._tm_yday = 0;
|
|
||||||
Cpdfstrftime._tm_isdst = false}
|
|
||||||
"%Y-%m-%dT%H:%M:%S"
|
|
||||||
^
|
|
||||||
tzd
|
|
||||||
|
|
||||||
let xmp_date date =
|
|
||||||
let d = default_date () in
|
|
||||||
try
|
|
||||||
match explode date with
|
|
||||||
'D'::':'::r ->
|
|
||||||
begin match r with
|
|
||||||
y1::y2::y3::y4::r ->
|
|
||||||
d.year <- int_of_string (implode [y1; y2; y3; y4]);
|
|
||||||
begin match r with
|
|
||||||
m1::m2::r ->
|
|
||||||
d.month <- int_of_string (implode [m1; m2]);
|
|
||||||
begin match r with
|
|
||||||
d1::d2::r ->
|
|
||||||
d.day <- int_of_string (implode [d1; d2]);
|
|
||||||
begin match r with
|
|
||||||
h1::h2::r ->
|
|
||||||
d.hour <- int_of_string (implode [h1; h2]);
|
|
||||||
begin match r with
|
|
||||||
m1::m2::r ->
|
|
||||||
d.minute <- int_of_string (implode [m1; m2]);
|
|
||||||
begin match r with
|
|
||||||
s1::s2::r ->
|
|
||||||
d.second <- int_of_string (implode [s1; s2]);
|
|
||||||
begin match r with
|
|
||||||
o::r ->
|
|
||||||
d.ut_relationship <-
|
|
||||||
if o = '+' then 1 else
|
|
||||||
if o = '-' then -1 else
|
|
||||||
0;
|
|
||||||
begin match r with
|
|
||||||
h1::h2::'\''::r ->
|
|
||||||
d.offset_hours <- int_of_string (implode [h1; h2]);
|
|
||||||
begin match r with
|
|
||||||
m1::m2::_ ->
|
|
||||||
d.offset_minutes <- int_of_string (implode [m1; m2]);
|
|
||||||
raise Exit
|
|
||||||
| _ -> raise Exit
|
|
||||||
end
|
|
||||||
| _ -> raise Exit
|
|
||||||
end
|
|
||||||
| _ -> raise Exit
|
|
||||||
end
|
|
||||||
| _ -> raise Exit
|
|
||||||
end
|
|
||||||
| _ -> raise Exit
|
|
||||||
end
|
|
||||||
| _ -> raise Exit
|
|
||||||
end
|
|
||||||
| _ -> raise Exit
|
|
||||||
end
|
|
||||||
| _ -> raise Exit
|
|
||||||
end
|
|
||||||
| _ ->
|
|
||||||
Printf.eprintf "xmp_date: Malformed date string (no year): %s\n%!" date;
|
|
||||||
make_xmp_date_from_components d
|
|
||||||
end
|
|
||||||
| _ ->
|
|
||||||
Printf.eprintf "xmp_date: Malformed date string (no prefix): %s\n%!" date;
|
|
||||||
make_xmp_date_from_components d
|
|
||||||
with
|
|
||||||
Exit -> make_xmp_date_from_components d
|
|
||||||
|
|
||||||
let set_pdf_info ?(xmp_also=false) ?(xmp_just_set=false) (key, value, version) pdf =
|
|
||||||
let infodict =
|
|
||||||
match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with
|
|
||||||
| Some d -> d
|
|
||||||
| None -> Pdf.Dictionary []
|
|
||||||
in
|
|
||||||
let infodict' = Pdf.add_dict_entry infodict key value in
|
|
||||||
let objnum = Pdf.addobj pdf infodict' in
|
|
||||||
if not xmp_just_set then
|
|
||||||
begin
|
|
||||||
pdf.Pdf.trailerdict <-
|
|
||||||
Pdf.add_dict_entry pdf.Pdf.trailerdict "/Info" (Pdf.Indirect objnum);
|
|
||||||
pdf.Pdf.minor <-
|
|
||||||
max pdf.Pdf.minor version
|
|
||||||
end;
|
|
||||||
if xmp_also then
|
|
||||||
begin match get_metadata pdf with
|
|
||||||
None -> pdf
|
|
||||||
| Some xmldata ->
|
|
||||||
let xmp_date = function Pdf.String s -> Pdf.String (xmp_date s) | _ -> failwith "xmp_date not a string" in
|
|
||||||
let changes, value =
|
|
||||||
match key with
|
|
||||||
| "/Producer" -> [(adobe, "Producer")], value
|
|
||||||
| "/Creator" -> [(adobe, "Creator"); (xmp, "CreatorTool"); (dc, "creator")], value
|
|
||||||
| "/Author" -> [(adobe, "Author")], value
|
|
||||||
| "/Title" -> [(adobe, "Title"); (dc, "title")], value
|
|
||||||
| "/Subject" -> [(adobe, "Subject"); (dc, "subject")], value
|
|
||||||
| "/Keywords" -> [(adobe, "Keywords")], value
|
|
||||||
| "/CreationDate" -> [(adobe, "CreationDate"); (xmp, "CreateDate")], xmp_date value
|
|
||||||
| "/ModDate" -> [(adobe, "ModDate"); (xmp, "ModifyDate")], xmp_date value
|
|
||||||
| "/Trapped" -> [(adobe, "Trapped")], value
|
|
||||||
| _ -> failwith "Unknown call to set_pdf_info"
|
|
||||||
in
|
|
||||||
set_metadata_from_bytes
|
|
||||||
true
|
|
||||||
(set_pdf_info_xml_many changes value xmldata pdf)
|
|
||||||
pdf
|
|
||||||
end
|
|
||||||
else
|
|
||||||
pdf
|
|
||||||
|
|
||||||
(* Set metadata date *)
|
|
||||||
let set_metadata_date pdf date =
|
|
||||||
match get_metadata pdf with
|
|
||||||
None -> pdf
|
|
||||||
| Some xmldata ->
|
|
||||||
let changes= [(xmp, "MetadataDate")] in
|
|
||||||
let value = match date with "now" -> xmp_date (expand_date "now") | x -> x in
|
|
||||||
set_metadata_from_bytes
|
|
||||||
true
|
|
||||||
(set_pdf_info_xml_many changes (Pdf.String value) xmldata pdf)
|
|
||||||
pdf
|
|
||||||
|
|
||||||
let replacements pdf =
|
|
||||||
let info = get_info_utf8 pdf in
|
|
||||||
[("CREATEDATE", xmp_date (let i = info "/CreationDate" in if i = "" then expand_date "now" else i));
|
|
||||||
("MODDATE", xmp_date (let i = info "/ModDate" in if i = "" then expand_date "now" else i));
|
|
||||||
("PRODUCER", info "/Producer");
|
|
||||||
("CREATOR", info "/Creator");
|
|
||||||
("TITLE", info "/Title");
|
|
||||||
("SUBJECT", info "/Subject");
|
|
||||||
("AUTHOR", info "/Author");
|
|
||||||
("KEYWORDS", info "/Keywords");
|
|
||||||
("TRAPPED", info "/Trapped");
|
|
||||||
("METADATADATE", xmp_date (expand_date "now"))]
|
|
||||||
|
|
||||||
let create_metadata pdf =
|
|
||||||
let xmp = ref xmp_template in
|
|
||||||
iter
|
|
||||||
(fun (s, r) -> xmp := string_replace_all s r !xmp)
|
|
||||||
(replacements pdf);
|
|
||||||
set_metadata_from_bytes false (bytes_of_string !xmp) pdf
|
|
||||||
|
|
||||||
(* \section{Blacken text} *)
|
(* \section{Blacken text} *)
|
||||||
|
|
||||||
(*
|
(*
|
||||||
@ -3497,9 +2843,6 @@ let draft onlyremove boxes range pdf =
|
|||||||
pagenums;
|
pagenums;
|
||||||
Pdfpage.change_pages true !pdf (rev !pages')
|
Pdfpage.change_pages true !pdf (rev !pages')
|
||||||
|
|
||||||
let set_version v pdf =
|
|
||||||
pdf.Pdf.minor <- v
|
|
||||||
|
|
||||||
let blank_document width height pages =
|
let blank_document width height pages =
|
||||||
let pdf_pages =
|
let pdf_pages =
|
||||||
map (fun () -> Pdfpage.blankpage (Pdfpaper.make Pdfunits.PdfPoint width height)) (many () pages)
|
map (fun () -> Pdfpage.blankpage (Pdfpaper.make Pdfunits.PdfPoint width height)) (many () pages)
|
||||||
@ -4022,7 +3365,7 @@ let list_spot_colours pdf =
|
|||||||
let add_bookmark_title filename use_title pdf =
|
let add_bookmark_title filename use_title pdf =
|
||||||
let title =
|
let title =
|
||||||
if use_title then
|
if use_title then
|
||||||
match get_info_utf8 pdf "/Title", get_xmp_info pdf "/Title" with
|
match Cpdfmetadata.get_info_utf8 pdf "/Title", Cpdfmetadata.get_xmp_info pdf "/Title" with
|
||||||
"", x | x, "" | _, x -> x
|
"", x | x, "" | _, x -> x
|
||||||
else
|
else
|
||||||
Filename.basename filename
|
Filename.basename filename
|
||||||
@ -4062,13 +3405,13 @@ let create_pdf pages pagesize =
|
|||||||
|
|
||||||
(* Remove characters which might not make good filenames. *)
|
(* Remove characters which might not make good filenames. *)
|
||||||
let remove_unsafe_characters encoding s =
|
let remove_unsafe_characters encoding s =
|
||||||
if encoding = Raw then s else
|
if encoding = Cpdfmetadata.Raw then s else
|
||||||
let chars =
|
let chars =
|
||||||
lose
|
lose
|
||||||
(function x ->
|
(function x ->
|
||||||
match x with
|
match x with
|
||||||
'/' | '?' | '<' | '>' | '\\' | ':' | '*' | '|' | '\"' | '^' | '+' | '=' -> true
|
'/' | '?' | '<' | '>' | '\\' | ':' | '*' | '|' | '\"' | '^' | '+' | '=' -> true
|
||||||
| x when int_of_char x < 32 || (int_of_char x > 126 && encoding <> Stripped) -> true
|
| x when int_of_char x < 32 || (int_of_char x > 126 && encoding <> Cpdfmetadata.Stripped) -> true
|
||||||
| _ -> false)
|
| _ -> false)
|
||||||
(explode s)
|
(explode s)
|
||||||
in
|
in
|
||||||
|
80
cpdf.mli
80
cpdf.mli
@ -1,13 +1,6 @@
|
|||||||
(** Coherent PDF Tools Core Routines *)
|
(** Coherent PDF Tools Core Routines *)
|
||||||
open Pdfutil
|
open Pdfutil
|
||||||
|
|
||||||
(** {2 Types and Exceptions} *)
|
|
||||||
|
|
||||||
(** Possible output encodings for some function. [Raw] does no processing at
|
|
||||||
all - the PDF string is output as-is. [UTF8] converts loslessly to UTF8.
|
|
||||||
[Stripped] extracts the unicode codepoints and returns only those which
|
|
||||||
correspond to 7 bit ASCII. *)
|
|
||||||
type encoding = Raw | UTF8 | Stripped
|
|
||||||
|
|
||||||
type color =
|
type color =
|
||||||
Grey of float
|
Grey of float
|
||||||
@ -38,45 +31,6 @@ val map_pages : (int -> Pdfpage.t -> 'a) -> Pdf.t -> int list -> 'a list
|
|||||||
|
|
||||||
val copy_cropbox_to_mediabox : Pdf.t -> int list -> Pdf.t
|
val copy_cropbox_to_mediabox : Pdf.t -> int list -> Pdf.t
|
||||||
|
|
||||||
(** {2 Metadata and settings} *)
|
|
||||||
|
|
||||||
(** [copy_id keepversion copyfrom copyto] copies the ID, if any, from
|
|
||||||
[copyfrom] to [copyto]. If [keepversion] is true, the PDF version of [copyto]
|
|
||||||
won't be affected. *)
|
|
||||||
val copy_id : bool -> Pdf.t -> Pdf.t -> Pdf.t
|
|
||||||
|
|
||||||
(** [set_pdf_info (key, value, version)] sets the entry [key] in the /Info directory, updating
|
|
||||||
the PDF minor version to [version].*)
|
|
||||||
val set_pdf_info : ?xmp_also:bool -> ?xmp_just_set:bool -> (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t
|
|
||||||
|
|
||||||
val get_xmp_info : Pdf.t -> string -> string
|
|
||||||
|
|
||||||
(** [set_pdf_info (key, value, version)] sets the entry [key] in the
|
|
||||||
/ViewerPreferences directory, updating the PDF minor version to [version].*)
|
|
||||||
val set_viewer_preference : (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t
|
|
||||||
|
|
||||||
(** Set the page layout to the given name (sans slash) e.g SinglePage *)
|
|
||||||
val set_page_layout : Pdf.t -> string -> Pdf.t
|
|
||||||
|
|
||||||
(** Set the page layout to the given name (sans slash) e.g SinglePage *)
|
|
||||||
val set_page_mode : Pdf.t -> string -> Pdf.t
|
|
||||||
|
|
||||||
(** Set the open action. If the boolean is true, /Fit will be used, otherwise /XYZ *)
|
|
||||||
val set_open_action : Pdf.t -> bool -> int -> Pdf.t
|
|
||||||
|
|
||||||
(** Set the PDF version number *)
|
|
||||||
val set_version : int -> Pdf.t -> unit
|
|
||||||
|
|
||||||
(** Given a PDF, returns a function which can lookup a given dictionary entry
|
|
||||||
from the /Info dictionary, returning it as a UTF8 string *)
|
|
||||||
val get_info_utf8 : Pdf.t -> string -> string
|
|
||||||
|
|
||||||
(** Output to standard output general information about a PDF. *)
|
|
||||||
val output_info : encoding -> Pdf.t -> unit
|
|
||||||
|
|
||||||
(** Output to standard output information from any XMP metadata stream in a PDF. *)
|
|
||||||
val output_xmp_info : encoding -> Pdf.t -> unit
|
|
||||||
|
|
||||||
(** {2 Bookmarks} *)
|
(** {2 Bookmarks} *)
|
||||||
|
|
||||||
(** [parse_bookmark_file verify pdf input] parses the bookmark file in [input].
|
(** [parse_bookmark_file verify pdf input] parses the bookmark file in [input].
|
||||||
@ -90,30 +44,9 @@ val add_bookmarks : json:bool -> bool -> Pdfio.input -> Pdf.t -> Pdf.t
|
|||||||
|
|
||||||
(** [list_bookmarks encoding range pdf output] lists the bookmarks to the given
|
(** [list_bookmarks encoding range pdf output] lists the bookmarks to the given
|
||||||
output in the format specified in cpdfmanual.pdf *)
|
output in the format specified in cpdfmanual.pdf *)
|
||||||
val list_bookmarks : json:bool -> encoding -> int list -> Pdf.t -> Pdfio.output -> unit
|
val list_bookmarks : json:bool -> Cpdfmetadata.encoding -> int list -> Pdf.t -> Pdfio.output -> unit
|
||||||
|
|
||||||
(** {2 XML Metadata} *)
|
|
||||||
|
|
||||||
(** [set_metadata keepversion filename pdf] sets the XML metadata of a PDF to the contents of [filename]. If [keepversion] is true, the PDF version will not be altered. *)
|
|
||||||
val set_metadata : bool -> string -> Pdf.t -> Pdf.t
|
|
||||||
|
|
||||||
(** The same, but the content comes from [bytes]. *)
|
|
||||||
val set_metadata_from_bytes : bool -> Pdfio.bytes -> Pdf.t -> Pdf.t
|
|
||||||
|
|
||||||
(** Remove the metadata from a file *)
|
|
||||||
val remove_metadata : Pdf.t -> Pdf.t
|
|
||||||
|
|
||||||
(** Extract metadata to a [Pdfio.bytes] *)
|
|
||||||
val get_metadata : Pdf.t -> Pdfio.bytes option
|
|
||||||
|
|
||||||
(** Print metadate to stdout *)
|
|
||||||
val print_metadata : Pdf.t -> unit
|
|
||||||
|
|
||||||
(** Set the metadata date *)
|
|
||||||
val set_metadata_date : Pdf.t -> string -> Pdf.t
|
|
||||||
|
|
||||||
(** Create XMP metadata from scratch *)
|
|
||||||
val create_metadata : Pdf.t -> Pdf.t
|
|
||||||
|
|
||||||
(** {2 Stamping} *)
|
(** {2 Stamping} *)
|
||||||
|
|
||||||
@ -145,9 +78,6 @@ val list_fonts : Pdf.t -> int list -> (int * string * string * string * string)
|
|||||||
|
|
||||||
(** {2 Adding text} *)
|
(** {2 Adding text} *)
|
||||||
|
|
||||||
(** Expand the string "now" to a PDF date string, ignoring any other string *)
|
|
||||||
val expand_date : string -> string
|
|
||||||
|
|
||||||
(** Justification of multiline text *)
|
(** Justification of multiline text *)
|
||||||
type justification =
|
type justification =
|
||||||
| LeftJustify
|
| LeftJustify
|
||||||
@ -281,10 +211,10 @@ val show_boxes : ?fast:bool -> Pdf.t -> int list -> Pdf.t
|
|||||||
(** {2 Annotations} *)
|
(** {2 Annotations} *)
|
||||||
|
|
||||||
(** List the annotations to standard output in a given encoding. See cpdfmanual.pdf for the format details. *)
|
(** List the annotations to standard output in a given encoding. See cpdfmanual.pdf for the format details. *)
|
||||||
val list_annotations : json:bool -> encoding -> Pdf.t -> unit
|
val list_annotations : json:bool -> Cpdfmetadata.encoding -> Pdf.t -> unit
|
||||||
|
|
||||||
(** Return the annotations as a (pagenumber, content) list *)
|
(** Return the annotations as a (pagenumber, content) list *)
|
||||||
val get_annotations : encoding -> Pdf.t -> (int * string) list
|
val get_annotations : Cpdfmetadata.encoding -> Pdf.t -> (int * string) list
|
||||||
|
|
||||||
(** Copy the annotations on a given set of pages from a to b. b is returned. *)
|
(** Copy the annotations on a given set of pages from a to b. b is returned. *)
|
||||||
val copy_annotations : int list -> Pdf.t -> Pdf.t -> Pdf.t
|
val copy_annotations : int list -> Pdf.t -> Pdf.t -> Pdf.t
|
||||||
@ -375,12 +305,12 @@ val bookmarks_open_to_level : int -> Pdf.t -> Pdf.t
|
|||||||
|
|
||||||
val create_pdf : int -> Pdfpaper.t -> Pdf.t
|
val create_pdf : int -> Pdfpaper.t -> Pdf.t
|
||||||
|
|
||||||
val name_of_spec : encoding ->
|
val name_of_spec : Cpdfmetadata.encoding ->
|
||||||
Pdfmarks.t list ->
|
Pdfmarks.t list ->
|
||||||
Pdf.t -> int -> string -> int -> string -> int -> int -> string
|
Pdf.t -> int -> string -> int -> string -> int -> int -> string
|
||||||
|
|
||||||
val extract_images : string ->
|
val extract_images : string ->
|
||||||
string ->
|
string ->
|
||||||
encoding -> bool -> bool -> Pdf.t -> int list -> string -> unit
|
Cpdfmetadata.encoding -> bool -> bool -> Pdf.t -> int list -> string -> unit
|
||||||
|
|
||||||
|
|
||||||
|
@ -417,7 +417,7 @@ type args =
|
|||||||
mutable retain_numbering : bool;
|
mutable retain_numbering : bool;
|
||||||
mutable remove_duplicate_fonts : bool;
|
mutable remove_duplicate_fonts : bool;
|
||||||
mutable remove_duplicate_streams : bool;
|
mutable remove_duplicate_streams : bool;
|
||||||
mutable encoding : Cpdf.encoding;
|
mutable encoding : Cpdfmetadata.encoding;
|
||||||
mutable scale : float;
|
mutable scale : float;
|
||||||
mutable copyfontpage : int;
|
mutable copyfontpage : int;
|
||||||
mutable copyfontname : string option;
|
mutable copyfontname : string option;
|
||||||
@ -536,7 +536,7 @@ let args =
|
|||||||
retain_numbering = false;
|
retain_numbering = false;
|
||||||
remove_duplicate_fonts = false;
|
remove_duplicate_fonts = false;
|
||||||
remove_duplicate_streams = false;
|
remove_duplicate_streams = false;
|
||||||
encoding = Cpdf.Stripped;
|
encoding = Cpdfmetadata.Stripped;
|
||||||
scale = 1.;
|
scale = 1.;
|
||||||
copyfontpage = 1;
|
copyfontpage = 1;
|
||||||
copyfontname = None;
|
copyfontname = None;
|
||||||
@ -655,7 +655,7 @@ let reset_arguments () =
|
|||||||
args.retain_numbering <- false;
|
args.retain_numbering <- false;
|
||||||
args.remove_duplicate_fonts <- false;
|
args.remove_duplicate_fonts <- false;
|
||||||
args.remove_duplicate_streams <- false;
|
args.remove_duplicate_streams <- false;
|
||||||
args.encoding <- Cpdf.Stripped;
|
args.encoding <- Cpdfmetadata.Stripped;
|
||||||
args.scale <- 1.;
|
args.scale <- 1.;
|
||||||
args.copyfontpage <- 1;
|
args.copyfontpage <- 1;
|
||||||
args.copyfontname <- None;
|
args.copyfontname <- None;
|
||||||
@ -1779,13 +1779,13 @@ and specs =
|
|||||||
Arg.Unit setrecrypt,
|
Arg.Unit setrecrypt,
|
||||||
" Keep this file's encryption when writing");
|
" Keep this file's encryption when writing");
|
||||||
("-raw",
|
("-raw",
|
||||||
Arg.Unit (setencoding Cpdf.Raw),
|
Arg.Unit (setencoding Cpdfmetadata.Raw),
|
||||||
" Do not process text");
|
" Do not process text");
|
||||||
("-stripped",
|
("-stripped",
|
||||||
Arg.Unit (setencoding Cpdf.Stripped),
|
Arg.Unit (setencoding Cpdfmetadata.Stripped),
|
||||||
" Process text by simple stripping to ASCII");
|
" Process text by simple stripping to ASCII");
|
||||||
("-utf8",
|
("-utf8",
|
||||||
Arg.Unit (setencoding Cpdf.UTF8),
|
Arg.Unit (setencoding Cpdfmetadata.UTF8),
|
||||||
" Process text by conversion to UTF8 Unicode");
|
" Process text by conversion to UTF8 Unicode");
|
||||||
("-fast",
|
("-fast",
|
||||||
Arg.Unit setfast,
|
Arg.Unit setfast,
|
||||||
@ -2724,15 +2724,15 @@ let unescape_octals s =
|
|||||||
implode (unescape_octals [] (explode s))
|
implode (unescape_octals [] (explode s))
|
||||||
|
|
||||||
let process s =
|
let process s =
|
||||||
if args.encoding <> Cpdf.Raw
|
if args.encoding <> Cpdfmetadata.Raw
|
||||||
then Pdftext.pdfdocstring_of_utf8 s
|
then Pdftext.pdfdocstring_of_utf8 s
|
||||||
else unescape_octals s
|
else unescape_octals s
|
||||||
|
|
||||||
let set_producer s pdf =
|
let set_producer s pdf =
|
||||||
ignore (Cpdf.set_pdf_info ("/Producer", Pdf.String (process s), 0) pdf)
|
ignore (Cpdfmetadata.set_pdf_info ("/Producer", Pdf.String (process s), 0) pdf)
|
||||||
|
|
||||||
let set_creator s pdf =
|
let set_creator s pdf =
|
||||||
ignore (Cpdf.set_pdf_info ("/Creator", Pdf.String (process s), 0) pdf)
|
ignore (Cpdfmetadata.set_pdf_info ("/Creator", Pdf.String (process s), 0) pdf)
|
||||||
|
|
||||||
let really_write_pdf ?(encryption = None) ?(is_decompress=false) mk_id pdf outname =
|
let really_write_pdf ?(encryption = None) ?(is_decompress=false) mk_id pdf outname =
|
||||||
if args.producer <> None then set_producer (unopt args.producer) pdf;
|
if args.producer <> None then set_producer (unopt args.producer) pdf;
|
||||||
@ -3146,8 +3146,8 @@ let go () =
|
|||||||
if inname <> "" then
|
if inname <> "" then
|
||||||
Printf.printf "Linearized: %b\n" (Pdfread.is_linearized (Pdfio.input_of_channel (open_in_bin inname)));
|
Printf.printf "Linearized: %b\n" (Pdfread.is_linearized (Pdfio.input_of_channel (open_in_bin inname)));
|
||||||
let pdf = decrypt_if_necessary input (Some Info) pdf in
|
let pdf = decrypt_if_necessary input (Some Info) pdf in
|
||||||
Cpdf.output_info args.encoding pdf;
|
Cpdfmetadata.output_info args.encoding pdf;
|
||||||
Cpdf.output_xmp_info args.encoding pdf
|
Cpdfmetadata.output_xmp_info args.encoding pdf
|
||||||
| Some PageInfo ->
|
| Some PageInfo ->
|
||||||
begin match args.inputs, args.out with
|
begin match args.inputs, args.out with
|
||||||
| (_, pagespec, _, _, _, _)::_, _ ->
|
| (_, pagespec, _, _, _, _)::_, _ ->
|
||||||
@ -3157,7 +3157,7 @@ let go () =
|
|||||||
| _ -> error "list-bookmarks: bad command line"
|
| _ -> error "list-bookmarks: bad command line"
|
||||||
end
|
end
|
||||||
| Some Metadata ->
|
| Some Metadata ->
|
||||||
Cpdf.print_metadata (get_single_pdf (Some Metadata) true)
|
Cpdfmetadata.print_metadata (get_single_pdf (Some Metadata) true)
|
||||||
| Some Fonts ->
|
| Some Fonts ->
|
||||||
begin match args.inputs, args.out with
|
begin match args.inputs, args.out with
|
||||||
| (_, pagespec, _, _, _, _)::_, _ ->
|
| (_, pagespec, _, _, _, _)::_, _ ->
|
||||||
@ -3357,14 +3357,14 @@ let go () =
|
|||||||
| SetCreate _ | SetModify _ | SetCreator _ | SetProducer _
|
| SetCreate _ | SetModify _ | SetCreator _ | SetProducer _
|
||||||
| SetTrapped | SetUntrapped) as op) ->
|
| SetTrapped | SetUntrapped) as op) ->
|
||||||
let key, value, version =
|
let key, value, version =
|
||||||
let f s = if args.encoding <> Cpdf.Raw then Pdftext.pdfdocstring_of_utf8 s else unescape_octals s in
|
let f s = if args.encoding <> Cpdfmetadata.Raw then Pdftext.pdfdocstring_of_utf8 s else unescape_octals s in
|
||||||
match op with
|
match op with
|
||||||
| SetAuthor s -> "/Author", Pdf.String (f s), 0
|
| SetAuthor s -> "/Author", Pdf.String (f s), 0
|
||||||
| SetTitle s -> "/Title", Pdf.String (f s), 1
|
| SetTitle s -> "/Title", Pdf.String (f s), 1
|
||||||
| SetSubject s -> "/Subject", Pdf.String (f s), 1
|
| SetSubject s -> "/Subject", Pdf.String (f s), 1
|
||||||
| SetKeywords s -> "/Keywords", Pdf.String (f s), 1
|
| SetKeywords s -> "/Keywords", Pdf.String (f s), 1
|
||||||
| SetCreate s -> "/CreationDate", Pdf.String (Cpdf.expand_date s), 0
|
| SetCreate s -> "/CreationDate", Pdf.String (Cpdfmetadata.expand_date s), 0
|
||||||
| SetModify s -> "/ModDate", Pdf.String (Cpdf.expand_date s), 0
|
| SetModify s -> "/ModDate", Pdf.String (Cpdfmetadata.expand_date s), 0
|
||||||
| SetCreator s -> "/Creator", Pdf.String (f s), 0
|
| SetCreator s -> "/Creator", Pdf.String (f s), 0
|
||||||
| SetProducer s -> "/Producer", Pdf.String (f s), 0
|
| SetProducer s -> "/Producer", Pdf.String (f s), 0
|
||||||
| SetTrapped -> "/Trapped", Pdf.Boolean true, 3
|
| SetTrapped -> "/Trapped", Pdf.Boolean true, 3
|
||||||
@ -3374,12 +3374,12 @@ let go () =
|
|||||||
let pdf = get_single_pdf args.op false in
|
let pdf = get_single_pdf args.op false in
|
||||||
let version = if args.keepversion then pdf.Pdf.minor else version in
|
let version = if args.keepversion then pdf.Pdf.minor else version in
|
||||||
write_pdf false
|
write_pdf false
|
||||||
(Cpdf.set_pdf_info
|
(Cpdfmetadata.set_pdf_info
|
||||||
~xmp_also:args.alsosetxml
|
~xmp_also:args.alsosetxml
|
||||||
~xmp_just_set:args.justsetxml
|
~xmp_just_set:args.justsetxml
|
||||||
(key, value, version) pdf)
|
(key, value, version) pdf)
|
||||||
| Some (SetMetadataDate date) ->
|
| Some (SetMetadataDate date) ->
|
||||||
write_pdf false (Cpdf.set_metadata_date (get_single_pdf args.op false) date)
|
write_pdf false (Cpdfmetadata.set_metadata_date (get_single_pdf args.op false) date)
|
||||||
| Some ((HideToolbar _ | HideMenubar _ | HideWindowUI _
|
| Some ((HideToolbar _ | HideMenubar _ | HideWindowUI _
|
||||||
| FitWindow _ | CenterWindow _ | DisplayDocTitle _) as op) ->
|
| FitWindow _ | CenterWindow _ | DisplayDocTitle _) as op) ->
|
||||||
begin match args.out with
|
begin match args.out with
|
||||||
@ -3396,20 +3396,20 @@ let go () =
|
|||||||
in
|
in
|
||||||
let pdf = get_single_pdf args.op false in
|
let pdf = get_single_pdf args.op false in
|
||||||
let version = if args.keepversion then pdf.Pdf.minor else version in
|
let version = if args.keepversion then pdf.Pdf.minor else version in
|
||||||
write_pdf false (Cpdf.set_viewer_preference (key, value, version) pdf)
|
write_pdf false (Cpdfmetadata.set_viewer_preference (key, value, version) pdf)
|
||||||
end
|
end
|
||||||
| Some (OpenAtPage str) ->
|
| Some (OpenAtPage str) ->
|
||||||
let pdf = get_single_pdf args.op false in
|
let pdf = get_single_pdf args.op false in
|
||||||
let range = parse_pagespec_allow_empty pdf str in
|
let range = parse_pagespec_allow_empty pdf str in
|
||||||
let n = match range with [x] -> x | _ -> error "open_at_page: range does not specify single page" in
|
let n = match range with [x] -> x | _ -> error "open_at_page: range does not specify single page" in
|
||||||
write_pdf false (Cpdf.set_open_action pdf false n)
|
write_pdf false (Cpdfmetadata.set_open_action pdf false n)
|
||||||
| Some (OpenAtPageFit str) ->
|
| Some (OpenAtPageFit str) ->
|
||||||
let pdf = get_single_pdf args.op false in
|
let pdf = get_single_pdf args.op false in
|
||||||
let range = parse_pagespec_allow_empty pdf str in
|
let range = parse_pagespec_allow_empty pdf str in
|
||||||
let n = match range with [x] -> x | _ -> error "open_at_page: range does not specify single page" in
|
let n = match range with [x] -> x | _ -> error "open_at_page: range does not specify single page" in
|
||||||
write_pdf false (Cpdf.set_open_action pdf true n)
|
write_pdf false (Cpdfmetadata.set_open_action pdf true n)
|
||||||
| Some (SetMetadata metadata_file) ->
|
| Some (SetMetadata metadata_file) ->
|
||||||
write_pdf false (Cpdf.set_metadata args.keepversion metadata_file (get_single_pdf args.op false))
|
write_pdf false (Cpdfmetadata.set_metadata args.keepversion metadata_file (get_single_pdf args.op false))
|
||||||
| Some (SetVersion v) ->
|
| Some (SetVersion v) ->
|
||||||
let pdf = get_single_pdf args.op false in
|
let pdf = get_single_pdf args.op false in
|
||||||
let pdf =
|
let pdf =
|
||||||
@ -3419,9 +3419,9 @@ let go () =
|
|||||||
in
|
in
|
||||||
write_pdf false pdf
|
write_pdf false pdf
|
||||||
| Some (SetPageLayout s) ->
|
| Some (SetPageLayout s) ->
|
||||||
write_pdf false (Cpdf.set_page_layout (get_single_pdf args.op false) s)
|
write_pdf false (Cpdfmetadata.set_page_layout (get_single_pdf args.op false) s)
|
||||||
| Some (SetPageMode s) ->
|
| Some (SetPageMode s) ->
|
||||||
write_pdf false (Cpdf.set_page_mode (get_single_pdf args.op false) s)
|
write_pdf false (Cpdfmetadata.set_page_mode (get_single_pdf args.op false) s)
|
||||||
| Some Split ->
|
| Some Split ->
|
||||||
begin match args.inputs, args.out with
|
begin match args.inputs, args.out with
|
||||||
| [(f, ranges, _, _, _, _)], File output_spec ->
|
| [(f, ranges, _, _, _, _)], File output_spec ->
|
||||||
@ -3514,7 +3514,7 @@ let go () =
|
|||||||
begin match args.inputs with
|
begin match args.inputs with
|
||||||
| [(k, _, u, o, _, _) as input] ->
|
| [(k, _, u, o, _, _) as input] ->
|
||||||
let pdf =
|
let pdf =
|
||||||
Cpdf.copy_id
|
Cpdfmetadata.copy_id
|
||||||
args.keepversion
|
args.keepversion
|
||||||
(pdfread_pdf_of_file (optstring u) (optstring o) getfrom)
|
(pdfread_pdf_of_file (optstring u) (optstring o) getfrom)
|
||||||
(get_pdf_from_input_kind input args.op k)
|
(get_pdf_from_input_kind input args.op k)
|
||||||
@ -3765,7 +3765,7 @@ let go () =
|
|||||||
args.recrypt <- false;
|
args.recrypt <- false;
|
||||||
write_pdf false (get_single_pdf args.op false)
|
write_pdf false (get_single_pdf args.op false)
|
||||||
| Some RemoveMetadata ->
|
| Some RemoveMetadata ->
|
||||||
write_pdf false (Cpdf.remove_metadata (get_single_pdf args.op false))
|
write_pdf false (Cpdfmetadata.remove_metadata (get_single_pdf args.op false))
|
||||||
| Some ExtractImages ->
|
| Some ExtractImages ->
|
||||||
let output_spec =
|
let output_spec =
|
||||||
begin match args.out with
|
begin match args.out with
|
||||||
@ -3838,7 +3838,7 @@ let go () =
|
|||||||
write_pdf false (Cpdf.remove_clipping pdf range)
|
write_pdf false (Cpdf.remove_clipping pdf range)
|
||||||
| Some CreateMetadata ->
|
| Some CreateMetadata ->
|
||||||
let pdf = get_single_pdf args.op false in
|
let pdf = get_single_pdf args.op false in
|
||||||
write_pdf false (Cpdf.create_metadata pdf)
|
write_pdf false (Cpdfmetadata.create_metadata pdf)
|
||||||
| Some EmbedMissingFonts ->
|
| Some EmbedMissingFonts ->
|
||||||
let fi =
|
let fi =
|
||||||
match args.inputs with
|
match args.inputs with
|
||||||
|
661
cpdfmetadata.ml
Normal file
661
cpdfmetadata.ml
Normal file
@ -0,0 +1,661 @@
|
|||||||
|
open Pdfutil
|
||||||
|
open Pdfio
|
||||||
|
open Cpdferror
|
||||||
|
|
||||||
|
type encoding =
|
||||||
|
| Raw
|
||||||
|
| UTF8
|
||||||
|
| Stripped
|
||||||
|
|
||||||
|
(* Just strip everything which isn't 7 bit ASCII *)
|
||||||
|
let crude_de_unicode s =
|
||||||
|
implode (map char_of_int (lose (fun x -> x > 127) (Pdftext.codepoints_of_pdfdocstring s)))
|
||||||
|
|
||||||
|
let encode_output enc s =
|
||||||
|
match enc with
|
||||||
|
| Raw -> s
|
||||||
|
| UTF8 -> Pdftext.utf8_of_pdfdocstring s
|
||||||
|
| Stripped -> crude_de_unicode s
|
||||||
|
|
||||||
|
let xmp_template =
|
||||||
|
{|<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?>
|
||||||
|
|
||||||
|
<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'
|
||||||
|
xmlns:iX='http://ns.adobe.com/iX/1.0/'>
|
||||||
|
|
||||||
|
<rdf:Description about=''
|
||||||
|
xmlns='http://ns.adobe.com/pdf/1.3/'
|
||||||
|
xmlns:pdf='http://ns.adobe.com/pdf/1.3/'>
|
||||||
|
<pdf:CreationDate>CREATEDATE</pdf:CreationDate>
|
||||||
|
<pdf:ModDate>MODDATE</pdf:ModDate>
|
||||||
|
<pdf:Producer>PRODUCER</pdf:Producer>
|
||||||
|
<pdf:Creator>CREATOR</pdf:Creator>
|
||||||
|
<pdf:Title>TITLE</pdf:Title>
|
||||||
|
<pdf:Subject>SUBJECT</pdf:Subject>
|
||||||
|
<pdf:Author>AUTHOR</pdf:Author>
|
||||||
|
<pdf:Keywords>KEYWORDS</pdf:Keywords>
|
||||||
|
<pdf:Trapped>TRAPPED</pdf:Trapped>
|
||||||
|
</rdf:Description>
|
||||||
|
|
||||||
|
<rdf:Description about=''
|
||||||
|
xmlns='http://ns.adobe.com/xap/1.0/'
|
||||||
|
xmlns:xap='http://ns.adobe.com/xap/1.0/'>
|
||||||
|
<xap:CreateDate>CREATEDATE</xap:CreateDate>
|
||||||
|
<xap:CreatorTool>CREATOR</xap:CreatorTool>
|
||||||
|
<xap:ModifyDate>MODDATE</xap:ModifyDate>
|
||||||
|
<xap:MetadataDate>METADATADATE</xap:MetadataDate>
|
||||||
|
</rdf:Description>
|
||||||
|
|
||||||
|
<rdf:Description about=''
|
||||||
|
xmlns='http://purl.org/dc/elements/1.1/'
|
||||||
|
xmlns:dc='http://purl.org/dc/elements/1.1/'>
|
||||||
|
<dc:title>TITLE</dc:title>
|
||||||
|
</rdf:Description>
|
||||||
|
|
||||||
|
</rdf:RDF>
|
||||||
|
|
||||||
|
<?xpacket end='r'?>|}
|
||||||
|
|
||||||
|
(* \section{Set or replace metadata} *)
|
||||||
|
let set_metadata_from_bytes keepversion data pdf =
|
||||||
|
let metadata_stream =
|
||||||
|
Pdf.Stream
|
||||||
|
{contents =
|
||||||
|
(Pdf.Dictionary
|
||||||
|
["/Length", Pdf.Integer (bytes_size data);
|
||||||
|
"/Type", Pdf.Name "/Metadata";
|
||||||
|
"/Subtype", Pdf.Name "/XML"],
|
||||||
|
Pdf.Got data)}
|
||||||
|
in
|
||||||
|
let objnum = Pdf.addobj pdf metadata_stream in
|
||||||
|
let document_catalog =
|
||||||
|
match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
|
||||||
|
| Some s -> s
|
||||||
|
| None -> error "Malformed PDF: No root."
|
||||||
|
in
|
||||||
|
let document_catalog' =
|
||||||
|
Pdf.add_dict_entry document_catalog "/Metadata" (Pdf.Indirect objnum)
|
||||||
|
in
|
||||||
|
let rootnum = Pdf.addobj pdf document_catalog' in
|
||||||
|
let trailerdict =
|
||||||
|
Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootnum)
|
||||||
|
in
|
||||||
|
{pdf with
|
||||||
|
Pdf.trailerdict = trailerdict;
|
||||||
|
Pdf.root = rootnum;
|
||||||
|
Pdf.minor =
|
||||||
|
if keepversion then pdf.Pdf.minor else max 4 pdf.Pdf.minor}
|
||||||
|
|
||||||
|
let set_metadata keepversion filename pdf =
|
||||||
|
let ch = open_in_bin filename in
|
||||||
|
let data = mkbytes (in_channel_length ch) in
|
||||||
|
for x = 0 to bytes_size data - 1 do
|
||||||
|
bset data x (input_byte ch)
|
||||||
|
done;
|
||||||
|
set_metadata_from_bytes keepversion data pdf
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
(* \section{Remove metadata} *)
|
||||||
|
let remove_metadata pdf =
|
||||||
|
match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
|
||||||
|
| None -> error "malformed file"
|
||||||
|
| Some root ->
|
||||||
|
let root' = Pdf.remove_dict_entry root "/Metadata" in
|
||||||
|
let rootnum = Pdf.addobj pdf root' in
|
||||||
|
{pdf with
|
||||||
|
Pdf.trailerdict =
|
||||||
|
Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootnum);
|
||||||
|
Pdf.root =
|
||||||
|
rootnum}
|
||||||
|
(* Print metadata *)
|
||||||
|
let get_metadata pdf =
|
||||||
|
match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
|
||||||
|
| None -> error "malformed file"
|
||||||
|
| Some root ->
|
||||||
|
match Pdf.lookup_direct pdf "/Metadata" root with
|
||||||
|
| Some ((Pdf.Stream _) as s) ->
|
||||||
|
Pdfcodec.decode_pdfstream pdf s;
|
||||||
|
begin match s with
|
||||||
|
| Pdf.Stream {contents = (_, Pdf.Got data)} -> Some data
|
||||||
|
| _ -> assert false
|
||||||
|
end
|
||||||
|
| _ -> None
|
||||||
|
|
||||||
|
let print_metadata pdf =
|
||||||
|
match get_metadata pdf with
|
||||||
|
None -> ()
|
||||||
|
| Some data ->
|
||||||
|
for x = 0 to bytes_size data - 1 do
|
||||||
|
Printf.printf "%c" (char_of_int (bget data x))
|
||||||
|
done
|
||||||
|
|
||||||
|
|
||||||
|
let get_info raw pdf =
|
||||||
|
let infodict =
|
||||||
|
match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with
|
||||||
|
| Some infodict -> infodict
|
||||||
|
| _ -> Pdf.Dictionary []
|
||||||
|
in
|
||||||
|
let getstring name =
|
||||||
|
match Pdf.lookup_direct pdf name infodict with
|
||||||
|
| Some (Pdf.String s) ->
|
||||||
|
if raw then s else crude_de_unicode s
|
||||||
|
| Some (Pdf.Boolean false) -> "False"
|
||||||
|
| Some (Pdf.Boolean true) -> "True"
|
||||||
|
| _ -> if name = "/Trapped" then "False" else ""
|
||||||
|
in
|
||||||
|
getstring
|
||||||
|
|
||||||
|
let get_info_utf8 pdf =
|
||||||
|
let infodict =
|
||||||
|
match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with
|
||||||
|
| Some infodict -> infodict
|
||||||
|
| _ -> Pdf.Dictionary []
|
||||||
|
in
|
||||||
|
(function name ->
|
||||||
|
match Pdf.lookup_direct pdf name infodict with
|
||||||
|
| Some (Pdf.String s) -> Pdftext.utf8_of_pdfdocstring s
|
||||||
|
| Some (Pdf.Boolean false) -> "False"
|
||||||
|
| Some (Pdf.Boolean true) -> "True"
|
||||||
|
| _ -> if name = "/Trapped" then "False" else "")
|
||||||
|
|
||||||
|
let getstring encoding pdf =
|
||||||
|
match encoding with
|
||||||
|
| Raw -> get_info true pdf
|
||||||
|
| Stripped -> get_info false pdf
|
||||||
|
| UTF8 -> get_info_utf8 pdf
|
||||||
|
|
||||||
|
let output_info encoding pdf =
|
||||||
|
let getstring = getstring encoding pdf in
|
||||||
|
Printf.printf "Version: %i.%i\n" pdf.Pdf.major pdf.Pdf.minor;
|
||||||
|
Printf.printf "Pages: %i\n" (Pdfpage.endpage pdf);
|
||||||
|
Printf.printf "Title: %s\n" (getstring "/Title");
|
||||||
|
Printf.printf "Author: %s\n" (getstring "/Author");
|
||||||
|
Printf.printf "Subject: %s\n" (getstring "/Subject");
|
||||||
|
Printf.printf "Keywords: %s\n" (getstring "/Keywords");
|
||||||
|
Printf.printf "Creator: %s\n" (getstring "/Creator");
|
||||||
|
Printf.printf "Producer: %s\n" (getstring "/Producer");
|
||||||
|
Printf.printf "Created: %s\n" (getstring "/CreationDate");
|
||||||
|
Printf.printf "Modified: %s\n" (getstring "/ModDate");
|
||||||
|
Printf.printf "Trapped: %s\n" (getstring "/Trapped")
|
||||||
|
|
||||||
|
type xmltree =
|
||||||
|
E of Cpdfxmlm.tag * xmltree list
|
||||||
|
| D of string
|
||||||
|
|
||||||
|
let xmltree_of_bytes b =
|
||||||
|
let i = Cpdfxmlm.make_input (`String (0, string_of_bytes b)) in
|
||||||
|
let el tag childs = E (tag, childs)
|
||||||
|
and data d = D d in
|
||||||
|
Cpdfxmlm.input_doc_tree ~el ~data i
|
||||||
|
|
||||||
|
let bytes_of_xmltree t =
|
||||||
|
let buf = Buffer.create 1024 in
|
||||||
|
let o = Cpdfxmlm.make_output (`Buffer buf) in
|
||||||
|
let frag = function
|
||||||
|
E (tag, childs) -> `El (tag, childs)
|
||||||
|
| D d -> `Data d
|
||||||
|
in
|
||||||
|
Cpdfxmlm.output_doc_tree frag o t;
|
||||||
|
bytes_of_string (Buffer.contents buf)
|
||||||
|
|
||||||
|
let rec string_of_xmltree = function
|
||||||
|
D d ->
|
||||||
|
Printf.sprintf "DATA {%s}" d
|
||||||
|
| E (tag, trees) ->
|
||||||
|
Printf.sprintf "ELT (%s, %s)"
|
||||||
|
(string_of_tag tag)
|
||||||
|
(string_of_xmltrees trees)
|
||||||
|
|
||||||
|
and string_of_tag ((n, n'), attributes) =
|
||||||
|
Printf.sprintf
|
||||||
|
"NAME |%s| |%s|, ATTRIBUTES {%s}" n n'
|
||||||
|
(string_of_attributes attributes)
|
||||||
|
|
||||||
|
and string_of_attribute ((n, n'), str) =
|
||||||
|
Printf.sprintf "ATTRNAME |%s| |%s|, STR {%s}" n n' str
|
||||||
|
|
||||||
|
and string_of_attributes attrs =
|
||||||
|
fold_left
|
||||||
|
(fun a b -> a ^ " " ^ b) "" (map string_of_attribute attrs)
|
||||||
|
|
||||||
|
and string_of_xmltrees trees =
|
||||||
|
fold_left
|
||||||
|
(fun a b -> a ^ " " ^ b) "" (map string_of_xmltree trees)
|
||||||
|
|
||||||
|
let adobe = "http://ns.adobe.com/pdf/1.3/"
|
||||||
|
|
||||||
|
let xmp = "http://ns.adobe.com/xap/1.0/"
|
||||||
|
|
||||||
|
let dc = "http://purl.org/dc/elements/1.1/"
|
||||||
|
|
||||||
|
let rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
|
||||||
|
let combine_with_spaces strs =
|
||||||
|
String.trim
|
||||||
|
(fold_left (fun x y -> x ^ (if x <> "" then ", " else "") ^ y) "" strs)
|
||||||
|
|
||||||
|
(* Collect all <li> elements inside a seq, bag, or alt. Combine with commas. If
|
||||||
|
none found, return empty string instead. *)
|
||||||
|
let collect_list_items = function
|
||||||
|
E (((n, n'), _), elts) when
|
||||||
|
n = rdf && (n' = "Alt" || n' = "Seq" || n' = "Bag")
|
||||||
|
->
|
||||||
|
combine_with_spaces
|
||||||
|
(option_map
|
||||||
|
(function
|
||||||
|
E (((n, n'), _), [D d]) when n = rdf && n' = "li" ->
|
||||||
|
Some d
|
||||||
|
| _ -> None)
|
||||||
|
elts)
|
||||||
|
| _ -> ""
|
||||||
|
|
||||||
|
let collect_list_items_all all =
|
||||||
|
match keep (function E _ -> true | _ -> false) all with
|
||||||
|
h::_ -> Some (collect_list_items h)
|
||||||
|
| [] -> None
|
||||||
|
|
||||||
|
let rec get_data_for namespace name = function
|
||||||
|
D _ -> None
|
||||||
|
| E (((n, n'), _), [D d]) when n = namespace && n' = name ->
|
||||||
|
Some d
|
||||||
|
| E (((n, n'), _), e) when n = namespace && n' = name ->
|
||||||
|
collect_list_items_all e
|
||||||
|
| E (_, l) ->
|
||||||
|
match option_map (get_data_for namespace name) l with
|
||||||
|
x :: _ -> Some x
|
||||||
|
| _ -> None
|
||||||
|
|
||||||
|
let output_xmp_info encoding pdf =
|
||||||
|
let print_out tree title namespace name =
|
||||||
|
match get_data_for namespace name tree with
|
||||||
|
None -> ()
|
||||||
|
| Some data ->
|
||||||
|
Printf.printf "%s: " title;
|
||||||
|
print_endline data
|
||||||
|
in
|
||||||
|
match get_metadata pdf with
|
||||||
|
None -> ()
|
||||||
|
| Some metadata ->
|
||||||
|
try
|
||||||
|
let dtd, tree = xmltree_of_bytes metadata in
|
||||||
|
print_out tree "XMP pdf:Keywords" adobe "Keywords";
|
||||||
|
print_out tree "XMP pdf:Producer" adobe "Producer";
|
||||||
|
print_out tree "XMP pdf:Trapped" adobe "Trapped";
|
||||||
|
print_out tree "XMP pdf:Title" adobe "Title";
|
||||||
|
print_out tree "XMP pdf:Creator" adobe "Creator";
|
||||||
|
print_out tree "XMP pdf:Subject" adobe "Subject";
|
||||||
|
print_out tree "XMP pdf:Author" adobe "Author";
|
||||||
|
print_out tree "XMP pdf:CreationDate" adobe "CreationDate";
|
||||||
|
print_out tree "XMP pdf:ModDate" adobe "ModDate";
|
||||||
|
print_out tree "XMP xmp:CreateDate" xmp "CreateDate";
|
||||||
|
print_out tree "XMP xmp:CreatorTool" xmp "CreatorTool";
|
||||||
|
print_out tree "XMP xmp:MetadataDate" xmp "MetadataDate";
|
||||||
|
print_out tree "XMP xmp:ModifyDate" xmp "ModifyDate";
|
||||||
|
print_out tree "XMP dc:title" dc "title";
|
||||||
|
print_out tree "XMP dc:creator" dc "creator";
|
||||||
|
print_out tree "XMP dc:subject" dc "subject";
|
||||||
|
print_out tree "XMP dc:description" dc "description"
|
||||||
|
with
|
||||||
|
_ -> ()
|
||||||
|
|
||||||
|
(* Get XMP info equivalent of an old metadata field *)
|
||||||
|
let check = function
|
||||||
|
"/Title" -> [(adobe, "Title"); (dc, "title")]
|
||||||
|
| "/Author" -> [(adobe, "Author"); (dc, "creator")]
|
||||||
|
| "/Subject" -> [(adobe, "Subject"); (dc, "subject")]
|
||||||
|
| "/Keywords" -> [(adobe, "Keywords")]
|
||||||
|
| "/Creator" -> [(adobe, "Creator"); (xmp, "CreatorTool")]
|
||||||
|
| "/Producer" -> [(adobe, "Producer")]
|
||||||
|
| "/CreationDate" -> [(adobe, "CreationDate"); (xmp, "CreateDate")]
|
||||||
|
| "/ModDate" -> [(adobe, "ModificationDate"); (xmp, "ModifyDate")]
|
||||||
|
| _ -> failwith "Cpdf.check_name not known"
|
||||||
|
|
||||||
|
let get_xmp_info pdf name =
|
||||||
|
let tocheck = check name in
|
||||||
|
match get_metadata pdf with
|
||||||
|
None -> ""
|
||||||
|
| Some metadata ->
|
||||||
|
try
|
||||||
|
let _, tree = xmltree_of_bytes metadata in
|
||||||
|
let results = map (fun (kind, key) -> match get_data_for kind key tree with Some x -> x | None -> "") tocheck in
|
||||||
|
match lose (eq "") results with
|
||||||
|
x::_ -> x
|
||||||
|
| [] -> ""
|
||||||
|
with
|
||||||
|
_ -> ""
|
||||||
|
|
||||||
|
(* Set XMP info *)
|
||||||
|
let rec set_xml_field kind fieldname value = function
|
||||||
|
D data -> D data
|
||||||
|
| E (((n, n'), m), _ (*[D _]*)) when n = kind && n' = fieldname -> (* Replace anything inside, including nothing i.e <tag/> *)
|
||||||
|
E (((n, n'), m), [D value])
|
||||||
|
| E (x, ts) -> E (x, map (set_xml_field kind fieldname value) ts)
|
||||||
|
|
||||||
|
let set_pdf_info_xml kind fieldname value xmldata pdf =
|
||||||
|
let dtd, tree = xmltree_of_bytes xmldata in
|
||||||
|
let str =
|
||||||
|
match value with
|
||||||
|
Pdf.String s -> s
|
||||||
|
| Pdf.Boolean true -> "True"
|
||||||
|
| Pdf.Boolean false -> "False"
|
||||||
|
| _ -> failwith "set_pdf_info_xml: not a string"
|
||||||
|
in
|
||||||
|
let newtree = set_xml_field kind fieldname str tree in
|
||||||
|
bytes_of_xmltree (dtd, newtree)
|
||||||
|
|
||||||
|
let set_pdf_info_xml_many changes value xmldata pdf =
|
||||||
|
let xmldata = ref xmldata in
|
||||||
|
iter
|
||||||
|
(fun (kind, fieldname) ->
|
||||||
|
xmldata := set_pdf_info_xml kind fieldname value !xmldata pdf)
|
||||||
|
changes;
|
||||||
|
!xmldata
|
||||||
|
|
||||||
|
|
||||||
|
(* \section{Set an entry in the /Info dictionary} *)
|
||||||
|
|
||||||
|
(* We must parse the date to get its components, then use strftime to build the
|
||||||
|
* new string in XMP format *)
|
||||||
|
|
||||||
|
type date =
|
||||||
|
{mutable year : int;
|
||||||
|
mutable month : int; (* 1 - 12 *)
|
||||||
|
mutable day : int; (* 1 - 31 *)
|
||||||
|
mutable hour : int; (* 0 - 23 *)
|
||||||
|
mutable minute : int; (* 0 - 59 *)
|
||||||
|
mutable second : int; (* 0 - 59 *)
|
||||||
|
mutable ut_relationship : int; (* -1, 0, +1 *)
|
||||||
|
mutable offset_hours : int; (* 0 - 59 *)
|
||||||
|
mutable offset_minutes : int (* 0 - 59 *)}
|
||||||
|
|
||||||
|
let default_date () =
|
||||||
|
{year = 0;
|
||||||
|
month = 1;
|
||||||
|
day = 1;
|
||||||
|
hour = 0;
|
||||||
|
minute = 0;
|
||||||
|
second = 0;
|
||||||
|
ut_relationship = 0;
|
||||||
|
offset_hours = 0;
|
||||||
|
offset_minutes = 0}
|
||||||
|
|
||||||
|
(* XMP date format is YYYY-MM-DDThh:mm:ssTZD *)
|
||||||
|
let make_xmp_date_from_components d =
|
||||||
|
let tzd =
|
||||||
|
if d.ut_relationship = 0 && d.offset_hours = 0 && d.offset_minutes = 0 then "Z" else
|
||||||
|
(if d.ut_relationship >=0 then "+" else "-") ^
|
||||||
|
Printf.sprintf "%02i" d.offset_hours ^
|
||||||
|
":" ^
|
||||||
|
Printf.sprintf "%02i" d.offset_minutes
|
||||||
|
in
|
||||||
|
Cpdfstrftime.strftime
|
||||||
|
~time:{Cpdfstrftime._tm_sec = d.second;
|
||||||
|
Cpdfstrftime._tm_min = d.minute;
|
||||||
|
Cpdfstrftime._tm_hour = d.hour;
|
||||||
|
Cpdfstrftime._tm_mday = d.day;
|
||||||
|
Cpdfstrftime._tm_mon = d.month - 1;
|
||||||
|
Cpdfstrftime._tm_year = d.year - 1900;
|
||||||
|
Cpdfstrftime._tm_wday = 0;
|
||||||
|
Cpdfstrftime._tm_yday = 0;
|
||||||
|
Cpdfstrftime._tm_isdst = false}
|
||||||
|
"%Y-%m-%dT%H:%M:%S"
|
||||||
|
^
|
||||||
|
tzd
|
||||||
|
|
||||||
|
let xmp_date date =
|
||||||
|
let d = default_date () in
|
||||||
|
try
|
||||||
|
match explode date with
|
||||||
|
'D'::':'::r ->
|
||||||
|
begin match r with
|
||||||
|
y1::y2::y3::y4::r ->
|
||||||
|
d.year <- int_of_string (implode [y1; y2; y3; y4]);
|
||||||
|
begin match r with
|
||||||
|
m1::m2::r ->
|
||||||
|
d.month <- int_of_string (implode [m1; m2]);
|
||||||
|
begin match r with
|
||||||
|
d1::d2::r ->
|
||||||
|
d.day <- int_of_string (implode [d1; d2]);
|
||||||
|
begin match r with
|
||||||
|
h1::h2::r ->
|
||||||
|
d.hour <- int_of_string (implode [h1; h2]);
|
||||||
|
begin match r with
|
||||||
|
m1::m2::r ->
|
||||||
|
d.minute <- int_of_string (implode [m1; m2]);
|
||||||
|
begin match r with
|
||||||
|
s1::s2::r ->
|
||||||
|
d.second <- int_of_string (implode [s1; s2]);
|
||||||
|
begin match r with
|
||||||
|
o::r ->
|
||||||
|
d.ut_relationship <-
|
||||||
|
if o = '+' then 1 else
|
||||||
|
if o = '-' then -1 else
|
||||||
|
0;
|
||||||
|
begin match r with
|
||||||
|
h1::h2::'\''::r ->
|
||||||
|
d.offset_hours <- int_of_string (implode [h1; h2]);
|
||||||
|
begin match r with
|
||||||
|
m1::m2::_ ->
|
||||||
|
d.offset_minutes <- int_of_string (implode [m1; m2]);
|
||||||
|
raise Exit
|
||||||
|
| _ -> raise Exit
|
||||||
|
end
|
||||||
|
| _ -> raise Exit
|
||||||
|
end
|
||||||
|
| _ -> raise Exit
|
||||||
|
end
|
||||||
|
| _ -> raise Exit
|
||||||
|
end
|
||||||
|
| _ -> raise Exit
|
||||||
|
end
|
||||||
|
| _ -> raise Exit
|
||||||
|
end
|
||||||
|
| _ -> raise Exit
|
||||||
|
end
|
||||||
|
| _ -> raise Exit
|
||||||
|
end
|
||||||
|
| _ ->
|
||||||
|
Printf.eprintf "xmp_date: Malformed date string (no year): %s\n%!" date;
|
||||||
|
make_xmp_date_from_components d
|
||||||
|
end
|
||||||
|
| _ ->
|
||||||
|
Printf.eprintf "xmp_date: Malformed date string (no prefix): %s\n%!" date;
|
||||||
|
make_xmp_date_from_components d
|
||||||
|
with
|
||||||
|
Exit -> make_xmp_date_from_components d
|
||||||
|
|
||||||
|
let set_pdf_info ?(xmp_also=false) ?(xmp_just_set=false) (key, value, version) pdf =
|
||||||
|
let infodict =
|
||||||
|
match Pdf.lookup_direct pdf "/Info" pdf.Pdf.trailerdict with
|
||||||
|
| Some d -> d
|
||||||
|
| None -> Pdf.Dictionary []
|
||||||
|
in
|
||||||
|
let infodict' = Pdf.add_dict_entry infodict key value in
|
||||||
|
let objnum = Pdf.addobj pdf infodict' in
|
||||||
|
if not xmp_just_set then
|
||||||
|
begin
|
||||||
|
pdf.Pdf.trailerdict <-
|
||||||
|
Pdf.add_dict_entry pdf.Pdf.trailerdict "/Info" (Pdf.Indirect objnum);
|
||||||
|
pdf.Pdf.minor <-
|
||||||
|
max pdf.Pdf.minor version
|
||||||
|
end;
|
||||||
|
if xmp_also then
|
||||||
|
begin match get_metadata pdf with
|
||||||
|
None -> pdf
|
||||||
|
| Some xmldata ->
|
||||||
|
let xmp_date = function Pdf.String s -> Pdf.String (xmp_date s) | _ -> failwith "xmp_date not a string" in
|
||||||
|
let changes, value =
|
||||||
|
match key with
|
||||||
|
| "/Producer" -> [(adobe, "Producer")], value
|
||||||
|
| "/Creator" -> [(adobe, "Creator"); (xmp, "CreatorTool"); (dc, "creator")], value
|
||||||
|
| "/Author" -> [(adobe, "Author")], value
|
||||||
|
| "/Title" -> [(adobe, "Title"); (dc, "title")], value
|
||||||
|
| "/Subject" -> [(adobe, "Subject"); (dc, "subject")], value
|
||||||
|
| "/Keywords" -> [(adobe, "Keywords")], value
|
||||||
|
| "/CreationDate" -> [(adobe, "CreationDate"); (xmp, "CreateDate")], xmp_date value
|
||||||
|
| "/ModDate" -> [(adobe, "ModDate"); (xmp, "ModifyDate")], xmp_date value
|
||||||
|
| "/Trapped" -> [(adobe, "Trapped")], value
|
||||||
|
| _ -> failwith "Unknown call to set_pdf_info"
|
||||||
|
in
|
||||||
|
set_metadata_from_bytes
|
||||||
|
true
|
||||||
|
(set_pdf_info_xml_many changes value xmldata pdf)
|
||||||
|
pdf
|
||||||
|
end
|
||||||
|
else
|
||||||
|
pdf
|
||||||
|
|
||||||
|
let expand_date = function
|
||||||
|
| "now" ->
|
||||||
|
begin match Sys.getenv_opt "CPDF_REPRODUCIBLE_DATES" with
|
||||||
|
| Some "true" -> Cpdfstrftime.strftime ~time:Cpdfstrftime.dummy "D:%Y%m%d%H%M%S"
|
||||||
|
| _ -> Cpdfstrftime.strftime "D:%Y%m%d%H%M%S"
|
||||||
|
end
|
||||||
|
| x -> x
|
||||||
|
|
||||||
|
(* Set metadata date *)
|
||||||
|
let set_metadata_date pdf date =
|
||||||
|
match get_metadata pdf with
|
||||||
|
None -> pdf
|
||||||
|
| Some xmldata ->
|
||||||
|
let changes= [(xmp, "MetadataDate")] in
|
||||||
|
let value = match date with "now" -> xmp_date (expand_date "now") | x -> x in
|
||||||
|
set_metadata_from_bytes
|
||||||
|
true
|
||||||
|
(set_pdf_info_xml_many changes (Pdf.String value) xmldata pdf)
|
||||||
|
pdf
|
||||||
|
|
||||||
|
|
||||||
|
(* \section{Copy an /ID from one file to another} *)
|
||||||
|
let copy_id keepversion copyfrom copyto =
|
||||||
|
match Pdf.lookup_direct copyfrom "/ID" copyfrom.Pdf.trailerdict with
|
||||||
|
| None -> copyto (* error "Source PDF file has no /ID entry to copy from" *)
|
||||||
|
| Some id ->
|
||||||
|
copyto.Pdf.trailerdict <-
|
||||||
|
Pdf.add_dict_entry copyto.Pdf.trailerdict "/ID" id;
|
||||||
|
copyto.Pdf.minor <-
|
||||||
|
if keepversion then copyto.Pdf.minor else max copyto.Pdf.minor 1;
|
||||||
|
copyto
|
||||||
|
|
||||||
|
let replacements pdf =
|
||||||
|
let info = get_info_utf8 pdf in
|
||||||
|
[("CREATEDATE", xmp_date (let i = info "/CreationDate" in if i = "" then expand_date "now" else i));
|
||||||
|
("MODDATE", xmp_date (let i = info "/ModDate" in if i = "" then expand_date "now" else i));
|
||||||
|
("PRODUCER", info "/Producer");
|
||||||
|
("CREATOR", info "/Creator");
|
||||||
|
("TITLE", info "/Title");
|
||||||
|
("SUBJECT", info "/Subject");
|
||||||
|
("AUTHOR", info "/Author");
|
||||||
|
("KEYWORDS", info "/Keywords");
|
||||||
|
("TRAPPED", info "/Trapped");
|
||||||
|
("METADATADATE", xmp_date (expand_date "now"))]
|
||||||
|
|
||||||
|
let create_metadata pdf =
|
||||||
|
let xmp = ref xmp_template in
|
||||||
|
iter
|
||||||
|
(fun (s, r) -> xmp := string_replace_all s r !xmp)
|
||||||
|
(replacements pdf);
|
||||||
|
set_metadata_from_bytes false (bytes_of_string !xmp) pdf
|
||||||
|
|
||||||
|
(* \section{Set viewer preferences} *)
|
||||||
|
let set_viewer_preference (key, value, version) pdf =
|
||||||
|
match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
|
||||||
|
| Some catalog ->
|
||||||
|
let viewer_preferences =
|
||||||
|
match Pdf.lookup_direct pdf "/ViewerPreferences" catalog with
|
||||||
|
| Some d -> d
|
||||||
|
| None -> Pdf.Dictionary []
|
||||||
|
in
|
||||||
|
let viewer_preferences' =
|
||||||
|
Pdf.add_dict_entry viewer_preferences key value
|
||||||
|
in
|
||||||
|
let catalog' =
|
||||||
|
Pdf.add_dict_entry catalog "/ViewerPreferences" viewer_preferences'
|
||||||
|
in
|
||||||
|
let catalognum = Pdf.addobj pdf catalog' in
|
||||||
|
let trailerdict' =
|
||||||
|
Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum)
|
||||||
|
in
|
||||||
|
{pdf with
|
||||||
|
Pdf.minor = max pdf.Pdf.minor version;
|
||||||
|
Pdf.root = catalognum;
|
||||||
|
Pdf.trailerdict = trailerdict'}
|
||||||
|
| None -> error "bad root"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
(* \section{Set page layout} *)
|
||||||
|
let set_page_layout pdf s =
|
||||||
|
match s with
|
||||||
|
| "SinglePage" | "OneColumn" | "TwoColumnLeft"
|
||||||
|
| "TwoColumnRight" | "TwoPageLeft" | "TwoPageRight" ->
|
||||||
|
begin match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
|
||||||
|
| Some catalog ->
|
||||||
|
let catalog' =
|
||||||
|
Pdf.add_dict_entry catalog "/PageLayout" (Pdf.Name ("/" ^ s))
|
||||||
|
in
|
||||||
|
let catalognum = Pdf.addobj pdf catalog' in
|
||||||
|
let trailerdict' =
|
||||||
|
Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum)
|
||||||
|
in
|
||||||
|
{pdf with
|
||||||
|
Pdf.root = catalognum;
|
||||||
|
Pdf.trailerdict = trailerdict'}
|
||||||
|
| None -> error "bad root"
|
||||||
|
end
|
||||||
|
| _ -> error "Unknown page layout"
|
||||||
|
|
||||||
|
|
||||||
|
(* \section{Set page mode} *)
|
||||||
|
let set_page_mode pdf s =
|
||||||
|
match s with
|
||||||
|
| "UseNone" | "UseOutlines" | "UseThumbs"
|
||||||
|
| "FullScreen" | "UseOC" | "UseAttachments" ->
|
||||||
|
begin match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
|
||||||
|
| Some catalog ->
|
||||||
|
let catalog' =
|
||||||
|
Pdf.add_dict_entry catalog "/PageMode" (Pdf.Name ("/" ^ s))
|
||||||
|
in
|
||||||
|
let catalognum = Pdf.addobj pdf catalog' in
|
||||||
|
let trailerdict' =
|
||||||
|
Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum)
|
||||||
|
in
|
||||||
|
{pdf with
|
||||||
|
Pdf.root = catalognum;
|
||||||
|
Pdf.trailerdict = trailerdict'}
|
||||||
|
| None -> error "bad root"
|
||||||
|
end
|
||||||
|
| _ -> error "Unknown page mode"
|
||||||
|
|
||||||
|
(* Set open action *)
|
||||||
|
let set_open_action pdf fit pagenumber =
|
||||||
|
if pagenumber > Pdfpage.endpage pdf || pagenumber < 0 then
|
||||||
|
raise (error "set_open_action: invalid page number")
|
||||||
|
else
|
||||||
|
let pageobjectnumber = select pagenumber (Pdf.page_reference_numbers pdf) in
|
||||||
|
let destination =
|
||||||
|
if fit then
|
||||||
|
Pdf.Array [Pdf.Indirect pageobjectnumber; Pdf.Name "/Fit"]
|
||||||
|
else
|
||||||
|
Pdf.Array [Pdf.Indirect pageobjectnumber; Pdf.Name "/XYZ"; Pdf.Null; Pdf.Null; Pdf.Null]
|
||||||
|
in
|
||||||
|
let open_action =
|
||||||
|
Pdf.Dictionary [("/D", destination); ("/S", Pdf.Name "/GoTo")]
|
||||||
|
in
|
||||||
|
match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
|
||||||
|
| Some catalog ->
|
||||||
|
let catalog' =
|
||||||
|
Pdf.add_dict_entry catalog "/OpenAction" open_action
|
||||||
|
in
|
||||||
|
let catalognum = Pdf.addobj pdf catalog' in
|
||||||
|
let trailerdict' =
|
||||||
|
Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalognum)
|
||||||
|
in
|
||||||
|
{pdf with Pdf.root = catalognum; Pdf.trailerdict = trailerdict'}
|
||||||
|
| None -> error "bad root"
|
||||||
|
|
||||||
|
|
||||||
|
let set_version v pdf =
|
||||||
|
pdf.Pdf.minor <- v
|
74
cpdfmetadata.mli
Normal file
74
cpdfmetadata.mli
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
|
||||||
|
(** {2 Types and Exceptions} *)
|
||||||
|
|
||||||
|
(** Possible output encodings for some function. [Raw] does no processing at
|
||||||
|
all - the PDF string is output as-is. [UTF8] converts loslessly to UTF8.
|
||||||
|
[Stripped] extracts the unicode codepoints and returns only those which
|
||||||
|
correspond to 7 bit ASCII. *)
|
||||||
|
type encoding = Raw | UTF8 | Stripped
|
||||||
|
|
||||||
|
val encode_output : encoding -> string -> string
|
||||||
|
|
||||||
|
(** {2 Metadata and settings} *)
|
||||||
|
|
||||||
|
(** [copy_id keepversion copyfrom copyto] copies the ID, if any, from
|
||||||
|
[copyfrom] to [copyto]. If [keepversion] is true, the PDF version of [copyto]
|
||||||
|
won't be affected. *)
|
||||||
|
val copy_id : bool -> Pdf.t -> Pdf.t -> Pdf.t
|
||||||
|
|
||||||
|
(** [set_pdf_info (key, value, version)] sets the entry [key] in the /Info directory, updating
|
||||||
|
the PDF minor version to [version].*)
|
||||||
|
val set_pdf_info : ?xmp_also:bool -> ?xmp_just_set:bool -> (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t
|
||||||
|
|
||||||
|
val get_xmp_info : Pdf.t -> string -> string
|
||||||
|
|
||||||
|
(** [set_pdf_info (key, value, version)] sets the entry [key] in the
|
||||||
|
/ViewerPreferences directory, updating the PDF minor version to [version].*)
|
||||||
|
val set_viewer_preference : (string * Pdf.pdfobject * int) -> Pdf.t -> Pdf.t
|
||||||
|
|
||||||
|
(** Set the page layout to the given name (sans slash) e.g SinglePage *)
|
||||||
|
val set_page_layout : Pdf.t -> string -> Pdf.t
|
||||||
|
|
||||||
|
(** Set the page layout to the given name (sans slash) e.g SinglePage *)
|
||||||
|
val set_page_mode : Pdf.t -> string -> Pdf.t
|
||||||
|
|
||||||
|
(** Set the open action. If the boolean is true, /Fit will be used, otherwise /XYZ *)
|
||||||
|
val set_open_action : Pdf.t -> bool -> int -> Pdf.t
|
||||||
|
|
||||||
|
(** Set the PDF version number *)
|
||||||
|
val set_version : int -> Pdf.t -> unit
|
||||||
|
|
||||||
|
(** Given a PDF, returns a function which can lookup a given dictionary entry
|
||||||
|
from the /Info dictionary, returning it as a UTF8 string *)
|
||||||
|
val get_info_utf8 : Pdf.t -> string -> string
|
||||||
|
|
||||||
|
(** Output to standard output general information about a PDF. *)
|
||||||
|
val output_info : encoding -> Pdf.t -> unit
|
||||||
|
|
||||||
|
(** Output to standard output information from any XMP metadata stream in a PDF. *)
|
||||||
|
val output_xmp_info : encoding -> Pdf.t -> unit
|
||||||
|
|
||||||
|
(** Create XMP metadata from scratch *)
|
||||||
|
val create_metadata : Pdf.t -> Pdf.t
|
||||||
|
|
||||||
|
(** {2 XML Metadata} *)
|
||||||
|
|
||||||
|
(** [set_metadata keepversion filename pdf] sets the XML metadata of a PDF to the contents of [filename]. If [keepversion] is true, the PDF version will not be altered. *)
|
||||||
|
val set_metadata : bool -> string -> Pdf.t -> Pdf.t
|
||||||
|
|
||||||
|
(** The same, but the content comes from [bytes]. *)
|
||||||
|
val set_metadata_from_bytes : bool -> Pdfio.bytes -> Pdf.t -> Pdf.t
|
||||||
|
|
||||||
|
(** Remove the metadata from a file *)
|
||||||
|
val remove_metadata : Pdf.t -> Pdf.t
|
||||||
|
|
||||||
|
(** Extract metadata to a [Pdfio.bytes] *)
|
||||||
|
val get_metadata : Pdf.t -> Pdfio.bytes option
|
||||||
|
|
||||||
|
(** Print metadate to stdout *)
|
||||||
|
val print_metadata : Pdf.t -> unit
|
||||||
|
|
||||||
|
(** Set the metadata date *)
|
||||||
|
val set_metadata_date : Pdf.t -> string -> Pdf.t
|
||||||
|
|
||||||
|
val expand_date : string -> string
|
Loading…
x
Reference in New Issue
Block a user