This commit is contained in:
John Whitington 2021-12-21 15:33:56 +00:00
parent 9bdeccb343
commit 27d13d9e3b
8 changed files with 121 additions and 114 deletions

View File

@ -4,7 +4,7 @@ MODS = cpdfyojson cpdfxmlm \
cpdfattach cpdfpagespec cpdfposition cpdfpresent cpdfmetadata \ cpdfattach cpdfpagespec cpdfposition cpdfpresent cpdfmetadata \
cpdfbookmarks cpdfpage cpdfaddtext cpdf cpdfimage cpdffont cpdftype \ cpdfbookmarks cpdfpage cpdfaddtext cpdf cpdfimage cpdffont cpdftype \
cpdftexttopdf cpdftoc cpdfpad cpdfocg cpdfsqueeze cpdfdraft cpdfspot \ cpdftexttopdf cpdftoc cpdfpad cpdfocg cpdfsqueeze cpdfdraft cpdfspot \
cpdfpagelabels cpdfcreate cpdfannot cpdfcommand cpdfpagelabels cpdfcreate cpdfannot cpdfxobject cpdfcommand
SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml

111
cpdf.ml
View File

@ -424,31 +424,6 @@ let do_stamp relative_to_cropbox fast position topline midline scale_to_fit isov
Pdfpage.resources = Pdfpage.resources =
combine_pdf_resources pdf u.Pdfpage.resources o.Pdfpage.resources} combine_pdf_resources pdf u.Pdfpage.resources o.Pdfpage.resources}
(* Alter a bookmark destination given a hash table [t] of (old page reference
   number, new page reference number) pairings.

   Only destinations whose target is a [Pdfdest.PageObject] are rewritten; any
   other destination is returned unchanged. The kind of view (XYZ, Fit, FitB,
   ...) and its parameters are always preserved: only the page object number
   changes.

   Raises [Not_found] if the page object number is not a key of [t]. *)
let change_destination t dest =
  (* Build the replacement target; [Hashtbl.find] raises [Not_found] when the
     old page reference number has no pairing. *)
  let remap p = Pdfdest.PageObject (Hashtbl.find t p) in
  match dest with
  | Pdfdest.XYZ (Pdfdest.PageObject p, a, b, c) ->
      Pdfdest.XYZ (remap p, a, b, c)
  | Pdfdest.Fit (Pdfdest.PageObject p) ->
      Pdfdest.Fit (remap p)
  | Pdfdest.FitH (Pdfdest.PageObject p, x) ->
      Pdfdest.FitH (remap p, x)
  | Pdfdest.FitV (Pdfdest.PageObject p, x) ->
      Pdfdest.FitV (remap p, x)
  | Pdfdest.FitR (Pdfdest.PageObject p, a, b, c, d) ->
      Pdfdest.FitR (remap p, a, b, c, d)
  (* FitB must remain FitB. It used to be rebuilt as Fit, which silently
     changed the view of such bookmarks whenever pages were renumbered. *)
  | Pdfdest.FitB (Pdfdest.PageObject p) ->
      Pdfdest.FitB (remap p)
  | Pdfdest.FitBH (Pdfdest.PageObject p, x) ->
      Pdfdest.FitBH (remap p, x)
  | Pdfdest.FitBV (Pdfdest.PageObject p, x) ->
      Pdfdest.FitBV (remap p, x)
  | x -> x
(* [change_bookmark t m] returns bookmark [m] with its target passed through
   [change_destination t]. If that lookup raises [Not_found], the original
   target is kept. *)
let change_bookmark t m =
  let original = m.Pdfmarks.target in
  let target =
    try change_destination t original with Not_found -> original
  in
  {m with Pdfmarks.target = target}
let stamp relative_to_cropbox position topline midline fast scale_to_fit isover range over pdf = let stamp relative_to_cropbox position topline midline fast scale_to_fit isover range over pdf =
let prefix = Pdfpage.shortest_unused_prefix pdf in let prefix = Pdfpage.shortest_unused_prefix pdf in
Pdfpage.add_prefix over prefix; Pdfpage.add_prefix over prefix;
@ -492,93 +467,9 @@ let stamp relative_to_cropbox position topline midline fast scale_to_fit isover
in in
let new_refnumbers = Pdf.page_reference_numbers changed in let new_refnumbers = Pdf.page_reference_numbers changed in
let changetable = hashtable_of_dictionary (combine marks_refnumbers new_refnumbers) in let changetable = hashtable_of_dictionary (combine marks_refnumbers new_refnumbers) in
let new_marks = map (change_bookmark changetable) marks in let new_marks = map (Cpdfbookmarks.change_bookmark changetable) marks in
Pdfmarks.add_bookmarks new_marks changed Pdfmarks.add_bookmarks new_marks changed
(* [add_xobject_to_page xobjname xobjnum page pdf] returns [page] with an
   entry [xobjname], pointing indirectly at object [xobjnum], added to the
   /XObject dictionary of its resources. When the resources have no /XObject
   entry, an empty dictionary is used as the starting point. *)
let add_xobject_to_page xobjname xobjnum page pdf =
  let resources = page.Pdfpage.resources in
  let existing =
    match Pdf.lookup_direct pdf "/XObject" resources with
    | None -> Pdf.Dictionary []
    | Some dict -> dict
  in
  let extended = Pdf.add_dict_entry existing xobjname (Pdf.Indirect xobjnum) in
  {page with Pdfpage.resources = Pdf.add_dict_entry resources "/XObject" extended}
(* [add_page_as_xobject pdf range page name] stores the content of [page] in
   [pdf] as a Form XObject (using the page's media box as /BBox and the page's
   resources as /Resources), then adds a reference to it under [name] to the
   resources of every page of [pdf] whose index, as numbered by [indx], is in
   [range]. Returns the result of [Pdfpage.change_pages] on the updated pages. *)
let add_page_as_xobject pdf range page name =
  (* Re-serialise the page content into a single stream of data. *)
  let ops =
    Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content
  in
  let data =
    match Pdfops.stream_of_ops ops with
    | Pdf.Stream {contents = (_, Pdf.Got bytes)} -> bytes
    | _ -> assert false
  in
  let dict =
    Pdf.Dictionary
      ["/Type", Pdf.Name "/XObject";
       "/Subtype", Pdf.Name "/Form";
       "/BBox", page.Pdfpage.mediabox;
       "/Resources", page.Pdfpage.resources;
       "/Length", Pdf.Integer (bytes_size data)]
  in
  let objnum = Pdf.addobj pdf (Pdf.Stream {contents = (dict, Pdf.Got data)}) in
  let pages = Pdfpage.pages_of_pagetree pdf in
  let stamp_if_selected pnum p =
    if mem pnum range then add_xobject_to_page name objnum p pdf else p
  in
  let new_pages = List.map2 stamp_if_selected (indx pages) pages in
  Pdfpage.change_pages true pdf new_pages
(* [stamp_as_xobject pdf range over] makes the first page of [over] available
   as a Form XObject on the pages of [pdf] in [range]. Returns the new PDF
   together with the name under which the XObject was added.

   n.b. the use of change_pages here ensures no inheritable resources in the
   stamp, therefore creation of the xobject from the page is as simple as
   expected.

   Bookmarks of [pdf] are read up front and re-added at the end, with their
   page references remapped to the page objects of the result. *)
let stamp_as_xobject pdf range over =
  (* Prefix the names in [over]; the call is made for its side effect. *)
  Pdfpage.add_prefix over (Pdfpage.shortest_unused_prefix pdf);
  let marks = Pdfmarks.read_bookmarks pdf in
  let old_refnumbers = Pdf.page_reference_numbers pdf in
  let under = Pdfmarks.remove_bookmarks pdf in
  let over = Pdfmarks.remove_bookmarks over in
  let under_range = ilist 1 (Pdfpage.endpage under) in
  (* Reduce [over] to a document holding just its first page. *)
  let stamp_pdf =
    match Pdfpage.pages_of_pagetree over with
    | first::_ -> Pdfpage.change_pages ~changes:[(1, 1)] true over [first]
    | [] -> error "empty PDF"
  in
  let merged =
    Pdfmerge.merge_pdfs
      false false ["a"; "b"] [under; stamp_pdf] [under_range; [1]]
  in
  let merged = {merged with Pdf.saved_encryption = under.Pdf.saved_encryption} in
  let merged = Cpdfmetadata.copy_id true under merged in
  (* The stamp page is the last page of the merged document; drop it from the
     page tree but keep it to build the xobject from. *)
  let merged_pages = Pdfpage.pages_of_pagetree merged in
  let over_page = last merged_pages in
  let under_pages = all_but_last merged_pages in
  let changed =
    let identity = map (fun n -> (n, n)) (ilist 1 (length under_pages)) in
    Pdfpage.change_pages ~changes:identity true merged under_pages
  in
  (* Point the saved bookmarks at the new page objects. *)
  let new_refnumbers = Pdf.page_reference_numbers changed in
  let changetable =
    hashtable_of_dictionary (combine old_refnumbers new_refnumbers)
  in
  let remapped_marks = map (change_bookmark changetable) marks in
  let result = Pdfmarks.add_bookmarks remapped_marks changed in
  let name = "/" ^ Pdfpage.shortest_unused_prefix result ^ "CPDFXObj" in
  (add_page_as_xobject result range over_page name, name)
(* Combine pages from two PDFs. For now, assume equal length. *) (* Combine pages from two PDFs. For now, assume equal length. *)
(* If [over] has more pages than [under], chop the excess. If the converse, pad (* If [over] has more pages than [under], chop the excess. If the converse, pad

View File

@ -121,8 +121,6 @@ val process_xobjects : Pdf.t -> Pdfpage.t -> (Pdf.t -> Pdf.pdfobject -> Pdf.pdfo
val append_page_content : string -> bool -> bool -> int list -> Pdf.t -> Pdf.t val append_page_content : string -> bool -> bool -> int list -> Pdf.t -> Pdf.t
(** [stamp_as_xobject pdf range over] adds the first page of [over] as a Form
    XObject to the resources of the pages of [pdf] in [range]. Returns the new
    PDF and the name under which the XObject was added. *)
val stamp_as_xobject : Pdf.t -> int list -> Pdf.t -> Pdf.t * string
val remove_dict_entry : Pdf.t -> string -> Pdf.pdfobject option -> unit val remove_dict_entry : Pdf.t -> string -> Pdf.pdfobject option -> unit
val replace_dict_entry : Pdf.t -> string -> Pdf.pdfobject -> Pdf.pdfobject option -> unit val replace_dict_entry : Pdf.t -> string -> Pdf.pdfobject -> Pdf.pdfobject option -> unit

View File

@ -393,3 +393,28 @@ let bookmarks_open_to_level n pdf =
in in
Pdfmarks.add_bookmarks newmarks pdf Pdfmarks.add_bookmarks newmarks pdf
(* Alter a bookmark destination given a hash table [t] of (old page reference
   number, new page reference number) pairings.

   Only destinations whose target is a [Pdfdest.PageObject] are rewritten; any
   other destination is returned unchanged. The kind of view (XYZ, Fit, FitB,
   ...) and its parameters are always preserved: only the page object number
   changes.

   Raises [Not_found] if the page object number is not a key of [t]. *)
let change_destination t dest =
  (* Build the replacement target; [Hashtbl.find] raises [Not_found] when the
     old page reference number has no pairing. *)
  let remap p = Pdfdest.PageObject (Hashtbl.find t p) in
  match dest with
  | Pdfdest.XYZ (Pdfdest.PageObject p, a, b, c) ->
      Pdfdest.XYZ (remap p, a, b, c)
  | Pdfdest.Fit (Pdfdest.PageObject p) ->
      Pdfdest.Fit (remap p)
  | Pdfdest.FitH (Pdfdest.PageObject p, x) ->
      Pdfdest.FitH (remap p, x)
  | Pdfdest.FitV (Pdfdest.PageObject p, x) ->
      Pdfdest.FitV (remap p, x)
  | Pdfdest.FitR (Pdfdest.PageObject p, a, b, c, d) ->
      Pdfdest.FitR (remap p, a, b, c, d)
  (* FitB must remain FitB. It used to be rebuilt as Fit, which silently
     changed the view of such bookmarks whenever pages were renumbered. *)
  | Pdfdest.FitB (Pdfdest.PageObject p) ->
      Pdfdest.FitB (remap p)
  | Pdfdest.FitBH (Pdfdest.PageObject p, x) ->
      Pdfdest.FitBH (remap p, x)
  | Pdfdest.FitBV (Pdfdest.PageObject p, x) ->
      Pdfdest.FitBV (remap p, x)
  | x -> x
(* [change_bookmark t m] returns bookmark [m] with its target passed through
   [change_destination t]. If that lookup raises [Not_found], the original
   target is kept. *)
let change_bookmark t m =
  let original = m.Pdfmarks.target in
  let target =
    try change_destination t original with Not_found -> original
  in
  {m with Pdfmarks.target = target}

View File

@ -18,4 +18,7 @@ val name_of_spec : Cpdfmetadata.encoding ->
Pdf.t -> int -> string -> int -> string -> int -> int -> string Pdf.t -> int -> string -> int -> string -> int -> int -> string
val add_bookmark_title : string -> bool -> Pdf.t -> Pdf.t val add_bookmark_title : string -> bool -> Pdf.t -> Pdf.t
val bookmarks_open_to_level : int -> Pdf.t -> Pdf.t val bookmarks_open_to_level : int -> Pdf.t -> Pdf.t
(** [change_bookmark t m] alters the destination of bookmark [m] given a hash
    table [t] of (old page reference number, new page reference number)
    pairings. If the bookmark's page reference number is not in [t], the
    bookmark's target is left as it was. *)
val change_bookmark : (int, int) Hashtbl.t -> Pdfmarks.t -> Pdfmarks.t

View File

@ -3900,7 +3900,7 @@ let go () =
let pdf = get_single_pdf args.op false in let pdf = get_single_pdf args.op false in
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
let pdf, xobj_name = let pdf, xobj_name =
Cpdf.stamp_as_xobject pdf range stamp_pdf Cpdfxobject.stamp_as_xobject pdf range stamp_pdf
in in
Printf.printf "%s\n" xobj_name; Printf.printf "%s\n" xobj_name;
flush stdout; flush stdout;

88
cpdfxobject.ml Normal file
View File

@ -0,0 +1,88 @@
open Pdfutil
open Pdfio
open Cpdferror
(* [add_xobject_to_page xobjname xobjnum page pdf] returns [page] with an
   entry [xobjname], pointing indirectly at object [xobjnum], added to the
   /XObject dictionary of its resources. When the resources have no /XObject
   entry, an empty dictionary is used as the starting point. *)
let add_xobject_to_page xobjname xobjnum page pdf =
  let resources = page.Pdfpage.resources in
  let existing =
    match Pdf.lookup_direct pdf "/XObject" resources with
    | None -> Pdf.Dictionary []
    | Some dict -> dict
  in
  let extended = Pdf.add_dict_entry existing xobjname (Pdf.Indirect xobjnum) in
  {page with Pdfpage.resources = Pdf.add_dict_entry resources "/XObject" extended}
(* [add_page_as_xobject pdf range page name] stores the content of [page] in
   [pdf] as a Form XObject (using the page's media box as /BBox and the page's
   resources as /Resources), then adds a reference to it under [name] to the
   resources of every page of [pdf] whose index, as numbered by [indx], is in
   [range]. Returns the result of [Pdfpage.change_pages] on the updated pages. *)
let add_page_as_xobject pdf range page name =
  (* Re-serialise the page content into a single stream of data. *)
  let ops =
    Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content
  in
  let data =
    match Pdfops.stream_of_ops ops with
    | Pdf.Stream {contents = (_, Pdf.Got bytes)} -> bytes
    | _ -> assert false
  in
  let dict =
    Pdf.Dictionary
      ["/Type", Pdf.Name "/XObject";
       "/Subtype", Pdf.Name "/Form";
       "/BBox", page.Pdfpage.mediabox;
       "/Resources", page.Pdfpage.resources;
       "/Length", Pdf.Integer (bytes_size data)]
  in
  let objnum = Pdf.addobj pdf (Pdf.Stream {contents = (dict, Pdf.Got data)}) in
  let pages = Pdfpage.pages_of_pagetree pdf in
  let stamp_if_selected pnum p =
    if mem pnum range then add_xobject_to_page name objnum p pdf else p
  in
  let new_pages = List.map2 stamp_if_selected (indx pages) pages in
  Pdfpage.change_pages true pdf new_pages
(* [stamp_as_xobject pdf range over] makes the first page of [over] available
   as a Form XObject on the pages of [pdf] in [range]. Returns the new PDF
   together with the name under which the XObject was added.

   n.b. the use of change_pages here ensures no inheritable resources in the
   stamp, therefore creation of the xobject from the page is as simple as
   expected.

   Bookmarks of [pdf] are read up front and re-added at the end, with their
   page references remapped to the page objects of the result. *)
let stamp_as_xobject pdf range over =
  (* Prefix the names in [over]; the call is made for its side effect. *)
  Pdfpage.add_prefix over (Pdfpage.shortest_unused_prefix pdf);
  let marks = Pdfmarks.read_bookmarks pdf in
  let old_refnumbers = Pdf.page_reference_numbers pdf in
  let under = Pdfmarks.remove_bookmarks pdf in
  let over = Pdfmarks.remove_bookmarks over in
  let under_range = ilist 1 (Pdfpage.endpage under) in
  (* Reduce [over] to a document holding just its first page. *)
  let stamp_pdf =
    match Pdfpage.pages_of_pagetree over with
    | first::_ -> Pdfpage.change_pages ~changes:[(1, 1)] true over [first]
    | [] -> error "empty PDF"
  in
  let merged =
    Pdfmerge.merge_pdfs
      false false ["a"; "b"] [under; stamp_pdf] [under_range; [1]]
  in
  let merged = {merged with Pdf.saved_encryption = under.Pdf.saved_encryption} in
  let merged = Cpdfmetadata.copy_id true under merged in
  (* The stamp page is the last page of the merged document; drop it from the
     page tree but keep it to build the xobject from. *)
  let merged_pages = Pdfpage.pages_of_pagetree merged in
  let over_page = last merged_pages in
  let under_pages = all_but_last merged_pages in
  let changed =
    let identity = map (fun n -> (n, n)) (ilist 1 (length under_pages)) in
    Pdfpage.change_pages ~changes:identity true merged under_pages
  in
  (* Point the saved bookmarks at the new page objects. *)
  let new_refnumbers = Pdf.page_reference_numbers changed in
  let changetable =
    hashtable_of_dictionary (combine old_refnumbers new_refnumbers)
  in
  let remapped_marks = map (Cpdfbookmarks.change_bookmark changetable) marks in
  let result = Pdfmarks.add_bookmarks remapped_marks changed in
  let name = "/" ^ Pdfpage.shortest_unused_prefix result ^ "CPDFXObj" in
  (add_page_as_xobject result range over_page name, name)

2
cpdfxobject.mli Normal file
View File

@ -0,0 +1,2 @@
(** [stamp_as_xobject pdf range over] adds the first page of [over] as a Form
    XObject to the resources of the pages of [pdf] in [range]. Returns the new
    PDF and the name under which the XObject was added. *)
val stamp_as_xobject : Pdf.t -> int list -> Pdf.t -> Pdf.t * string