mirror of
https://github.com/johnwhitington/cpdf-source.git
synced 2025-06-05 22:09:39 +02:00
more
This commit is contained in:
2
Makefile
2
Makefile
@@ -1,7 +1,7 @@
|
|||||||
# Build the cpdf command line tools and top level
|
# Build the cpdf command line tools and top level
|
||||||
MODS = cpdfyojson cpdfxmlm \
|
MODS = cpdfyojson cpdfxmlm \
|
||||||
cpdferror cpdfjson cpdfstrftime cpdfcoord cpdfattach \
|
cpdferror cpdfjson cpdfstrftime cpdfcoord cpdfattach \
|
||||||
cpdfpagespec cpdfposition cpdffont cpdf cpdfcommand
|
cpdfpagespec cpdfposition cpdf cpdffont cpdfcommand
|
||||||
|
|
||||||
SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml
|
SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml
|
||||||
|
|
||||||
|
135
cpdfcommand.ml
135
cpdfcommand.ml
@@ -2265,21 +2265,6 @@ let filesize name =
|
|||||||
with
|
with
|
||||||
_ -> 0
|
_ -> 0
|
||||||
|
|
||||||
(* Embed missing fonts with Ghostscript.

   Runs the Ghostscript executable named by [args.path_to_ghostscript] with
   the pdfwrite device, reading the file [fi] and writing the file [fo].
   -dQUIET is passed when [args.gs_quiet] is set.

   This function does not return normally: it exits the process with status 0
   when the Ghostscript command returns 0, and with status 2 (after printing a
   message on stderr) when no Ghostscript path was supplied or the command
   returns a non-zero status. *)
let embed_missing_fonts fi fo =
  if args.path_to_ghostscript = "" then begin
    Printf.eprintf "Please supply path to gs with -gs\n%!";
    exit 2
  end;
  (* Build the command line with Filename.quote_command (OCaml >= 4.10) so
     that the executable path is quoted as well as the two file names.
     Previously the path was spliced in unquoted, so a Ghostscript location
     containing spaces or shell metacharacters was split or interpreted by
     the shell. The argument order is unchanged. *)
  let quiet = if args.gs_quiet then ["-dQUIET"] else [] in
  let gscall =
    Filename.quote_command
      args.path_to_ghostscript
      (["-dNOPAUSE"]
       @ quiet
       @ ["-sDEVICE=pdfwrite"; "-sOUTPUTFILE=" ^ fo; "-dBATCH"; fi])
  in
  match Sys.command gscall with
  | 0 -> exit 0
  | _ -> Printf.eprintf "Font embedding failed.\n%!"; exit 2
|
|
||||||
|
|
||||||
(* Mend PDF file with Ghostscript. We use this if a file is malformed and CPDF
|
(* Mend PDF file with Ghostscript. We use this if a file is malformed and CPDF
|
||||||
* cannot mend it. It is copied to a temporary file, fixed, then we return None or Some (pdf). *)
|
* cannot mend it. It is copied to a temporary file, fixed, then we return None or Some (pdf). *)
|
||||||
let mend_pdf_file_with_ghostscript filename =
|
let mend_pdf_file_with_ghostscript filename =
|
||||||
@@ -2780,68 +2765,6 @@ let split_pdf
|
|||||||
enc 0 original_filename squeeze spec pdf
|
enc 0 original_filename squeeze spec pdf
|
||||||
(splitinto chunksize (indx pdf_pages)) pdf_pages
|
(splitinto chunksize (indx pdf_pages)) pdf_pages
|
||||||
|
|
||||||
|
|
||||||
(* Copy a font from [frompdf] with name [fontname] on page [fontpage] to [pdf]
   on all pages in [range].

   Both documents are first passed through [Pdf.renumber_pdfs] (presumably so
   that their object numbers cannot clash -- confirm against camlpdf). Every
   object referenced from the font is then added to [pdf] under its own
   number, and the font dictionary itself is added to the /Font resource
   dictionary of each page in [range], keyed by the font's /BaseFont name, or
   by "/CopyFontAddedNoName" when /BaseFont is absent or not a name. The
   updated document is returned.

   Raises [Failure] if page [fontpage] cannot be selected from [frompdf], if
   that page has no /Font resource, if the /Font resource has no entry
   [fontname], or if a target page's /Font resource is not a dictionary. *)
let copy_font frompdf fontname fontpage range pdf =
  match Pdf.renumber_pdfs [frompdf; pdf] with
  | [] | [_] | _::_::_::_ -> assert false
  | [frompdf; pdf] ->
      (* 1. Get fontpage. [Not_found] was the only exception handled here
         before; [Invalid_argument] is now handled too, because an
         out-of-range index is believed to be reported that way by
         [Pdfutil.select] (verify against camlpdf), and would otherwise
         escape without the intended error message. *)
      let frompdf_pages = Pdfpage.pages_of_pagetree frompdf in
      let frompdf_page =
        try select fontpage frompdf_pages with
        | Not_found | Invalid_argument _ ->
            failwith "copy_font: Page not found in input pdf"
      in
      (* 2. Extract font *)
      let fonts =
        match Pdf.lookup_direct frompdf "/Font" frompdf_page.Pdfpage.resources with
        | Some f -> f
        | None -> failwith "copy_font: font not found"
      in
      let fromfont =
        match Pdf.lookup_direct frompdf fontname fonts with
        | Some f -> f
        | None -> failwith "copy_font: font not found"
      in
      let basefontname =
        match Pdf.lookup_direct frompdf "/BaseFont" fromfont with
        | Some (Pdf.Name n) -> n
        | _ -> "/CopyFontAddedNoName"
      in
      (* 3. Get all objects forming font (except main /Font one) *)
      let objnumbers = Pdf.objects_referenced [] [] frompdf fromfont in
      (* 4. Copy them from frompdf to pdf. *)
      iter
        (fun objnum ->
           Pdf.addobj_given_num pdf (objnum, Pdf.lookup_obj frompdf objnum))
        objnumbers;
      (* 5. Get pages from pdf *)
      let pdf_pages = Pdfpage.pages_of_pagetree pdf in
      (* 6. Add the font to pages in range *)
      let add_font page =
        let existing =
          match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with
          | Some f -> f
          | None -> Pdf.Dictionary []
        in
        let font' =
          match existing with
          | Pdf.Dictionary _ as d -> Pdf.add_dict_entry d basefontname fromfont
          | _ -> failwith "copy_font: error"
        in
        let resources' =
          Pdf.add_dict_entry page.Pdfpage.resources "/Font" font'
        in
        {page with Pdfpage.resources = resources'}
      in
      let pages' =
        map
          (fun (page, pagenum) ->
             if mem pagenum range then add_font page else page)
          (combine pdf_pages (indx pdf_pages))
      in
      (* 7. Put the pages back into the pdf, and return *)
      let pdf, root = Pdfpage.add_pagetree pages' pdf in
      Pdfpage.add_root root [] pdf
|
|
||||||
|
|
||||||
(* Extract Images. *)
|
(* Extract Images. *)
|
||||||
let pnm_to_channel_24 channel w h s =
|
let pnm_to_channel_24 channel w h s =
|
||||||
let white () = output_char channel ' '
|
let white () = output_char channel ' '
|
||||||
@@ -2989,59 +2912,7 @@ let copy_cropbox_to_mediabox pdf range =
|
|||||||
pdf
|
pdf
|
||||||
range
|
range
|
||||||
|
|
||||||
(* Missing Fonts *)
|
|
||||||
|
|
||||||
(* [is_missing pdf dict] is [true] when the font dictionary [dict] has no
   /FontDescriptor entry, or when its descriptor has none of the /FontFile,
   /FontFile2 and /FontFile3 entries. It is [false] as soon as one of those
   three entries is found. *)
let is_missing pdf dict =
  match Pdf.lookup_direct pdf "/FontDescriptor" dict with
  | None -> true
  | Some descriptor ->
      let absent key =
        match Pdf.lookup_direct pdf key descriptor with
        | None -> true
        | Some _ -> false
      in
      (* && short-circuits, so the keys are probed in the same order and no
         further than in a chain of nested matches. *)
      absent "/FontFile" && absent "/FontFile2" && absent "/FontFile3"
|
|
||||||
|
|
||||||
(* Report the font [dict], known by the resource name [name] on page number
   [page], if [is_missing] holds for it and its /BaseFont is not recognised by
   [Pdftext.standard_font_of_name]. The report is one line on standard output
   of the form "page, name, subtype, basefont, encoding"; a /Subtype,
   /BaseFont or /Encoding entry which is absent or not a name is printed as
   the empty string. Nothing is printed otherwise. *)
let missing_font pdf page (name, dict) =
  let name_entry key =
    match Pdf.lookup_direct pdf key dict with
    | Some (Pdf.Name n) -> n
    | _ -> ""
  in
  if is_missing pdf dict then begin
    let subtype = name_entry "/Subtype" in
    let basefont = name_entry "/BaseFont" in
    let encoding = name_entry "/Encoding" in
    match Pdftext.standard_font_of_name basefont with
    | Some _ -> ()
    | None ->
        Printf.printf "%i, %s, %s, %s, %s\n" page name subtype basefont encoding
  end
|
|
||||||
|
|
||||||
(* Report missing fonts for [pdf], visiting pages through [Cpdf.iter_pages]
   with [range]. For each entry of a page's /Font resource dictionary, the
   entry itself is handed to [missing_font], unless it has a /DescendantFonts
   array, in which case each descendant is handed over instead under the
   parent's resource name. A page whose /Font resource is absent or not a
   dictionary produces no output. *)
let missing_fonts pdf range =
  (* Replace a font by its descendant fonts when it has any. *)
  let expand (name, dict) =
    match Pdf.lookup_direct pdf "/DescendantFonts" dict with
    | Some (Pdf.Array descendants) -> map (fun d -> (name, d)) descendants
    | _ -> [(name, dict)]
  in
  let report num page =
    match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with
    | Some (Pdf.Dictionary fontdict) ->
        let name_dict_pairs = flatten (map expand fontdict) in
        iter (missing_font pdf num) name_dict_pairs
    | _ -> ()
  in
  Cpdf.iter_pages report pdf range
|
|
||||||
|
|
||||||
(* copy the contents of the box f to the box t. If mediabox_if_missing is set,
|
(* copy the contents of the box f to the box t. If mediabox_if_missing is set,
|
||||||
the contents of the mediabox will be used if the from fox is not available. If
|
the contents of the mediabox will be used if the from fox is not available. If
|
||||||
@@ -3475,7 +3346,7 @@ let go () =
|
|||||||
| Some x -> x
|
| Some x -> x
|
||||||
| None -> failwith "copy_font: no font name given"
|
| None -> failwith "copy_font: no font name given"
|
||||||
in
|
in
|
||||||
let outpdf = copy_font frompdf copyfontname args.copyfontpage range pdf in
|
let outpdf = Cpdffont.copy_font frompdf copyfontname args.copyfontpage range pdf in
|
||||||
write_pdf true outpdf
|
write_pdf true outpdf
|
||||||
| _ -> error "copyfont: bad command line"
|
| _ -> error "copyfont: bad command line"
|
||||||
end
|
end
|
||||||
@@ -4168,7 +4039,7 @@ let go () =
|
|||||||
| Some MissingFonts ->
|
| Some MissingFonts ->
|
||||||
let pdf = get_single_pdf args.op true in
|
let pdf = get_single_pdf args.op true in
|
||||||
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
||||||
missing_fonts pdf range
|
Cpdffont.missing_fonts pdf range
|
||||||
| Some ExtractText ->
|
| Some ExtractText ->
|
||||||
let pdf = get_single_pdf args.op true in
|
let pdf = get_single_pdf args.op true in
|
||||||
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
||||||
@@ -4222,7 +4093,7 @@ let go () =
|
|||||||
File fo -> fo
|
File fo -> fo
|
||||||
| _ -> error "Output method not supported for -embed-missing-fonts"
|
| _ -> error "Output method not supported for -embed-missing-fonts"
|
||||||
in
|
in
|
||||||
embed_missing_fonts fi fo
|
Cpdffont.embed_missing_fonts args.path_to_ghostscript args.gs_quiet fi fo
|
||||||
| Some (BookmarksOpenToLevel n) ->
|
| Some (BookmarksOpenToLevel n) ->
|
||||||
let pdf = get_single_pdf args.op false in
|
let pdf = get_single_pdf args.op false in
|
||||||
write_pdf false (bookmarks_open_to_level n pdf)
|
write_pdf false (bookmarks_open_to_level n pdf)
|
||||||
|
116
cpdffont.ml
116
cpdffont.ml
@@ -1,3 +1,5 @@
|
|||||||
|
open Pdfutil
|
||||||
|
|
||||||
(* Embed missing fonts with Ghostscript. *)
|
(* Embed missing fonts with Ghostscript. *)
|
||||||
let embed_missing_fonts path_to_ghostscript gs_quiet fi fo =
|
let embed_missing_fonts path_to_ghostscript gs_quiet fi fo =
|
||||||
if path_to_ghostscript = "" then begin
|
if path_to_ghostscript = "" then begin
|
||||||
@@ -12,3 +14,117 @@ let embed_missing_fonts path_to_ghostscript gs_quiet fi fo =
|
|||||||
match Sys.command gscall with
|
match Sys.command gscall with
|
||||||
| 0 -> exit 0
|
| 0 -> exit 0
|
||||||
| _ -> Printf.eprintf "Font embedding failed.\n%!"; exit 2
|
| _ -> Printf.eprintf "Font embedding failed.\n%!"; exit 2
|
||||||
|
|
||||||
|
(* Copy a font from [frompdf] with name [fontname] on page [fontpage] to [pdf]
   on all pages in [range].

   Both documents are first passed through [Pdf.renumber_pdfs] (presumably so
   that their object numbers cannot clash -- confirm against camlpdf). Every
   object referenced from the font is then added to [pdf] under its own
   number, and the font dictionary itself is added to the /Font resource
   dictionary of each page in [range], keyed by the font's /BaseFont name, or
   by "/CopyFontAddedNoName" when /BaseFont is absent or not a name. The
   updated document is returned.

   Raises [Failure] if page [fontpage] cannot be selected from [frompdf], if
   that page has no /Font resource, if the /Font resource has no entry
   [fontname], or if a target page's /Font resource is not a dictionary. *)
let copy_font frompdf fontname fontpage range pdf =
  match Pdf.renumber_pdfs [frompdf; pdf] with
  | [] | [_] | _::_::_::_ -> assert false
  | [frompdf; pdf] ->
      (* 1. Get fontpage. [Not_found] was the only exception handled here
         before; [Invalid_argument] is now handled too, because an
         out-of-range index is believed to be reported that way by
         [Pdfutil.select] (verify against camlpdf), and would otherwise
         escape without the intended error message. *)
      let frompdf_pages = Pdfpage.pages_of_pagetree frompdf in
      let frompdf_page =
        try select fontpage frompdf_pages with
        | Not_found | Invalid_argument _ ->
            failwith "copy_font: Page not found in input pdf"
      in
      (* 2. Extract font *)
      let fonts =
        match Pdf.lookup_direct frompdf "/Font" frompdf_page.Pdfpage.resources with
        | Some f -> f
        | None -> failwith "copy_font: font not found"
      in
      let fromfont =
        match Pdf.lookup_direct frompdf fontname fonts with
        | Some f -> f
        | None -> failwith "copy_font: font not found"
      in
      let basefontname =
        match Pdf.lookup_direct frompdf "/BaseFont" fromfont with
        | Some (Pdf.Name n) -> n
        | _ -> "/CopyFontAddedNoName"
      in
      (* 3. Get all objects forming font (except main /Font one) *)
      let objnumbers = Pdf.objects_referenced [] [] frompdf fromfont in
      (* 4. Copy them from frompdf to pdf. *)
      iter
        (fun objnum ->
           Pdf.addobj_given_num pdf (objnum, Pdf.lookup_obj frompdf objnum))
        objnumbers;
      (* 5. Get pages from pdf *)
      let pdf_pages = Pdfpage.pages_of_pagetree pdf in
      (* 6. Add the font to pages in range *)
      let add_font page =
        let existing =
          match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with
          | Some f -> f
          | None -> Pdf.Dictionary []
        in
        let font' =
          match existing with
          | Pdf.Dictionary _ as d -> Pdf.add_dict_entry d basefontname fromfont
          | _ -> failwith "copy_font: error"
        in
        let resources' =
          Pdf.add_dict_entry page.Pdfpage.resources "/Font" font'
        in
        {page with Pdfpage.resources = resources'}
      in
      let pages' =
        map
          (fun (page, pagenum) ->
             if mem pagenum range then add_font page else page)
          (combine pdf_pages (indx pdf_pages))
      in
      (* 7. Put the pages back into the pdf, and return *)
      let pdf, root = Pdfpage.add_pagetree pages' pdf in
      Pdfpage.add_root root [] pdf
|
||||||
|
|
||||||
|
(* Missing Fonts *)
|
||||||
|
(* [is_missing pdf dict] is [true] when the font dictionary [dict] has no
   /FontDescriptor entry, or when its descriptor has none of the /FontFile,
   /FontFile2 and /FontFile3 entries. It is [false] as soon as one of those
   three entries is found. *)
let is_missing pdf dict =
  match Pdf.lookup_direct pdf "/FontDescriptor" dict with
  | None -> true
  | Some descriptor ->
      let absent key =
        match Pdf.lookup_direct pdf key descriptor with
        | None -> true
        | Some _ -> false
      in
      (* && short-circuits, so the keys are probed in the same order and no
         further than in a chain of nested matches. *)
      absent "/FontFile" && absent "/FontFile2" && absent "/FontFile3"
|
||||||
|
|
||||||
|
(* Report the font [dict], known by the resource name [name] on page number
   [page], if [is_missing] holds for it and its /BaseFont is not recognised by
   [Pdftext.standard_font_of_name]. The report is one line on standard output
   of the form "page, name, subtype, basefont, encoding"; a /Subtype,
   /BaseFont or /Encoding entry which is absent or not a name is printed as
   the empty string. Nothing is printed otherwise. *)
let missing_font pdf page (name, dict) =
  let name_entry key =
    match Pdf.lookup_direct pdf key dict with
    | Some (Pdf.Name n) -> n
    | _ -> ""
  in
  if is_missing pdf dict then begin
    let subtype = name_entry "/Subtype" in
    let basefont = name_entry "/BaseFont" in
    let encoding = name_entry "/Encoding" in
    match Pdftext.standard_font_of_name basefont with
    | Some _ -> ()
    | None ->
        Printf.printf "%i, %s, %s, %s, %s\n" page name subtype basefont encoding
  end
|
||||||
|
|
||||||
|
(* Report missing fonts for [pdf], visiting pages through [Cpdf.iter_pages]
   with [range]. For each entry of a page's /Font resource dictionary, the
   entry itself is handed to [missing_font], unless it has a /DescendantFonts
   array, in which case each descendant is handed over instead under the
   parent's resource name. A page whose /Font resource is absent or not a
   dictionary produces no output. *)
let missing_fonts pdf range =
  (* Replace a font by its descendant fonts when it has any. *)
  let expand (name, dict) =
    match Pdf.lookup_direct pdf "/DescendantFonts" dict with
    | Some (Pdf.Array descendants) -> map (fun d -> (name, d)) descendants
    | _ -> [(name, dict)]
  in
  let report num page =
    match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with
    | Some (Pdf.Dictionary fontdict) ->
        let name_dict_pairs = flatten (map expand fontdict) in
        iter (missing_font pdf num) name_dict_pairs
    | _ -> ()
  in
  Cpdf.iter_pages report pdf range
|
||||||
|
@@ -1 +1,5 @@
|
|||||||
val embed_missing_fonts : string -> bool -> string -> string -> unit
|
(** [embed_missing_fonts path_to_ghostscript gs_quiet fi fo] embeds missing
    fonts with Ghostscript. The visible parts of the implementation check
    that [path_to_ghostscript] is non-empty, run a command with
    [Sys.command], and then call [exit] (status 0 on success, 2 on failure)
    rather than returning. [fi] and [fo] are presumably the input and output
    file names -- confirm against the implementation. *)
val embed_missing_fonts : string -> bool -> string -> string -> unit
|
||||||
|
|
||||||
|
(** [copy_font frompdf fontname fontpage range pdf] copies the font with name
    [fontname] on page [fontpage] of [frompdf] to [pdf], adding it to the
    /Font resources of all pages in [range], and returns the resulting
    document.
    @raise Failure if the page or the font cannot be found in [frompdf]. *)
val copy_font : Pdf.t -> string -> int -> int list -> Pdf.t -> Pdf.t
|
||||||
|
|
||||||
|
(** [missing_fonts pdf range] prints to standard output one line per missing
    font found on the pages of [pdf] given by [range], in the form
    "page, name, subtype, basefont, encoding". A font is listed when it has
    no /FontFile, /FontFile2 or /FontFile3 entry and its /BaseFont is not
    recognised by [Pdftext.standard_font_of_name]. *)
val missing_fonts : Pdf.t -> int list -> unit
|
||||||
|
Reference in New Issue
Block a user