This commit is contained in:
John Whitington
2021-10-18 16:22:38 +01:00
parent 52dc0585c8
commit b70615ec16
4 changed files with 124 additions and 133 deletions

View File

@@ -1,7 +1,7 @@
# Build the cpdf command line tools and top level # Build the cpdf command line tools and top level
MODS = cpdfyojson cpdfxmlm \ MODS = cpdfyojson cpdfxmlm \
cpdferror cpdfjson cpdfstrftime cpdfcoord cpdfattach \ cpdferror cpdfjson cpdfstrftime cpdfcoord cpdfattach \
cpdfpagespec cpdfposition cpdffont cpdf cpdfcommand cpdfpagespec cpdfposition cpdf cpdffont cpdfcommand
SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml

View File

@@ -2265,21 +2265,6 @@ let filesize name =
with with
_ -> 0 _ -> 0
(* Embed missing fonts with Ghostscript.

   Runs the Ghostscript binary named by [args.path_to_ghostscript] with the
   pdfwrite device, reading the file [fi] and writing the file [fo]. "-dQUIET"
   is passed when [args.gs_quiet] is set.

   This function never returns: it exits with status 0 when the command
   succeeds, and with status 2 (after printing a message on stderr) when no
   Ghostscript path was supplied or the command returns non-zero. *)
let embed_missing_fonts fi fo =
  if args.path_to_ghostscript = "" then
    (Printf.eprintf "Please supply path to gs with -gs\n%!";
     exit 2);
  let quiet_flag = if args.gs_quiet then "-dQUIET" else "" in
  (* Only the two file names are shell-quoted; the gs path is used as given. *)
  let command =
    String.concat ""
      [args.path_to_ghostscript;
       " -dNOPAUSE ";
       quiet_flag;
       " -sDEVICE=pdfwrite -sOUTPUTFILE=";
       Filename.quote fo;
       " -dBATCH ";
       Filename.quote fi]
  in
  if Sys.command command = 0 then exit 0
  else begin
    Printf.eprintf "Font embedding failed.\n%!";
    exit 2
  end
(* Mend PDF file with Ghostscript. We use this if a file is malformed and CPDF (* Mend PDF file with Ghostscript. We use this if a file is malformed and CPDF
* cannot mend it. It is copied to a temporary file, fixed, then we return None or Some (pdf). *) * cannot mend it. It is copied to a temporary file, fixed, then we return None or Some (pdf). *)
let mend_pdf_file_with_ghostscript filename = let mend_pdf_file_with_ghostscript filename =
@@ -2780,68 +2765,6 @@ let split_pdf
enc 0 original_filename squeeze spec pdf enc 0 original_filename squeeze spec pdf
(splitinto chunksize (indx pdf_pages)) pdf_pages (splitinto chunksize (indx pdf_pages)) pdf_pages
(* Copy a font from [frompdf] with name [fontname] on page [fontpage] to [pdf]
   on all pages in [range].

   The font dictionary is looked up under [fontname] in the /Font resources of
   page [fontpage] of [frompdf]. Every object it references is added to [pdf],
   and the font is then entered in the /Font resources of each page of [pdf]
   whose number is in [range]. The entry is keyed by the font's /BaseFont name,
   or by "/CopyFontAddedNoName" when it has none. Returns the updated document.

   Raises [Failure] if the page or the font cannot be found, or if a target
   page's existing /Font entry is not a dictionary. *)
let copy_font frompdf fontname fontpage range pdf =
  (* Both documents are renumbered together before any object is copied across
     (presumably so their object numbers cannot collide -- confirm against
     Pdf.renumber_pdfs). *)
  match Pdf.renumber_pdfs [frompdf; pdf] with
  | [] | [_] | _::_::_::_ -> assert false
  | [frompdf; pdf] ->
      (* 1. Get fontpage *)
      let frompdf_pages = Pdfpage.pages_of_pagetree frompdf in
      let frompdf_page =
        (* NOTE(review): Pdfutil.select is documented as raising
           Invalid_argument for an out-of-range index, so that exception is
           mapped to the intended message too. Not_found is kept in case other
           versions raise it. *)
        try select fontpage frompdf_pages with
        | Not_found | Invalid_argument _ ->
            failwith "copy_font: Page not found in input pdf"
      in
      (* 2. Extract font *)
      let fonts =
        match Pdf.lookup_direct frompdf "/Font" frompdf_page.Pdfpage.resources with
        | Some f -> f
        | None -> failwith "copy_font: font not found"
      in
      let fromfont =
        match Pdf.lookup_direct frompdf fontname fonts with
        | Some f -> f
        | None -> failwith "copy_font: font not found"
      in
      let basefontname =
        match Pdf.lookup_direct frompdf "/BaseFont" fromfont with
        | Some (Pdf.Name n) -> n
        | _ -> "/CopyFontAddedNoName"
      in
      (* 3. Get all objects forming font (except main /Font one) *)
      let objnumbers = Pdf.objects_referenced [] [] frompdf fromfont in
      (* 4. Copy them from frompdf to pdf, keeping their object numbers. *)
      iter
        (function objnum ->
           Pdf.addobj_given_num pdf (objnum, Pdf.lookup_obj frompdf objnum))
        objnumbers;
      (* 5. Get pages from pdf *)
      let pdf_pages = Pdfpage.pages_of_pagetree pdf in
      (* 6. Add the font to pages in range *)
      let pages' =
        map
          (function (page, pagenum) ->
             if mem pagenum range then
               (* Start from the page's existing /Font dictionary, or an empty
                  one when the page has none. *)
               let font =
                 match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with
                 | Some f -> f
                 | None -> Pdf.Dictionary []
               in
               let font' =
                 match font with
                 | (Pdf.Dictionary _) as d ->
                     Pdf.add_dict_entry d basefontname fromfont
                 | _ -> failwith "copy_font: error"
               in
               let resources' =
                 Pdf.add_dict_entry page.Pdfpage.resources "/Font" font'
               in
               {page with Pdfpage.resources = resources'}
             else page)
          (combine pdf_pages (indx pdf_pages))
      in
      (* 7. Put the pages back into the pdf, and return *)
      let pdf, root = Pdfpage.add_pagetree pages' pdf in
      Pdfpage.add_root root [] pdf
(* Extract Images. *) (* Extract Images. *)
let pnm_to_channel_24 channel w h s = let pnm_to_channel_24 channel w h s =
let white () = output_char channel ' ' let white () = output_char channel ' '
@@ -2989,59 +2912,7 @@ let copy_cropbox_to_mediabox pdf range =
pdf pdf
range range
(* Missing Fonts *)

(* [is_missing pdf dict] is true when the font dictionary [dict] has no
   /FontDescriptor entry, or when its descriptor has none of /FontFile,
   /FontFile2 and /FontFile3. The three keys are tried in that order and the
   search stops at the first one found. *)
let is_missing pdf dict =
  match Pdf.lookup_direct pdf "/FontDescriptor" dict with
  | None -> true
  | Some descriptor ->
      let present key =
        match Pdf.lookup_direct pdf key descriptor with
        | Some _ -> true
        | None -> false
      in
      not (present "/FontFile" || present "/FontFile2" || present "/FontFile3")
(* [missing_font pdf page (name, dict)] reports the font [dict], known on page
   number [page] under the resource name [name], if [is_missing] holds for it.
   Nothing is printed when the font's /BaseFont is recognised by
   [Pdftext.standard_font_of_name]. Otherwise one line is written to standard
   output: page, name, subtype, base font, encoding. Entries that are absent or
   not PDF names are printed as the empty string. *)
let missing_font pdf page (name, dict) =
  let name_entry key =
    match Pdf.lookup_direct pdf key dict with
    | Some (Pdf.Name n) -> n
    | _ -> ""
  in
  if is_missing pdf dict then begin
    let subtype = name_entry "/Subtype" in
    let basefont = name_entry "/BaseFont" in
    let encoding = name_entry "/Encoding" in
    match Pdftext.standard_font_of_name basefont with
    | Some _ -> ()
    | None ->
        Printf.printf "%i, %s, %s, %s, %s\n" page name subtype basefont encoding
  end
(* [missing_fonts pdf range] runs [missing_font] over every entry of the /Font
   resource dictionary of each page that [Cpdf.iter_pages] visits for [range].
   Pages whose /Font resource is absent or not a dictionary are skipped. *)
let missing_fonts pdf range =
  (* A font with a /DescendantFonts array is replaced by its descendants, each
     reported under the parent's resource name. Any other font is kept as is. *)
  let expand (name, dict) =
    match Pdf.lookup_direct pdf "/DescendantFonts" dict with
    | Some (Pdf.Array descendants) -> map (fun d -> (name, d)) descendants
    | _ -> [(name, dict)]
  in
  let report_page num page =
    match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with
    | Some (Pdf.Dictionary fontdict) ->
        iter (missing_font pdf num) (flatten (map expand fontdict))
    | _ -> ()
  in
  Cpdf.iter_pages report_page pdf range
(* copy the contents of the box f to the box t. If mediabox_if_missing is set, (* copy the contents of the box f to the box t. If mediabox_if_missing is set,
the contents of the mediabox will be used if the from box is not available. If the contents of the mediabox will be used if the from box is not available. If
@@ -3475,7 +3346,7 @@ let go () =
| Some x -> x | Some x -> x
| None -> failwith "copy_font: no font name given" | None -> failwith "copy_font: no font name given"
in in
let outpdf = copy_font frompdf copyfontname args.copyfontpage range pdf in let outpdf = Cpdffont.copy_font frompdf copyfontname args.copyfontpage range pdf in
write_pdf true outpdf write_pdf true outpdf
| _ -> error "copyfont: bad command line" | _ -> error "copyfont: bad command line"
end end
@@ -4168,7 +4039,7 @@ let go () =
| Some MissingFonts -> | Some MissingFonts ->
let pdf = get_single_pdf args.op true in let pdf = get_single_pdf args.op true in
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
missing_fonts pdf range Cpdffont.missing_fonts pdf range
| Some ExtractText -> | Some ExtractText ->
let pdf = get_single_pdf args.op true in let pdf = get_single_pdf args.op true in
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
@@ -4222,7 +4093,7 @@ let go () =
File fo -> fo File fo -> fo
| _ -> error "Output method not supported for -embed-missing-fonts" | _ -> error "Output method not supported for -embed-missing-fonts"
in in
embed_missing_fonts fi fo Cpdffont.embed_missing_fonts args.path_to_ghostscript args.gs_quiet fi fo
| Some (BookmarksOpenToLevel n) -> | Some (BookmarksOpenToLevel n) ->
let pdf = get_single_pdf args.op false in let pdf = get_single_pdf args.op false in
write_pdf false (bookmarks_open_to_level n pdf) write_pdf false (bookmarks_open_to_level n pdf)

View File

@@ -1,3 +1,5 @@
open Pdfutil
(* Embed missing fonts with Ghostscript. *) (* Embed missing fonts with Ghostscript. *)
let embed_missing_fonts path_to_ghostscript gs_quiet fi fo = let embed_missing_fonts path_to_ghostscript gs_quiet fi fo =
if path_to_ghostscript = "" then begin if path_to_ghostscript = "" then begin
@@ -12,3 +14,117 @@ let embed_missing_fonts path_to_ghostscript gs_quiet fi fo =
match Sys.command gscall with match Sys.command gscall with
| 0 -> exit 0 | 0 -> exit 0
| _ -> Printf.eprintf "Font embedding failed.\n%!"; exit 2 | _ -> Printf.eprintf "Font embedding failed.\n%!"; exit 2
(* Copy a font from [frompdf] with name [fontname] on page [fontpage] to [pdf]
   on all pages in [range].

   The font dictionary is looked up under [fontname] in the /Font resources of
   page [fontpage] of [frompdf]. Every object it references is added to [pdf],
   and the font is then entered in the /Font resources of each page of [pdf]
   whose number is in [range]. The entry is keyed by the font's /BaseFont name,
   or by "/CopyFontAddedNoName" when it has none. Returns the updated document.

   Raises [Failure] if the page or the font cannot be found, or if a target
   page's existing /Font entry is not a dictionary. *)
let copy_font frompdf fontname fontpage range pdf =
  (* Both documents are renumbered together before any object is copied across
     (presumably so their object numbers cannot collide -- confirm against
     Pdf.renumber_pdfs). *)
  match Pdf.renumber_pdfs [frompdf; pdf] with
  | [] | [_] | _::_::_::_ -> assert false
  | [frompdf; pdf] ->
      (* 1. Get fontpage *)
      let frompdf_pages = Pdfpage.pages_of_pagetree frompdf in
      let frompdf_page =
        (* NOTE(review): Pdfutil.select is documented as raising
           Invalid_argument for an out-of-range index, so that exception is
           mapped to the intended message too. Not_found is kept in case other
           versions raise it. *)
        try select fontpage frompdf_pages with
        | Not_found | Invalid_argument _ ->
            failwith "copy_font: Page not found in input pdf"
      in
      (* 2. Extract font *)
      let fonts =
        match Pdf.lookup_direct frompdf "/Font" frompdf_page.Pdfpage.resources with
        | Some f -> f
        | None -> failwith "copy_font: font not found"
      in
      let fromfont =
        match Pdf.lookup_direct frompdf fontname fonts with
        | Some f -> f
        | None -> failwith "copy_font: font not found"
      in
      let basefontname =
        match Pdf.lookup_direct frompdf "/BaseFont" fromfont with
        | Some (Pdf.Name n) -> n
        | _ -> "/CopyFontAddedNoName"
      in
      (* 3. Get all objects forming font (except main /Font one) *)
      let objnumbers = Pdf.objects_referenced [] [] frompdf fromfont in
      (* 4. Copy them from frompdf to pdf, keeping their object numbers. *)
      iter
        (function objnum ->
           Pdf.addobj_given_num pdf (objnum, Pdf.lookup_obj frompdf objnum))
        objnumbers;
      (* 5. Get pages from pdf *)
      let pdf_pages = Pdfpage.pages_of_pagetree pdf in
      (* 6. Add the font to pages in range *)
      let pages' =
        map
          (function (page, pagenum) ->
             if mem pagenum range then
               (* Start from the page's existing /Font dictionary, or an empty
                  one when the page has none. *)
               let font =
                 match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with
                 | Some f -> f
                 | None -> Pdf.Dictionary []
               in
               let font' =
                 match font with
                 | (Pdf.Dictionary _) as d ->
                     Pdf.add_dict_entry d basefontname fromfont
                 | _ -> failwith "copy_font: error"
               in
               let resources' =
                 Pdf.add_dict_entry page.Pdfpage.resources "/Font" font'
               in
               {page with Pdfpage.resources = resources'}
             else page)
          (combine pdf_pages (indx pdf_pages))
      in
      (* 7. Put the pages back into the pdf, and return *)
      let pdf, root = Pdfpage.add_pagetree pages' pdf in
      Pdfpage.add_root root [] pdf
(* Missing Fonts *)

(* [is_missing pdf dict] is true when the font dictionary [dict] has no
   /FontDescriptor entry, or when its descriptor has none of /FontFile,
   /FontFile2 and /FontFile3. The three keys are tried in that order and the
   search stops at the first one found. *)
let is_missing pdf dict =
  match Pdf.lookup_direct pdf "/FontDescriptor" dict with
  | None -> true
  | Some descriptor ->
      let present key =
        match Pdf.lookup_direct pdf key descriptor with
        | Some _ -> true
        | None -> false
      in
      not (present "/FontFile" || present "/FontFile2" || present "/FontFile3")
(* [missing_font pdf page (name, dict)] reports the font [dict], known on page
   number [page] under the resource name [name], if [is_missing] holds for it.
   Nothing is printed when the font's /BaseFont is recognised by
   [Pdftext.standard_font_of_name]. Otherwise one line is written to standard
   output: page, name, subtype, base font, encoding. Entries that are absent or
   not PDF names are printed as the empty string. *)
let missing_font pdf page (name, dict) =
  let name_entry key =
    match Pdf.lookup_direct pdf key dict with
    | Some (Pdf.Name n) -> n
    | _ -> ""
  in
  if is_missing pdf dict then begin
    let subtype = name_entry "/Subtype" in
    let basefont = name_entry "/BaseFont" in
    let encoding = name_entry "/Encoding" in
    match Pdftext.standard_font_of_name basefont with
    | Some _ -> ()
    | None ->
        Printf.printf "%i, %s, %s, %s, %s\n" page name subtype basefont encoding
  end
(* [missing_fonts pdf range] runs [missing_font] over every entry of the /Font
   resource dictionary of each page that [Cpdf.iter_pages] visits for [range].
   Pages whose /Font resource is absent or not a dictionary are skipped. *)
let missing_fonts pdf range =
  (* A font with a /DescendantFonts array is replaced by its descendants, each
     reported under the parent's resource name. Any other font is kept as is. *)
  let expand (name, dict) =
    match Pdf.lookup_direct pdf "/DescendantFonts" dict with
    | Some (Pdf.Array descendants) -> map (fun d -> (name, d)) descendants
    | _ -> [(name, dict)]
  in
  let report_page num page =
    match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with
    | Some (Pdf.Dictionary fontdict) ->
        iter (missing_font pdf num) (flatten (map expand fontdict))
    | _ -> ()
  in
  Cpdf.iter_pages report_page pdf range

View File

@@ -1 +1,5 @@
val embed_missing_fonts : string -> bool -> string -> string -> unit val embed_missing_fonts : string -> bool -> string -> string -> unit
(** [copy_font frompdf fontname fontpage range pdf] copies the font stored
    under [fontname] in the /Font resources of page [fontpage] of [frompdf]
    into [pdf], adding it to the /Font resources of every page of [pdf] whose
    number is in [range]. The new entry is keyed by the font's /BaseFont name.
    Returns the updated document.
    @raise Failure if the font cannot be found. *)
val copy_font : Pdf.t -> string -> int -> int list -> Pdf.t -> Pdf.t
(** [missing_fonts pdf range] prints to standard output one line for each font
    on the pages in [range] that has no /FontFile, /FontFile2 or /FontFile3 in
    its font descriptor. Fonts whose /BaseFont is a recognised standard font
    are not listed. Each line holds the page number, resource name, subtype,
    base font and encoding. *)
val missing_fonts : Pdf.t -> int list -> unit