This commit is contained in:
John Whitington
2021-10-18 16:22:38 +01:00
parent 52dc0585c8
commit b70615ec16
4 changed files with 124 additions and 133 deletions

View File

@@ -1,7 +1,7 @@
# Build the cpdf command line tools and top level
MODS = cpdfyojson cpdfxmlm \
cpdferror cpdfjson cpdfstrftime cpdfcoord cpdfattach \
cpdfpagespec cpdfposition cpdffont cpdf cpdfcommand
cpdfpagespec cpdfposition cpdf cpdffont cpdfcommand
SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml

View File

@@ -2265,21 +2265,6 @@ let filesize name =
with
_ -> 0
(* Embed missing fonts with Ghostscript.

   Runs the Ghostscript executable named by [args.path_to_ghostscript] with
   the pdfwrite device, reading [fi] and writing [fo]. This function never
   returns normally: the process exits with status 0 when Ghostscript
   succeeds, and with status 2 when no Ghostscript path was supplied or when
   the Ghostscript command fails. *)
let embed_missing_fonts fi fo =
  let gs = args.path_to_ghostscript in
  if gs = "" then begin
    Printf.eprintf "Please supply path to gs with -gs\n%!";
    exit 2
  end;
  (* -dQUIET is only passed when the user asked for quiet operation. *)
  let quiet_flag = if args.gs_quiet then "-dQUIET" else "" in
  let gscall =
    String.concat ""
      [gs;
       " -dNOPAUSE ";
       quiet_flag;
       " -sDEVICE=pdfwrite -sOUTPUTFILE=";
       Filename.quote fo;
       " -dBATCH ";
       Filename.quote fi]
  in
  if Sys.command gscall = 0 then exit 0
  else begin
    Printf.eprintf "Font embedding failed.\n%!";
    exit 2
  end
(* Mend PDF file with Ghostscript. We use this if a file is malformed and CPDF
* cannot mend it. It is copied to a temporary file, fixed, then we return None or Some (pdf). *)
let mend_pdf_file_with_ghostscript filename =
@@ -2780,68 +2765,6 @@ let split_pdf
enc 0 original_filename squeeze spec pdf
(splitinto chunksize (indx pdf_pages)) pdf_pages
(* Copy a font from [frompdf] with name [fontname] on page [fontpage] to [pdf]
   on all pages in [range], returning the updated document.

   The font is entered in each selected page's /Font resources under its
   /BaseFont name ("/CopyFontAddedNoName" when it has none), not under
   [fontname].

   Raises [Failure] if [fontpage] is not a page of [frompdf], if that page has
   no /Font resources or no entry called [fontname], or if a target page's
   /Font entry is not a dictionary. *)
let copy_font frompdf fontname fontpage range pdf =
  (* Renumber both documents together before copying objects across;
     presumably this keeps their object numbers from clashing. *)
  match Pdf.renumber_pdfs [frompdf; pdf] with
  | [] | [_] | _::_::_::_ -> assert false
  | [frompdf; pdf] ->
      (* 1. Get fontpage *)
      let frompdf_pages = Pdfpage.pages_of_pagetree frompdf in
      let frompdf_page =
        (* [select] is documented in Pdfutil as raising [Invalid_argument]
           for an out-of-range index, so that must be caught here too;
           [Not_found] is kept for compatibility. *)
        try select fontpage frompdf_pages with
        | Not_found | Invalid_argument _ ->
            failwith "copy_font: Page not found in input pdf"
      in
      (* 2. Extract font *)
      let fonts =
        match Pdf.lookup_direct frompdf "/Font" frompdf_page.Pdfpage.resources with
        | Some f -> f
        | None -> failwith "copy_font: font not found"
      in
      let fromfont =
        match Pdf.lookup_direct frompdf fontname fonts with
        | Some f -> f
        | None -> failwith "copy_font: font not found"
      in
      let basefontname =
        match Pdf.lookup_direct frompdf "/BaseFont" fromfont with
        | Some (Pdf.Name n) -> n
        | _ -> "/CopyFontAddedNoName"
      in
      (* 3. Get all objects forming font (except main /Font one) *)
      let objnumbers = Pdf.objects_referenced [] [] frompdf fromfont in
      (* 4. Copy them from frompdf to pdf. *)
      iter
        (function objnum ->
           Pdf.addobj_given_num pdf (objnum, Pdf.lookup_obj frompdf objnum))
        objnumbers;
      (* 5. Get pages from pdf *)
      let pdf_pages = Pdfpage.pages_of_pagetree pdf in
      (* 6. Add the font to pages in range *)
      let pages' =
        map
          (function (page, pagenum) ->
             if mem pagenum range then
               let font =
                 match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with
                 | Some f -> f
                 | None -> Pdf.Dictionary []
               in
               let font' =
                 match font with
                 | (Pdf.Dictionary _) as d ->
                     Pdf.add_dict_entry d basefontname fromfont
                 | _ -> failwith "copy_font: error"
               in
               let resources' =
                 Pdf.add_dict_entry page.Pdfpage.resources "/Font" font'
               in
               {page with Pdfpage.resources = resources'}
             else page)
          (combine pdf_pages (indx pdf_pages))
      in
      (* 7. Put the pages back into the pdf, and return *)
      let pdf, root = Pdfpage.add_pagetree pages' pdf in
      Pdfpage.add_root root [] pdf
(* Extract Images. *)
let pnm_to_channel_24 channel w h s =
let white () = output_char channel ' '
@@ -2989,59 +2912,7 @@ let copy_cropbox_to_mediabox pdf range =
pdf
range
(* Missing Fonts *)

(* [is_missing pdf dict] is [true] when the font dictionary [dict] has no
   /FontDescriptor, or when its descriptor carries none of /FontFile,
   /FontFile2 or /FontFile3. The keys are tried in that order and the search
   stops at the first one present. *)
let is_missing pdf dict =
  match Pdf.lookup_direct pdf "/FontDescriptor" dict with
  | None -> true
  | Some descriptor ->
      let has_entry key =
        match Pdf.lookup_direct pdf key descriptor with
        | Some _ -> true
        | None -> false
      in
      not (List.exists has_entry ["/FontFile"; "/FontFile2"; "/FontFile3"])
(* [missing_font pdf page (name, dict)] prints one line to standard output,
   "page, name, subtype, basefont, encoding", when the font [dict] is missing
   according to [is_missing] and its /BaseFont is not recognised by
   [Pdftext.standard_font_of_name]. Entries which are absent or are not PDF
   names are printed as empty strings. *)
let missing_font pdf page (name, dict) =
  (* Fetch a name-valued entry of the font dictionary, or "" if there is none. *)
  let name_entry key =
    match Pdf.lookup_direct pdf key dict with
    | Some (Pdf.Name n) -> n
    | _ -> ""
  in
  if is_missing pdf dict then begin
    let subtype = name_entry "/Subtype" in
    let basefont = name_entry "/BaseFont" in
    let encoding = name_entry "/Encoding" in
    match Pdftext.standard_font_of_name basefont with
    | Some _ -> ()
    | None ->
        Printf.printf "%i, %s, %s, %s, %s\n" page name subtype basefont encoding
  end
(* [missing_fonts pdf range] reports, via [missing_font], the missing fonts
   found in the /Font resources of each page visited by [Cpdf.iter_pages] for
   [range]. A font with a /DescendantFonts array is replaced by its
   descendants, each reported under the parent's resource name. *)
let missing_fonts pdf range =
  (* Expand one (name, font) entry into the list of fonts to check. *)
  let descendants_or_self ((name, dict) as entry) =
    match Pdf.lookup_direct pdf "/DescendantFonts" dict with
    | Some (Pdf.Array desc_fonts) -> map (fun d -> (name, d)) desc_fonts
    | _ -> [entry]
  in
  let report_page num page =
    match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with
    | Some (Pdf.Dictionary fontdict) ->
        let name_dict_pairs = flatten (map descendants_or_self fontdict) in
        iter (missing_font pdf num) name_dict_pairs
    | _ -> ()
  in
  Cpdf.iter_pages report_page pdf range
(* copy the contents of the box f to the box t. If mediabox_if_missing is set,
the contents of the mediabox will be used if the from box is not available. If
@@ -3475,7 +3346,7 @@ let go () =
| Some x -> x
| None -> failwith "copy_font: no font name given"
in
let outpdf = copy_font frompdf copyfontname args.copyfontpage range pdf in
let outpdf = Cpdffont.copy_font frompdf copyfontname args.copyfontpage range pdf in
write_pdf true outpdf
| _ -> error "copyfont: bad command line"
end
@@ -4168,7 +4039,7 @@ let go () =
| Some MissingFonts ->
let pdf = get_single_pdf args.op true in
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
missing_fonts pdf range
Cpdffont.missing_fonts pdf range
| Some ExtractText ->
let pdf = get_single_pdf args.op true in
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
@@ -4222,7 +4093,7 @@ let go () =
File fo -> fo
| _ -> error "Output method not supported for -embed-missing-fonts"
in
embed_missing_fonts fi fo
Cpdffont.embed_missing_fonts args.path_to_ghostscript args.gs_quiet fi fo
| Some (BookmarksOpenToLevel n) ->
let pdf = get_single_pdf args.op false in
write_pdf false (bookmarks_open_to_level n pdf)

View File

@@ -1,3 +1,5 @@
open Pdfutil
(* Embed missing fonts with Ghostscript. *)
let embed_missing_fonts path_to_ghostscript gs_quiet fi fo =
if path_to_ghostscript = "" then begin
@@ -12,3 +14,117 @@ let embed_missing_fonts path_to_ghostscript gs_quiet fi fo =
match Sys.command gscall with
| 0 -> exit 0
| _ -> Printf.eprintf "Font embedding failed.\n%!"; exit 2
(* Copy a font from [frompdf] with name [fontname] on page [fontpage] to [pdf]
   on all pages in [range], returning the updated document.

   The font is entered in each selected page's /Font resources under its
   /BaseFont name ("/CopyFontAddedNoName" when it has none), not under
   [fontname].

   Raises [Failure] if [fontpage] is not a page of [frompdf], if that page has
   no /Font resources or no entry called [fontname], or if a target page's
   /Font entry is not a dictionary. *)
let copy_font frompdf fontname fontpage range pdf =
  (* Renumber both documents together before copying objects across;
     presumably this keeps their object numbers from clashing. *)
  match Pdf.renumber_pdfs [frompdf; pdf] with
  | [] | [_] | _::_::_::_ -> assert false
  | [frompdf; pdf] ->
      (* 1. Get fontpage *)
      let frompdf_pages = Pdfpage.pages_of_pagetree frompdf in
      let frompdf_page =
        (* [select] is documented in Pdfutil as raising [Invalid_argument]
           for an out-of-range index, so that must be caught here too;
           [Not_found] is kept for compatibility. *)
        try select fontpage frompdf_pages with
        | Not_found | Invalid_argument _ ->
            failwith "copy_font: Page not found in input pdf"
      in
      (* 2. Extract font *)
      let fonts =
        match Pdf.lookup_direct frompdf "/Font" frompdf_page.Pdfpage.resources with
        | Some f -> f
        | None -> failwith "copy_font: font not found"
      in
      let fromfont =
        match Pdf.lookup_direct frompdf fontname fonts with
        | Some f -> f
        | None -> failwith "copy_font: font not found"
      in
      let basefontname =
        match Pdf.lookup_direct frompdf "/BaseFont" fromfont with
        | Some (Pdf.Name n) -> n
        | _ -> "/CopyFontAddedNoName"
      in
      (* 3. Get all objects forming font (except main /Font one) *)
      let objnumbers = Pdf.objects_referenced [] [] frompdf fromfont in
      (* 4. Copy them from frompdf to pdf. *)
      iter
        (function objnum ->
           Pdf.addobj_given_num pdf (objnum, Pdf.lookup_obj frompdf objnum))
        objnumbers;
      (* 5. Get pages from pdf *)
      let pdf_pages = Pdfpage.pages_of_pagetree pdf in
      (* 6. Add the font to pages in range *)
      let pages' =
        map
          (function (page, pagenum) ->
             if mem pagenum range then
               let font =
                 match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with
                 | Some f -> f
                 | None -> Pdf.Dictionary []
               in
               let font' =
                 match font with
                 | (Pdf.Dictionary _) as d ->
                     Pdf.add_dict_entry d basefontname fromfont
                 | _ -> failwith "copy_font: error"
               in
               let resources' =
                 Pdf.add_dict_entry page.Pdfpage.resources "/Font" font'
               in
               {page with Pdfpage.resources = resources'}
             else page)
          (combine pdf_pages (indx pdf_pages))
      in
      (* 7. Put the pages back into the pdf, and return *)
      let pdf, root = Pdfpage.add_pagetree pages' pdf in
      Pdfpage.add_root root [] pdf
(* Missing Fonts *)

(* [is_missing pdf dict] is [true] when the font dictionary [dict] has no
   /FontDescriptor, or when its descriptor carries none of /FontFile,
   /FontFile2 or /FontFile3. The keys are tried in that order and the search
   stops at the first one present. *)
let is_missing pdf dict =
  match Pdf.lookup_direct pdf "/FontDescriptor" dict with
  | None -> true
  | Some descriptor ->
      let has_entry key =
        match Pdf.lookup_direct pdf key descriptor with
        | Some _ -> true
        | None -> false
      in
      not (List.exists has_entry ["/FontFile"; "/FontFile2"; "/FontFile3"])
(* [missing_font pdf page (name, dict)] prints one line to standard output,
   "page, name, subtype, basefont, encoding", when the font [dict] is missing
   according to [is_missing] and its /BaseFont is not recognised by
   [Pdftext.standard_font_of_name]. Entries which are absent or are not PDF
   names are printed as empty strings. *)
let missing_font pdf page (name, dict) =
  (* Fetch a name-valued entry of the font dictionary, or "" if there is none. *)
  let name_entry key =
    match Pdf.lookup_direct pdf key dict with
    | Some (Pdf.Name n) -> n
    | _ -> ""
  in
  if is_missing pdf dict then begin
    let subtype = name_entry "/Subtype" in
    let basefont = name_entry "/BaseFont" in
    let encoding = name_entry "/Encoding" in
    match Pdftext.standard_font_of_name basefont with
    | Some _ -> ()
    | None ->
        Printf.printf "%i, %s, %s, %s, %s\n" page name subtype basefont encoding
  end
(* [missing_fonts pdf range] reports, via [missing_font], the missing fonts
   found in the /Font resources of each page visited by [Cpdf.iter_pages] for
   [range]. A font with a /DescendantFonts array is replaced by its
   descendants, each reported under the parent's resource name. *)
let missing_fonts pdf range =
  (* Expand one (name, font) entry into the list of fonts to check. *)
  let descendants_or_self ((name, dict) as entry) =
    match Pdf.lookup_direct pdf "/DescendantFonts" dict with
    | Some (Pdf.Array desc_fonts) -> map (fun d -> (name, d)) desc_fonts
    | _ -> [entry]
  in
  let report_page num page =
    match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with
    | Some (Pdf.Dictionary fontdict) ->
        let name_dict_pairs = flatten (map descendants_or_self fontdict) in
        iter (missing_font pdf num) name_dict_pairs
    | _ -> ()
  in
  Cpdf.iter_pages report_page pdf range

View File

@@ -1 +1,5 @@
(** [embed_missing_fonts path_to_ghostscript gs_quiet fi fo] embeds missing
    fonts by running Ghostscript. If [path_to_ghostscript] is empty, or the
    Ghostscript command fails, a message is printed to standard error and the
    process exits with status 2; on success the process exits with status 0.
    NOTE(review): [fi] and [fo] appear to be the input and output file names,
    and [gs_quiet] appears to request quiet Ghostscript output -- confirm
    against the implementation. *)
val embed_missing_fonts : string -> bool -> string -> string -> unit

(** [copy_font frompdf fontname fontpage range pdf] copies the font called
    [fontname] in the /Font resources of page [fontpage] of [frompdf] into the
    /Font resources of every page of [pdf] whose number is in [range], and
    returns the resulting document.
    @raise Failure if the page or the font cannot be found. *)
val copy_font : Pdf.t -> string -> int -> int list -> Pdf.t -> Pdf.t

(** [missing_fonts pdf range] prints to standard output one line,
    "page, name, subtype, basefont, encoding", for each font on the pages in
    [range] which has no embedded font file and is not a standard font. *)
val missing_fonts : Pdf.t -> int list -> unit