mirror of
https://github.com/johnwhitington/cpdf-source.git
synced 2025-06-05 22:09:39 +02:00
more
This commit is contained in:
2
Makefile
2
Makefile
@@ -1,7 +1,7 @@
|
||||
# Build the cpdf command line tools and top level
|
||||
MODS = cpdfyojson cpdfxmlm \
|
||||
cpdferror cpdfjson cpdfstrftime cpdfcoord cpdfattach \
|
||||
cpdfpagespec cpdfposition cpdffont cpdf cpdfcommand
|
||||
cpdfpagespec cpdfposition cpdf cpdffont cpdfcommand
|
||||
|
||||
SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml
|
||||
|
||||
|
135
cpdfcommand.ml
135
cpdfcommand.ml
@@ -2265,21 +2265,6 @@ let filesize name =
|
||||
with
|
||||
_ -> 0
|
||||
|
||||
(* Embed missing fonts with Ghostscript. *)
|
||||
let embed_missing_fonts fi fo =
|
||||
if args.path_to_ghostscript = "" then begin
|
||||
Printf.eprintf "Please supply path to gs with -gs\n%!";
|
||||
exit 2
|
||||
end;
|
||||
let gscall =
|
||||
args.path_to_ghostscript ^
|
||||
" -dNOPAUSE " ^ (if args.gs_quiet then "-dQUIET" else "") ^ " -sDEVICE=pdfwrite -sOUTPUTFILE=" ^ Filename.quote fo ^
|
||||
" -dBATCH " ^ Filename.quote fi
|
||||
in
|
||||
match Sys.command gscall with
|
||||
| 0 -> exit 0
|
||||
| _ -> Printf.eprintf "Font embedding failed.\n%!"; exit 2
|
||||
|
||||
(* Mend PDF file with Ghostscript. We use this if a file is malformed and CPDF
|
||||
* cannot mend it. It is copied to a temporary file, fixed, then we return None or Some (pdf). *)
|
||||
let mend_pdf_file_with_ghostscript filename =
|
||||
@@ -2780,68 +2765,6 @@ let split_pdf
|
||||
enc 0 original_filename squeeze spec pdf
|
||||
(splitinto chunksize (indx pdf_pages)) pdf_pages
|
||||
|
||||
|
||||
(* Copy a font from [frompdf] with name [fontname] on page [fontpage] to [pdf] on all pages in [range] *)
|
||||
let copy_font frompdf fontname fontpage range pdf =
|
||||
match Pdf.renumber_pdfs [frompdf; pdf] with
|
||||
| [] | [_] | _::_::_::_ -> assert false
|
||||
| [frompdf; pdf] ->
|
||||
(* 1. Get fontpage *)
|
||||
let frompdf_pages = Pdfpage.pages_of_pagetree frompdf in
|
||||
let frompdf_page =
|
||||
try select fontpage frompdf_pages with
|
||||
Not_found -> failwith "copy_font: Page not found in input pdf"
|
||||
in
|
||||
(* 2. Extract font *)
|
||||
let fonts =
|
||||
match Pdf.lookup_direct frompdf "/Font" frompdf_page.Pdfpage.resources with
|
||||
| Some f -> f
|
||||
| None -> failwith "copy_font: font not found"
|
||||
in
|
||||
let fromfont =
|
||||
match Pdf.lookup_direct frompdf fontname fonts with
|
||||
| Some f -> f
|
||||
| None -> failwith "copy_font: font not found"
|
||||
in
|
||||
let basefontname =
|
||||
match Pdf.lookup_direct frompdf "/BaseFont" fromfont with
|
||||
| Some (Pdf.Name n) -> n
|
||||
| _ -> "/CopyFontAddedNoName"
|
||||
in
|
||||
(* 3. Get all objects forming font (except main /Font one) *)
|
||||
let objnumbers = Pdf.objects_referenced [] [] frompdf fromfont in
|
||||
(* 4. Copy them to from frompdf to pdf. *)
|
||||
iter (function objnum -> Pdf.addobj_given_num pdf (objnum, Pdf.lookup_obj frompdf objnum)) objnumbers;
|
||||
(* 5. Get pages from pdf *)
|
||||
let pdf_pages = Pdfpage.pages_of_pagetree pdf in
|
||||
(* 6. Add the font to pages in range *)
|
||||
let pages' =
|
||||
map
|
||||
(function (page, pagenum) ->
|
||||
if mem pagenum range then
|
||||
let font =
|
||||
match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with
|
||||
| Some f -> f
|
||||
| None -> Pdf.Dictionary []
|
||||
in
|
||||
let font' =
|
||||
match font with
|
||||
| (Pdf.Dictionary _) as d ->
|
||||
Pdf.add_dict_entry d basefontname fromfont
|
||||
| _ -> failwith "copy_font: error"
|
||||
in
|
||||
let resources' =
|
||||
Pdf.add_dict_entry page.Pdfpage.resources "/Font" font'
|
||||
in
|
||||
{page with
|
||||
Pdfpage.resources = resources'}
|
||||
else page)
|
||||
(combine pdf_pages (indx pdf_pages));
|
||||
in
|
||||
(* 7. Put the pages back into the pdf, and return *)
|
||||
let pdf, root = Pdfpage.add_pagetree pages' pdf in
|
||||
Pdfpage.add_root root [] pdf
|
||||
|
||||
(* Extract Images. *)
|
||||
let pnm_to_channel_24 channel w h s =
|
||||
let white () = output_char channel ' '
|
||||
@@ -2989,59 +2912,7 @@ let copy_cropbox_to_mediabox pdf range =
|
||||
pdf
|
||||
range
|
||||
|
||||
(* Missing Fonts *)
|
||||
|
||||
let is_missing pdf dict =
|
||||
match Pdf.lookup_direct pdf "/FontDescriptor" dict with
|
||||
| None -> true
|
||||
| Some d ->
|
||||
match Pdf.lookup_direct pdf "/FontFile" d with
|
||||
| Some _ -> false
|
||||
| None ->
|
||||
match Pdf.lookup_direct pdf "/FontFile2" d with
|
||||
| Some _ -> false
|
||||
| None ->
|
||||
match Pdf.lookup_direct pdf "/FontFile3" d with
|
||||
| Some _ -> false
|
||||
| None -> true
|
||||
|
||||
let missing_font pdf page (name, dict) =
|
||||
if is_missing pdf dict then
|
||||
let subtype =
|
||||
match Pdf.lookup_direct pdf "/Subtype" dict with
|
||||
| Some (Pdf.Name n) -> n
|
||||
| _ -> ""
|
||||
and basefont =
|
||||
match Pdf.lookup_direct pdf "/BaseFont" dict with
|
||||
| Some (Pdf.Name n) -> n
|
||||
| _ -> ""
|
||||
and encoding =
|
||||
match Pdf.lookup_direct pdf "/Encoding" dict with
|
||||
| Some (Pdf.Name n) -> n
|
||||
| _ -> ""
|
||||
in
|
||||
if Pdftext.standard_font_of_name basefont <> None then () else
|
||||
Printf.printf "%i, %s, %s, %s, %s\n" page name subtype basefont encoding
|
||||
|
||||
let missing_fonts pdf range =
|
||||
Cpdf.iter_pages
|
||||
(fun num page ->
|
||||
match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with
|
||||
| Some (Pdf.Dictionary fontdict) ->
|
||||
(* Extract descendant fonts *)
|
||||
let name_dict_pairs =
|
||||
flatten
|
||||
(map
|
||||
(fun (name, dict) ->
|
||||
match Pdf.lookup_direct pdf "/DescendantFonts" dict with
|
||||
| Some (Pdf.Array desc_fonts) -> map (fun d -> name, d) desc_fonts
|
||||
| _ -> [(name, dict)])
|
||||
fontdict)
|
||||
in
|
||||
iter (missing_font pdf num) name_dict_pairs
|
||||
| _ -> ())
|
||||
pdf
|
||||
range
|
||||
|
||||
(* copy the contents of the box f to the box t. If mediabox_if_missing is set,
|
||||
the contents of the mediabox will be used if the from fox is not available. If
|
||||
@@ -3475,7 +3346,7 @@ let go () =
|
||||
| Some x -> x
|
||||
| None -> failwith "copy_font: no font name given"
|
||||
in
|
||||
let outpdf = copy_font frompdf copyfontname args.copyfontpage range pdf in
|
||||
let outpdf = Cpdffont.copy_font frompdf copyfontname args.copyfontpage range pdf in
|
||||
write_pdf true outpdf
|
||||
| _ -> error "copyfont: bad command line"
|
||||
end
|
||||
@@ -4168,7 +4039,7 @@ let go () =
|
||||
| Some MissingFonts ->
|
||||
let pdf = get_single_pdf args.op true in
|
||||
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
||||
missing_fonts pdf range
|
||||
Cpdffont.missing_fonts pdf range
|
||||
| Some ExtractText ->
|
||||
let pdf = get_single_pdf args.op true in
|
||||
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
||||
@@ -4222,7 +4093,7 @@ let go () =
|
||||
File fo -> fo
|
||||
| _ -> error "Output method not supported for -embed-missing-fonts"
|
||||
in
|
||||
embed_missing_fonts fi fo
|
||||
Cpdffont.embed_missing_fonts args.path_to_ghostscript args.gs_quiet fi fo
|
||||
| Some (BookmarksOpenToLevel n) ->
|
||||
let pdf = get_single_pdf args.op false in
|
||||
write_pdf false (bookmarks_open_to_level n pdf)
|
||||
|
116
cpdffont.ml
116
cpdffont.ml
@@ -1,3 +1,5 @@
|
||||
open Pdfutil
|
||||
|
||||
(* Embed missing fonts with Ghostscript. *)
|
||||
let embed_missing_fonts path_to_ghostscript gs_quiet fi fo =
|
||||
if path_to_ghostscript = "" then begin
|
||||
@@ -12,3 +14,117 @@ let embed_missing_fonts path_to_ghostscript gs_quiet fi fo =
|
||||
match Sys.command gscall with
|
||||
| 0 -> exit 0
|
||||
| _ -> Printf.eprintf "Font embedding failed.\n%!"; exit 2
|
||||
|
||||
(* Copy a font from [frompdf] with name [fontname] on page [fontpage] to [pdf] on all pages in [range] *)
|
||||
let copy_font frompdf fontname fontpage range pdf =
|
||||
match Pdf.renumber_pdfs [frompdf; pdf] with
|
||||
| [] | [_] | _::_::_::_ -> assert false
|
||||
| [frompdf; pdf] ->
|
||||
(* 1. Get fontpage *)
|
||||
let frompdf_pages = Pdfpage.pages_of_pagetree frompdf in
|
||||
let frompdf_page =
|
||||
try select fontpage frompdf_pages with
|
||||
Not_found -> failwith "copy_font: Page not found in input pdf"
|
||||
in
|
||||
(* 2. Extract font *)
|
||||
let fonts =
|
||||
match Pdf.lookup_direct frompdf "/Font" frompdf_page.Pdfpage.resources with
|
||||
| Some f -> f
|
||||
| None -> failwith "copy_font: font not found"
|
||||
in
|
||||
let fromfont =
|
||||
match Pdf.lookup_direct frompdf fontname fonts with
|
||||
| Some f -> f
|
||||
| None -> failwith "copy_font: font not found"
|
||||
in
|
||||
let basefontname =
|
||||
match Pdf.lookup_direct frompdf "/BaseFont" fromfont with
|
||||
| Some (Pdf.Name n) -> n
|
||||
| _ -> "/CopyFontAddedNoName"
|
||||
in
|
||||
(* 3. Get all objects forming font (except main /Font one) *)
|
||||
let objnumbers = Pdf.objects_referenced [] [] frompdf fromfont in
|
||||
(* 4. Copy them to from frompdf to pdf. *)
|
||||
iter (function objnum -> Pdf.addobj_given_num pdf (objnum, Pdf.lookup_obj frompdf objnum)) objnumbers;
|
||||
(* 5. Get pages from pdf *)
|
||||
let pdf_pages = Pdfpage.pages_of_pagetree pdf in
|
||||
(* 6. Add the font to pages in range *)
|
||||
let pages' =
|
||||
map
|
||||
(function (page, pagenum) ->
|
||||
if mem pagenum range then
|
||||
let font =
|
||||
match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with
|
||||
| Some f -> f
|
||||
| None -> Pdf.Dictionary []
|
||||
in
|
||||
let font' =
|
||||
match font with
|
||||
| (Pdf.Dictionary _) as d ->
|
||||
Pdf.add_dict_entry d basefontname fromfont
|
||||
| _ -> failwith "copy_font: error"
|
||||
in
|
||||
let resources' =
|
||||
Pdf.add_dict_entry page.Pdfpage.resources "/Font" font'
|
||||
in
|
||||
{page with
|
||||
Pdfpage.resources = resources'}
|
||||
else page)
|
||||
(combine pdf_pages (indx pdf_pages));
|
||||
in
|
||||
(* 7. Put the pages back into the pdf, and return *)
|
||||
let pdf, root = Pdfpage.add_pagetree pages' pdf in
|
||||
Pdfpage.add_root root [] pdf
|
||||
|
||||
(* Missing Fonts *)
|
||||
let is_missing pdf dict =
|
||||
match Pdf.lookup_direct pdf "/FontDescriptor" dict with
|
||||
| None -> true
|
||||
| Some d ->
|
||||
match Pdf.lookup_direct pdf "/FontFile" d with
|
||||
| Some _ -> false
|
||||
| None ->
|
||||
match Pdf.lookup_direct pdf "/FontFile2" d with
|
||||
| Some _ -> false
|
||||
| None ->
|
||||
match Pdf.lookup_direct pdf "/FontFile3" d with
|
||||
| Some _ -> false
|
||||
| None -> true
|
||||
|
||||
let missing_font pdf page (name, dict) =
|
||||
if is_missing pdf dict then
|
||||
let subtype =
|
||||
match Pdf.lookup_direct pdf "/Subtype" dict with
|
||||
| Some (Pdf.Name n) -> n
|
||||
| _ -> ""
|
||||
and basefont =
|
||||
match Pdf.lookup_direct pdf "/BaseFont" dict with
|
||||
| Some (Pdf.Name n) -> n
|
||||
| _ -> ""
|
||||
and encoding =
|
||||
match Pdf.lookup_direct pdf "/Encoding" dict with
|
||||
| Some (Pdf.Name n) -> n
|
||||
| _ -> ""
|
||||
in
|
||||
if Pdftext.standard_font_of_name basefont <> None then () else
|
||||
Printf.printf "%i, %s, %s, %s, %s\n" page name subtype basefont encoding
|
||||
|
||||
let missing_fonts pdf range =
|
||||
Cpdf.iter_pages
|
||||
(fun num page ->
|
||||
match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with
|
||||
| Some (Pdf.Dictionary fontdict) ->
|
||||
(* Extract descendant fonts *)
|
||||
let name_dict_pairs =
|
||||
flatten
|
||||
(map
|
||||
(fun (name, dict) ->
|
||||
match Pdf.lookup_direct pdf "/DescendantFonts" dict with
|
||||
| Some (Pdf.Array desc_fonts) -> map (fun d -> name, d) desc_fonts
|
||||
| _ -> [(name, dict)])
|
||||
fontdict)
|
||||
in
|
||||
iter (missing_font pdf num) name_dict_pairs
|
||||
| _ -> ())
|
||||
pdf
|
||||
range
|
||||
|
@@ -1 +1,5 @@
|
||||
val embed_missing_fonts : string -> bool -> string -> string -> unit
|
||||
|
||||
val copy_font : Pdf.t -> string -> int -> int list -> Pdf.t -> Pdf.t
|
||||
|
||||
val missing_fonts : Pdf.t -> int list -> unit
|
||||
|
Reference in New Issue
Block a user