diff --git a/Makefile b/Makefile index fa8e1f3..11f8fbe 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # Build the cpdf command line tools and top level MODS = cpdfyojson cpdfxmlm \ cpdferror cpdfjson cpdfstrftime cpdfcoord cpdfattach \ - cpdfpagespec cpdfposition cpdffont cpdf cpdfcommand + cpdfpagespec cpdfposition cpdf cpdffont cpdfcommand SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml diff --git a/cpdfcommand.ml b/cpdfcommand.ml index ab6e62d..70fd5b2 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -2265,21 +2265,6 @@ let filesize name = with _ -> 0 -(* Embed missing fonts with Ghostscript. *) -let embed_missing_fonts fi fo = - if args.path_to_ghostscript = "" then begin - Printf.eprintf "Please supply path to gs with -gs\n%!"; - exit 2 - end; - let gscall = - args.path_to_ghostscript ^ - " -dNOPAUSE " ^ (if args.gs_quiet then "-dQUIET" else "") ^ " -sDEVICE=pdfwrite -sOUTPUTFILE=" ^ Filename.quote fo ^ - " -dBATCH " ^ Filename.quote fi - in - match Sys.command gscall with - | 0 -> exit 0 - | _ -> Printf.eprintf "Font embedding failed.\n%!"; exit 2 - (* Mend PDF file with Ghostscript. We use this if a file is malformed and CPDF * cannot mend it. It is copied to a temporary file, fixed, then we return None or Some (pdf). *) let mend_pdf_file_with_ghostscript filename = @@ -2780,68 +2765,6 @@ let split_pdf enc 0 original_filename squeeze spec pdf (splitinto chunksize (indx pdf_pages)) pdf_pages - -(* Copy a font from [frompdf] with name [fontname] on page [fontpage] to [pdf] on all pages in [range] *) -let copy_font frompdf fontname fontpage range pdf = - match Pdf.renumber_pdfs [frompdf; pdf] with - | [] | [_] | _::_::_::_ -> assert false - | [frompdf; pdf] -> - (* 1. Get fontpage *) - let frompdf_pages = Pdfpage.pages_of_pagetree frompdf in - let frompdf_page = - try select fontpage frompdf_pages with - Not_found -> failwith "copy_font: Page not found in input pdf" - in - (* 2. Extract font *) - let fonts = - match Pdf.lookup_direct frompdf "/Font" frompdf_page.Pdfpage.resources with - | Some f -> f - | None -> failwith "copy_font: font not found" - in - let fromfont = - match Pdf.lookup_direct frompdf fontname fonts with - | Some f -> f - | None -> failwith "copy_font: font not found" - in - let basefontname = - match Pdf.lookup_direct frompdf "/BaseFont" fromfont with - | Some (Pdf.Name n) -> n - | _ -> "/CopyFontAddedNoName" - in - (* 3. Get all objects forming font (except main /Font one) *) - let objnumbers = Pdf.objects_referenced [] [] frompdf fromfont in - (* 4. Copy them to from frompdf to pdf. *) - iter (function objnum -> Pdf.addobj_given_num pdf (objnum, Pdf.lookup_obj frompdf objnum)) objnumbers; - (* 5. Get pages from pdf *) - let pdf_pages = Pdfpage.pages_of_pagetree pdf in - (* 6. Add the font to pages in range *) - let pages' = - map - (function (page, pagenum) -> - if mem pagenum range then - let font = - match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with - | Some f -> f - | None -> Pdf.Dictionary [] - in - let font' = - match font with - | (Pdf.Dictionary _) as d -> - Pdf.add_dict_entry d basefontname fromfont - | _ -> failwith "copy_font: error" - in - let resources' = - Pdf.add_dict_entry page.Pdfpage.resources "/Font" font' - in - {page with - Pdfpage.resources = resources'} - else page) - (combine pdf_pages (indx pdf_pages)); - in - (* 7. Put the pages back into the pdf, and return *) - let pdf, root = Pdfpage.add_pagetree pages' pdf in - Pdfpage.add_root root [] pdf - (* Extract Images. *) let pnm_to_channel_24 channel w h s = let white () = output_char channel ' ' @@ -2989,59 +2912,7 @@ let copy_cropbox_to_mediabox pdf range = pdf range -(* Missing Fonts *) -let is_missing pdf dict = - match Pdf.lookup_direct pdf "/FontDescriptor" dict with - | None -> true - | Some d -> - match Pdf.lookup_direct pdf "/FontFile" d with - | Some _ -> false - | None -> - match Pdf.lookup_direct pdf "/FontFile2" d with - | Some _ -> false - | None -> - match Pdf.lookup_direct pdf "/FontFile3" d with - | Some _ -> false - | None -> true - -let missing_font pdf page (name, dict) = - if is_missing pdf dict then - let subtype = - match Pdf.lookup_direct pdf "/Subtype" dict with - | Some (Pdf.Name n) -> n - | _ -> "" - and basefont = - match Pdf.lookup_direct pdf "/BaseFont" dict with - | Some (Pdf.Name n) -> n - | _ -> "" - and encoding = - match Pdf.lookup_direct pdf "/Encoding" dict with - | Some (Pdf.Name n) -> n - | _ -> "" - in - if Pdftext.standard_font_of_name basefont <> None then () else - Printf.printf "%i, %s, %s, %s, %s\n" page name subtype basefont encoding - -let missing_fonts pdf range = - Cpdf.iter_pages - (fun num page -> - match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with - | Some (Pdf.Dictionary fontdict) -> - (* Extract descendant fonts *) - let name_dict_pairs = - flatten - (map - (fun (name, dict) -> - match Pdf.lookup_direct pdf "/DescendantFonts" dict with - | Some (Pdf.Array desc_fonts) -> map (fun d -> name, d) desc_fonts - | _ -> [(name, dict)]) - fontdict) - in - iter (missing_font pdf num) name_dict_pairs - | _ -> ()) - pdf - range (* copy the contents of the box f to the box t. If mediabox_if_missing is set, the contents of the mediabox will be used if the from fox is not available. If @@ -3475,7 +3346,7 @@ let go () = | Some x -> x | None -> failwith "copy_font: no font name given" in - let outpdf = copy_font frompdf copyfontname args.copyfontpage range pdf in + let outpdf = Cpdffont.copy_font frompdf copyfontname args.copyfontpage range pdf in write_pdf true outpdf | _ -> error "copyfont: bad command line" end @@ -4168,7 +4039,7 @@ let go () = | Some MissingFonts -> let pdf = get_single_pdf args.op true in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in - missing_fonts pdf range + Cpdffont.missing_fonts pdf range | Some ExtractText -> let pdf = get_single_pdf args.op true in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in @@ -4222,7 +4093,7 @@ let go () = File fo -> fo | _ -> error "Output method not supported for -embed-missing-fonts" in - embed_missing_fonts fi fo + Cpdffont.embed_missing_fonts args.path_to_ghostscript args.gs_quiet fi fo | Some (BookmarksOpenToLevel n) -> let pdf = get_single_pdf args.op false in write_pdf false (bookmarks_open_to_level n pdf) diff --git a/cpdffont.ml b/cpdffont.ml index 3c0e8e4..0f8de49 100644 --- a/cpdffont.ml +++ b/cpdffont.ml @@ -1,3 +1,5 @@ +open Pdfutil + (* Embed missing fonts with Ghostscript. *) let embed_missing_fonts path_to_ghostscript gs_quiet fi fo = if path_to_ghostscript = "" then begin @@ -12,3 +14,117 @@ let embed_missing_fonts path_to_ghostscript gs_quiet fi fo = match Sys.command gscall with | 0 -> exit 0 | _ -> Printf.eprintf "Font embedding failed.\n%!"; exit 2 + +(* Copy a font from [frompdf] with name [fontname] on page [fontpage] to [pdf] on all pages in [range] *) +let copy_font frompdf fontname fontpage range pdf = + match Pdf.renumber_pdfs [frompdf; pdf] with + | [] | [_] | _::_::_::_ -> assert false + | [frompdf; pdf] -> + (* 1. Get fontpage *) + let frompdf_pages = Pdfpage.pages_of_pagetree frompdf in + let frompdf_page = + try select fontpage frompdf_pages with + Not_found -> failwith "copy_font: Page not found in input pdf" + in + (* 2. Extract font *) + let fonts = + match Pdf.lookup_direct frompdf "/Font" frompdf_page.Pdfpage.resources with + | Some f -> f + | None -> failwith "copy_font: font not found" + in + let fromfont = + match Pdf.lookup_direct frompdf fontname fonts with + | Some f -> f + | None -> failwith "copy_font: font not found" + in + let basefontname = + match Pdf.lookup_direct frompdf "/BaseFont" fromfont with + | Some (Pdf.Name n) -> n + | _ -> "/CopyFontAddedNoName" + in + (* 3. Get all objects forming font (except main /Font one) *) + let objnumbers = Pdf.objects_referenced [] [] frompdf fromfont in + (* 4. Copy them to from frompdf to pdf. *) + iter (function objnum -> Pdf.addobj_given_num pdf (objnum, Pdf.lookup_obj frompdf objnum)) objnumbers; + (* 5. Get pages from pdf *) + let pdf_pages = Pdfpage.pages_of_pagetree pdf in + (* 6. Add the font to pages in range *) + let pages' = + map + (function (page, pagenum) -> + if mem pagenum range then + let font = + match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with + | Some f -> f + | None -> Pdf.Dictionary [] + in + let font' = + match font with + | (Pdf.Dictionary _) as d -> + Pdf.add_dict_entry d basefontname fromfont + | _ -> failwith "copy_font: error" + in + let resources' = + Pdf.add_dict_entry page.Pdfpage.resources "/Font" font' + in + {page with + Pdfpage.resources = resources'} + else page) + (combine pdf_pages (indx pdf_pages)); + in + (* 7. Put the pages back into the pdf, and return *) + let pdf, root = Pdfpage.add_pagetree pages' pdf in + Pdfpage.add_root root [] pdf + +(* Missing Fonts *) +let is_missing pdf dict = + match Pdf.lookup_direct pdf "/FontDescriptor" dict with + | None -> true + | Some d -> + match Pdf.lookup_direct pdf "/FontFile" d with + | Some _ -> false + | None -> + match Pdf.lookup_direct pdf "/FontFile2" d with + | Some _ -> false + | None -> + match Pdf.lookup_direct pdf "/FontFile3" d with + | Some _ -> false + | None -> true + +let missing_font pdf page (name, dict) = + if is_missing pdf dict then + let subtype = + match Pdf.lookup_direct pdf "/Subtype" dict with + | Some (Pdf.Name n) -> n + | _ -> "" + and basefont = + match Pdf.lookup_direct pdf "/BaseFont" dict with + | Some (Pdf.Name n) -> n + | _ -> "" + and encoding = + match Pdf.lookup_direct pdf "/Encoding" dict with + | Some (Pdf.Name n) -> n + | _ -> "" + in + if Pdftext.standard_font_of_name basefont <> None then () else + Printf.printf "%i, %s, %s, %s, %s\n" page name subtype basefont encoding + +let missing_fonts pdf range = + Cpdf.iter_pages + (fun num page -> + match Pdf.lookup_direct pdf "/Font" page.Pdfpage.resources with + | Some (Pdf.Dictionary fontdict) -> + (* Extract descendant fonts *) + let name_dict_pairs = + flatten + (map + (fun (name, dict) -> + match Pdf.lookup_direct pdf "/DescendantFonts" dict with + | Some (Pdf.Array desc_fonts) -> map (fun d -> name, d) desc_fonts + | _ -> [(name, dict)]) + fontdict) + in + iter (missing_font pdf num) name_dict_pairs + | _ -> ()) + pdf + range diff --git a/cpdffont.mli b/cpdffont.mli index 499b5cc..8c5b5c8 100644 --- a/cpdffont.mli +++ b/cpdffont.mli @@ -1 +1,5 @@ val embed_missing_fonts : string -> bool -> string -> string -> unit + +val copy_font : Pdf.t -> string -> int -> int list -> Pdf.t -> Pdf.t + +val missing_fonts : Pdf.t -> int list -> unit