mirror of
https://github.com/johnwhitington/cpdf-source.git
synced 2025-01-20 21:01:41 +01:00
Rehabilitate -extract-font
This commit is contained in:
parent
35bf9f14d7
commit
8a1fdc23a4
1
Changes
1
Changes
@ -8,6 +8,7 @@ o List document and page info in JSON format
|
||||
o List page labels in JSON format
|
||||
o List fonts in JSON format
|
||||
o Identify PDF/A, PDF/X, PDF/E, PDF/VT, PDF/UA
|
||||
o Extract font files from a document
|
||||
|
||||
Extended features:
|
||||
|
||||
|
@ -189,7 +189,7 @@ type op =
|
||||
| ExtractImages
|
||||
| ImageResolution of float
|
||||
| MissingFonts
|
||||
| ExtractFontFile
|
||||
| ExtractFontFile of string
|
||||
| ExtractText
|
||||
| OpenAtPage of string
|
||||
| OpenAtPageFit of string
|
||||
@ -320,7 +320,7 @@ let string_of_op = function
|
||||
| ExtractImages -> "ExtractImages"
|
||||
| ImageResolution _ -> "ImageResolution"
|
||||
| MissingFonts -> "MissingFonts"
|
||||
| ExtractFontFile -> "ExtractFontFile"
|
||||
| ExtractFontFile _ -> "ExtractFontFile"
|
||||
| ExtractText -> "ExtractText"
|
||||
| OpenAtPage _ -> "OpenAtPage"
|
||||
| OpenAtPageFit _ -> "OpenAtPageFit"
|
||||
@ -837,7 +837,7 @@ let banned banlist = function
|
||||
| ShowBoxes | TrimMarks | CreateMetadata | SetMetadataDate _ | SetVersion _
|
||||
| SetAuthor _|SetTitle _|SetSubject _|SetKeywords _|SetCreate _
|
||||
| SetModify _|SetCreator _|SetProducer _|RemoveDictEntry _ | ReplaceDictEntry _ | PrintDictEntry _ | SetMetadata _
|
||||
| ExtractText | ExtractImages | ExtractFontFile
|
||||
| ExtractText | ExtractImages | ExtractFontFile _
|
||||
| AddPageLabels | RemovePageLabels | OutputJSON | OCGCoalesce
|
||||
| OCGRename | OCGList | OCGOrderAll | PrintFontEncoding _ | TableOfContents | Typeset _ | Composition _
|
||||
| TextWidth _ | SetAnnotations _ | CopyAnnotations _
|
||||
@ -1871,6 +1871,9 @@ let settextwidth s =
|
||||
let setdraw () =
|
||||
args.op <- Some Draw
|
||||
|
||||
let setextractfontfile s =
|
||||
args.op <- Some (ExtractFontFile s)
|
||||
|
||||
let () = Cpdfdrawcontrol.getfontname := fun () -> args.fontname
|
||||
let () = Cpdfdrawcontrol.getfontsize := fun () -> args.fontsize
|
||||
let () = Cpdfdrawcontrol.setfontname := setfont
|
||||
@ -2668,6 +2671,9 @@ and specs =
|
||||
("-print-font-table-page",
|
||||
Arg.Int setfontpage,
|
||||
" Set page for -print-font-table");
|
||||
("-extract-font",
|
||||
Arg.String setextractfontfile,
|
||||
" Extract a font");
|
||||
("-table-of-contents",
|
||||
Arg.Unit (setop TableOfContents),
|
||||
" Typeset a table of contents from bookmarks");
|
||||
@ -2749,7 +2755,7 @@ and specs =
|
||||
("-debug-stderr-to-stdout", Arg.Unit setstderrtostdout, "");
|
||||
("-stay-on-error", Arg.Unit setstayonerror, "");
|
||||
(* These items are unfinished *)
|
||||
("-extract-fontfile", Arg.Unit (setop ExtractFontFile), "");
|
||||
|
||||
("-extract-text", Arg.Unit (setop ExtractText), "");
|
||||
("-extract-text-font-size", Arg.Float setextracttextfontsize, "");
|
||||
]
|
||||
@ -3439,17 +3445,17 @@ let go () =
|
||||
write_pdf true (Cpdffont.remove_fonts pdf)
|
||||
| _ -> error "remove fonts: bad command line"
|
||||
end
|
||||
| Some ExtractFontFile ->
|
||||
| Some (ExtractFontFile spec) ->
|
||||
begin match args.inputs, args.out with
|
||||
| (_, pagespec, u, o, _, _)::_, _ ->
|
||||
let pdf = get_single_pdf (Some ExtractFontFile) false in
|
||||
let page = args.copyfontpage
|
||||
and name =
|
||||
match args.copyfontname with
|
||||
| Some x -> x
|
||||
| None -> failwith "extract fontfile: no font name given"
|
||||
in
|
||||
Cpdffont.extract_fontfile page name pdf
|
||||
| (_, pagespec, u, o, _, _)::_, File filename ->
|
||||
let pdf = get_single_pdf (Some (ExtractFontFile spec)) false in
|
||||
begin match String.split_on_char ',' spec with
|
||||
| [pnum; name] ->
|
||||
begin try Cpdffont.extract_fontfile (int_of_string pnum) name filename pdf with
|
||||
Failure _ (*"int_of_string"*) -> error "extract font: bad page number"
|
||||
end
|
||||
| _ -> error "extract font: bad specification"
|
||||
end
|
||||
| _ -> error "extract fontfile: bad command line"
|
||||
end
|
||||
| Some CountPages ->
|
||||
|
19
cpdffont.ml
19
cpdffont.ml
@ -199,8 +199,7 @@ let print_font_table pdf fontname pagenumber =
|
||||
done
|
||||
| _ -> failwith "addtext: font not found for width"
|
||||
|
||||
(* Extracts font to font.dat in CWD. *)
|
||||
let extract_fontfile pagenumber fontname pdf =
|
||||
let extract_fontfile pagenumber fontname filename pdf =
|
||||
let resources = (select pagenumber (Pdfpage.pages_of_pagetree pdf)).Pdfpage.resources in
|
||||
match Pdf.lookup_direct pdf "/Font" resources with
|
||||
| None -> failwith "extract_fontfile: font not found"
|
||||
@ -212,32 +211,22 @@ let extract_fontfile pagenumber fontname pdf =
|
||||
| Pdftext.SimpleFont {Pdftext.fontdescriptor = Some {Pdftext.fontfile = Some fontfile}} ->
|
||||
begin let objnum =
|
||||
match fontfile with
|
||||
| Pdftext.FontFile i -> i
|
||||
| Pdftext.FontFile2 i -> i
|
||||
| Pdftext.FontFile3 i -> i
|
||||
| Pdftext.FontFile i | Pdftext.FontFile2 i | Pdftext.FontFile3 i -> i
|
||||
in
|
||||
match Pdf.lookup_obj pdf objnum with
|
||||
| Pdf.Stream s as obj ->
|
||||
Pdfcodec.decode_pdfstream pdf obj;
|
||||
begin match s with
|
||||
| {contents = (_, Pdf.Got bytes)} ->
|
||||
let fh = open_out_bin "font.dat" in
|
||||
let fh = open_out_bin filename in
|
||||
for x = 0 to bytes_size bytes - 1 do output_byte fh (bget bytes x) done;
|
||||
close_out fh;
|
||||
(* Now try to read using Pdfcff module *)
|
||||
(*let font = Pdftruetype.to_type3 pdf font in*)
|
||||
(*let extractor = Pdftext.text_extractor_of_font pdf fontobj in*)
|
||||
(*flprint "glyph names for incodes 0,1,2,3...";
|
||||
iter print_string (Pdftext.glyphnames_of_text extractor "\000\001\002\003\004\005\006\007");
|
||||
flprint "\n";*)
|
||||
()
|
||||
close_out fh
|
||||
| _ -> failwith "extract_fontfile"
|
||||
end
|
||||
| _ -> failwith "extract_fontfile"
|
||||
end
|
||||
| _ -> failwith "unsupported or unfound font"
|
||||
|
||||
|
||||
(* Remove Embedded fonts. This is done by removing the Font Descriptor. *)
|
||||
let remove_fontdescriptor pdf = function
|
||||
| Pdf.Dictionary d as font ->
|
||||
|
@ -23,7 +23,7 @@ val missing_fonts : Pdf.t -> int list -> unit
|
||||
val print_font_table : Pdf.t -> string -> int -> unit
|
||||
|
||||
(** Extract a font file to disk. *)
|
||||
val extract_fontfile : int -> string -> Pdf.t -> unit
|
||||
val extract_fontfile : int -> string -> string -> Pdf.t -> unit
|
||||
|
||||
(** Remove fonts from a document. *)
|
||||
val remove_fonts : Pdf.t -> Pdf.t
|
||||
|
Loading…
Reference in New Issue
Block a user