mirror of
https://github.com/johnwhitington/cpdf-source.git
synced 2025-04-03 05:01:34 +02:00
Expand font extraction to xobject fonts
This commit is contained in:
parent
1d2aab34f4
commit
3bb5019c46
65
cpdffont.ml
65
cpdffont.ml
@ -208,33 +208,46 @@ let print_font_table pdf fontname pagenumber =
|
||||
done
|
||||
| _ -> failwith "addtext: font not found for width"
|
||||
|
||||
let font_from_name pdf fontname pagenumber =
|
||||
try
|
||||
let resources = ref (select pagenumber (Pdfpage.pages_of_pagetree pdf)).Pdfpage.resources in
|
||||
let chain = ref (tl (String.split_on_char '/' fontname)) in
|
||||
let font = ref Pdf.Null in
|
||||
while !chain <> [] do
|
||||
match !chain with
|
||||
| [f] ->
|
||||
font := unopt (Pdf.lookup_chain pdf !resources ["/Font"; ("/" ^ f)]);
|
||||
chain := []
|
||||
| x::xs ->
|
||||
resources := unopt (Pdf.lookup_chain pdf !resources ["/XObject"; "/" ^ x; "/Resources"]);
|
||||
chain := xs
|
||||
| [] -> ()
|
||||
done;
|
||||
!font
|
||||
with
|
||||
_ -> Pdfe.log (Printf.sprintf "Not found: font %s on page %i\n" fontname pagenumber); Pdf.Null
|
||||
|
||||
let extract_fontfile pagenumber fontname filename pdf =
|
||||
let resources = (select pagenumber (Pdfpage.pages_of_pagetree pdf)).Pdfpage.resources in
|
||||
match Pdf.lookup_direct pdf "/Font" resources with
|
||||
| None -> failwith "extract_fontfile: font not found"
|
||||
| Some fonts ->
|
||||
let fontobj = Pdf.lookup_fail ("no font " ^ fontname) pdf fontname fonts in
|
||||
let font = Pdftext.read_font pdf fontobj in
|
||||
match font with
|
||||
| Pdftext.CIDKeyedFont (_, {Pdftext.cid_fontdescriptor = {Pdftext.fontfile = Some fontfile}}, _)
|
||||
| Pdftext.SimpleFont {Pdftext.fontdescriptor = Some {Pdftext.fontfile = Some fontfile}} ->
|
||||
begin let objnum =
|
||||
match fontfile with
|
||||
| Pdftext.FontFile i | Pdftext.FontFile2 i | Pdftext.FontFile3 i -> i
|
||||
in
|
||||
match Pdf.lookup_obj pdf objnum with
|
||||
| Pdf.Stream s as obj ->
|
||||
Pdfcodec.decode_pdfstream pdf obj;
|
||||
begin match s with
|
||||
| {contents = (_, Pdf.Got bytes)} ->
|
||||
let fh = open_out_bin filename in
|
||||
for x = 0 to bytes_size bytes - 1 do output_byte fh (bget bytes x) done;
|
||||
close_out fh
|
||||
| _ -> failwith "extract_fontfile"
|
||||
end
|
||||
| _ -> failwith "extract_fontfile"
|
||||
end
|
||||
| _ -> failwith "unsupported or unfound font"
|
||||
match Pdftext.read_font pdf (font_from_name pdf fontname pagenumber) with
|
||||
| Pdftext.CIDKeyedFont (_, {Pdftext.cid_fontdescriptor = {Pdftext.fontfile = Some fontfile}}, _)
|
||||
| Pdftext.SimpleFont {Pdftext.fontdescriptor = Some {Pdftext.fontfile = Some fontfile}} ->
|
||||
begin let objnum =
|
||||
match fontfile with
|
||||
| Pdftext.FontFile i | Pdftext.FontFile2 i | Pdftext.FontFile3 i -> i
|
||||
in
|
||||
match Pdf.lookup_obj pdf objnum with
|
||||
| Pdf.Stream s as obj ->
|
||||
Pdfcodec.decode_pdfstream pdf obj;
|
||||
begin match s with
|
||||
| {contents = (_, Pdf.Got bytes)} ->
|
||||
let fh = open_out_bin filename in
|
||||
for x = 0 to bytes_size bytes - 1 do output_byte fh (bget bytes x) done;
|
||||
close_out fh
|
||||
| _ -> failwith "extract_fontfile"
|
||||
end
|
||||
| _ -> failwith "extract_fontfile"
|
||||
end
|
||||
| _ -> failwith "unsupported or unfound font"
|
||||
|
||||
(* Remove Embedded fonts. This is done by removing the Font Descriptor. *)
|
||||
let remove_fontdescriptor pdf = function
|
||||
|
Loading…
x
Reference in New Issue
Block a user