mirror of
				https://github.com/johnwhitington/cpdf-source.git
				synced 2025-06-05 22:09:39 +02:00 
			
		
		
		
	Expand font extraction to xobject fonts
This commit is contained in:
		
							
								
								
									
										65
									
								
								cpdffont.ml
									
									
									
									
									
								
							
							
						
						
									
										65
									
								
								cpdffont.ml
									
									
									
									
									
								
							| @@ -208,33 +208,46 @@ let print_font_table pdf fontname pagenumber = | |||||||
|             done |             done | ||||||
|     | _ -> failwith "addtext: font not found for width" |     | _ -> failwith "addtext: font not found for width" | ||||||
|  |  | ||||||
|  | let font_from_name pdf fontname pagenumber = | ||||||
|  |   try | ||||||
|  |     let resources = ref (select pagenumber (Pdfpage.pages_of_pagetree pdf)).Pdfpage.resources in | ||||||
|  |     let chain = ref (tl (String.split_on_char '/' fontname)) in | ||||||
|  |     let font = ref Pdf.Null in | ||||||
|  |       while !chain <> [] do | ||||||
|  |         match !chain with | ||||||
|  |         | [f] -> | ||||||
|  |             font := unopt (Pdf.lookup_chain pdf !resources ["/Font"; ("/" ^ f)]); | ||||||
|  |             chain := [] | ||||||
|  |         | x::xs -> | ||||||
|  |             resources := unopt (Pdf.lookup_chain pdf !resources ["/XObject"; "/" ^ x; "/Resources"]); | ||||||
|  |             chain := xs | ||||||
|  |         | [] -> () | ||||||
|  |       done; | ||||||
|  |       !font | ||||||
|  |   with | ||||||
|  |     _ -> Pdfe.log (Printf.sprintf "Not found: font %s on page %i\n" fontname pagenumber); Pdf.Null | ||||||
|  |  | ||||||
| let extract_fontfile pagenumber fontname filename pdf = | let extract_fontfile pagenumber fontname filename pdf = | ||||||
|   let resources = (select pagenumber (Pdfpage.pages_of_pagetree pdf)).Pdfpage.resources in |   match Pdftext.read_font pdf (font_from_name pdf fontname pagenumber) with | ||||||
|     match Pdf.lookup_direct pdf "/Font" resources with |   | Pdftext.CIDKeyedFont (_, {Pdftext.cid_fontdescriptor = {Pdftext.fontfile = Some fontfile}}, _) | ||||||
|     | None -> failwith "extract_fontfile: font not found" |   | Pdftext.SimpleFont {Pdftext.fontdescriptor = Some {Pdftext.fontfile = Some fontfile}} -> | ||||||
|     | Some fonts -> |       begin let objnum = | ||||||
|         let fontobj = Pdf.lookup_fail ("no font " ^ fontname) pdf fontname fonts in |         match fontfile with | ||||||
|           let font = Pdftext.read_font pdf fontobj in |         | Pdftext.FontFile i | Pdftext.FontFile2 i | Pdftext.FontFile3 i -> i | ||||||
|             match font with |       in | ||||||
|             | Pdftext.CIDKeyedFont (_, {Pdftext.cid_fontdescriptor = {Pdftext.fontfile = Some fontfile}}, _) |         match Pdf.lookup_obj pdf objnum with | ||||||
|             | Pdftext.SimpleFont {Pdftext.fontdescriptor = Some {Pdftext.fontfile = Some fontfile}} -> |         | Pdf.Stream s as obj -> | ||||||
|                 begin let objnum = |             Pdfcodec.decode_pdfstream pdf obj; | ||||||
|                   match fontfile with |             begin match s with | ||||||
|                   | Pdftext.FontFile i | Pdftext.FontFile2 i | Pdftext.FontFile3 i -> i |             | {contents = (_, Pdf.Got bytes)} -> | ||||||
|                 in |                  let fh = open_out_bin filename in | ||||||
|                   match Pdf.lookup_obj pdf objnum with |                    for x = 0 to bytes_size bytes - 1 do output_byte fh (bget bytes x) done; | ||||||
|                   | Pdf.Stream s as obj -> |                    close_out fh | ||||||
|                       Pdfcodec.decode_pdfstream pdf obj; |             | _ -> failwith "extract_fontfile" | ||||||
|                       begin match s with |             end | ||||||
|                       | {contents = (_, Pdf.Got bytes)} -> |         | _ -> failwith "extract_fontfile" | ||||||
|                            let fh = open_out_bin filename in |       end | ||||||
|                              for x = 0 to bytes_size bytes - 1 do output_byte fh (bget bytes x) done; |   | _ -> failwith "unsupported or unfound font" | ||||||
|                              close_out fh |  | ||||||
|                       | _ -> failwith "extract_fontfile" |  | ||||||
|                       end |  | ||||||
|                   | _ -> failwith "extract_fontfile" |  | ||||||
|                 end |  | ||||||
|             | _ -> failwith "unsupported or unfound font" |  | ||||||
|  |  | ||||||
| (* Remove Embedded fonts. This is done by removing the Font Descriptor. *) | (* Remove Embedded fonts. This is done by removing the Font Descriptor. *) | ||||||
| let remove_fontdescriptor pdf = function | let remove_fontdescriptor pdf = function | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user