mirror of
				https://github.com/johnwhitington/cpdf-source.git
				synced 2025-06-05 22:09:39 +02:00 
			
		
		
		
	more
This commit is contained in:
		
							
								
								
									
										14
									
								
								cpdf.ml
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								cpdf.ml
									
									
									
									
									
								
							| @@ -1125,8 +1125,8 @@ let print_fonts pdf range = | |||||||
|  |  | ||||||
| (* Process UTF8 text to /WinAnsiEncoding string (for standard 14) or whatever | (* Process UTF8 text to /WinAnsiEncoding string (for standard 14) or whatever | ||||||
|    is in the font (for existing fonts). *) |    is in the font (for existing fonts). *) | ||||||
| let charcodes_of_utf8 pdf font s = | let charcodes_of_utf8 font s = | ||||||
|   let extractor = Pdftext.charcode_extractor_of_font ~debug:false pdf font in |   let extractor = Pdftext.charcode_extractor_of_font_real ~debug:false font in | ||||||
|   let codepoints = Pdftext.codepoints_of_utf8 s in |   let codepoints = Pdftext.codepoints_of_utf8 s in | ||||||
|     let charcodes = |     let charcodes = | ||||||
|       option_map |       option_map | ||||||
| @@ -1141,12 +1141,8 @@ let charcodes_of_utf8 pdf font s = | |||||||
| (* Process codepoints back to UTF8, assuming it came from UTF8 to start with *) | (* Process codepoints back to UTF8, assuming it came from UTF8 to start with *) | ||||||
| let utf8_of_winansi s = | let utf8_of_winansi s = | ||||||
|   let text_extractor = |   let text_extractor = | ||||||
|     Pdftext.text_extractor_of_font |     Pdftext.text_extractor_of_font_real | ||||||
|       (Pdf.empty ()) |       (Pdftext.StandardFont (Pdftext.TimesRoman, Pdftext.WinAnsiEncoding)) | ||||||
|       (Pdf.Dictionary |  | ||||||
|         [("/BaseFont", Pdf.Name "/TimesRoman"); |  | ||||||
|          ("/Subtype", Pdf.Name "/Type1"); |  | ||||||
|          ("/Encoding", Pdf.Name "/WinAnsiEncoding")])  |  | ||||||
|   in |   in | ||||||
|     let codepoints = Pdftext.codepoints_of_text text_extractor s in |     let codepoints = Pdftext.codepoints_of_text text_extractor s in | ||||||
|       Pdftext.utf8_of_codepoints codepoints |       Pdftext.utf8_of_codepoints codepoints | ||||||
| @@ -1601,7 +1597,7 @@ let | |||||||
|             end |             end | ||||||
|         | _ -> failwith "addtext: font dictionary not present" |         | _ -> failwith "addtext: font dictionary not present" | ||||||
|   in |   in | ||||||
|   let text = if raw then text else charcodes_of_utf8 pdf fontpdfobj text in |   let text = if raw then text else charcodes_of_utf8 (Pdftext.read_font pdf fontpdfobj) text in | ||||||
|     let lines = map unescape_string (split_at_newline text) in |     let lines = map unescape_string (split_at_newline text) in | ||||||
|       let pdf = ref pdf in |       let pdf = ref pdf in | ||||||
|         let voffset = |         let voffset = | ||||||
|   | |||||||
| @@ -2919,13 +2919,10 @@ let collate (names, pdfs, ranges) = | |||||||
|     split3 (rev !nis) |     split3 (rev !nis) | ||||||
|  |  | ||||||
| let of_utf8 (f, fontsize) t = | let of_utf8 (f, fontsize) t = | ||||||
|   let pdf = Pdf.empty () in |      Pdftext.codepoints_of_utf8 t | ||||||
|   let fontdict = Pdftext.write_font pdf f in |   |> option_map (Pdftext.charcode_extractor_of_font_real f) | ||||||
|   let extractor = Pdftext.charcode_extractor_of_font pdf (Pdf.Indirect fontdict) in |   |> map char_of_int | ||||||
|        Pdftext.codepoints_of_utf8 t |   |> implode | ||||||
|     |> option_map extractor |  | ||||||
|     |> map char_of_int |  | ||||||
|     |> implode |  | ||||||
|  |  | ||||||
| let of_pdfdocencoding (f, fontsize) t = | let of_pdfdocencoding (f, fontsize) t = | ||||||
|   of_utf8 (f, fontsize) (Pdftext.utf8_of_pdfdocstring t) |   of_utf8 (f, fontsize) (Pdftext.utf8_of_pdfdocstring t) | ||||||
| @@ -2948,9 +2945,13 @@ let rec of_utf8_with_newlines t = | |||||||
|       if c <> "" then items := Text (explode c)::!items; |       if c <> "" then items := Text (explode c)::!items; | ||||||
|     rev !items |     rev !items | ||||||
|  |  | ||||||
|  | (* FIXME margins, hyphenation of too-long words, efficiency *) | ||||||
| let typeset text = | let typeset text = | ||||||
|   let pdf = Pdf.empty () in |   let pdf = Pdf.empty () in | ||||||
|   let f = (Pdftext.StandardFont (Pdftext.Courier, Pdftext.WinAnsiEncoding), 12.) in |   let f =  | ||||||
|  |     (begin match args.font with StandardFont sf -> Pdftext.StandardFont (sf, Pdftext.WinAnsiEncoding) | _ -> failwith "typeset bad font" end, | ||||||
|  |      args.fontsize) | ||||||
|  |   in | ||||||
|   let pages = |   let pages = | ||||||
|     Cpdftype.typeset |     Cpdftype.typeset | ||||||
|       20. 20. 20. 20. Pdfpaper.a4 pdf ([Cpdftype.Font f] @ of_utf8_with_newlines (string_of_bytes text)) |       20. 20. 20. 20. Pdfpaper.a4 pdf ([Cpdftype.Font f] @ of_utf8_with_newlines (string_of_bytes text)) | ||||||
| @@ -2973,13 +2974,22 @@ let typeset_table_of_contents ~font pdf = | |||||||
|       Pdfpaper.make Pdfunits.PdfPoint width height |       Pdfpaper.make Pdfunits.PdfPoint width height | ||||||
|   in |   in | ||||||
|   let lines = |   let lines = | ||||||
|  |     let refnums = Pdf.page_reference_numbers pdf in | ||||||
|  |     let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in | ||||||
|     map |     map | ||||||
|       (fun mark -> |       (fun mark -> | ||||||
|          [Cpdftype.BeginDest mark.Pdfmarks.target; |          let label = | ||||||
|           Cpdftype.HGlue {Cpdftype.glen = float mark.Pdfmarks.level *. args.fontsize *. 2.; Cpdftype.gstretch = 0.}; |            let labels = Pdfpagelabels.read pdf in | ||||||
|           Cpdftype.Text (explode (of_pdfdocencoding f mark.Pdfmarks.text)); |            let pnum = Pdfpage.pagenumber_of_target ~fastrefnums pdf mark.Pdfmarks.target in | ||||||
|           Cpdftype.EndDest; |              try Pdfpagelabels.pagelabeltext_of_pagenumber pnum labels with Not_found -> string_of_int pnum | ||||||
|           Cpdftype.NewLine]) |          in | ||||||
|  |            [Cpdftype.BeginDest mark.Pdfmarks.target; | ||||||
|  |             Cpdftype.HGlue {Cpdftype.glen = float mark.Pdfmarks.level *. args.fontsize *. 2.; Cpdftype.gstretch = 0.}; | ||||||
|  |             Cpdftype.Text (explode (of_pdfdocencoding f mark.Pdfmarks.text ^ " " ^ of_pdfdocencoding f label)); | ||||||
|  |             (*Cpdftype.Text [' ']; | ||||||
|  |             Cpdftype.Text (explode (of_pdfdocencoding f label));*) | ||||||
|  |             Cpdftype.EndDest; | ||||||
|  |             Cpdftype.NewLine]) | ||||||
|       (Pdfmarks.read_bookmarks pdf) |       (Pdfmarks.read_bookmarks pdf) | ||||||
|   in |   in | ||||||
|   let toc_pages = |   let toc_pages = | ||||||
|   | |||||||
| @@ -156,7 +156,7 @@ let print_font_table pdf fontname pagenumber = | |||||||
|             | Pdftext.SimpleFont {Pdftext.fontdescriptor = Some {Pdftext.charset = Some cs}} -> Some cs |             | Pdftext.SimpleFont {Pdftext.fontdescriptor = Some {Pdftext.charset = Some cs}} -> Some cs | ||||||
|             | _ -> None |             | _ -> None | ||||||
|           in |           in | ||||||
|           let extractor = Pdftext.text_extractor_of_font pdf font in |           let extractor = Pdftext.text_extractor_of_font_real pdftextfont in | ||||||
|           let unicodedata = Cpdfunicodedata.unicodedata () in |           let unicodedata = Cpdfunicodedata.unicodedata () in | ||||||
|           let unicodetable = Hashtbl.create 16000 in |           let unicodetable = Hashtbl.create 16000 in | ||||||
|            iter |            iter | ||||||
|   | |||||||
							
								
								
									
										16
									
								
								cpdftype.ml
									
									
									
									
									
								
							
							
						
						
									
										16
									
								
								cpdftype.ml
									
									
									
									
									
								
							| @@ -1,11 +1,6 @@ | |||||||
| (* A typesetter for cpdf. A list of elements is manipulated zero or more times | (* A typesetter for cpdf. A list of elements is manipulated zero or more times | ||||||
|    to lay it out, paginate it, and so on. It is then typeset to produce a list |    to lay it out, paginate it, and so on. It is then typeset to produce a list | ||||||
|    of pages *) |    of pages *) | ||||||
|  |  | ||||||
| (* FIXME We need to make Pdfstandard14 width calculations much more efficient |  | ||||||
|    by caching so that we are not making a table up for each character! *) |  | ||||||
| (* FIXME We need to reintroduce kerning in Pdfstandard14. *) |  | ||||||
| (* FIXME Fix up charcode / text extractors to take fonts not fontdicts *) |  | ||||||
| open Pdfutil | open Pdfutil | ||||||
|  |  | ||||||
| (* Glue *) | (* Glue *) | ||||||
| @@ -55,8 +50,15 @@ let initial_state () = | |||||||
|    dest = None} |    dest = None} | ||||||
|  |  | ||||||
| let font_widths f fontsize = | let font_widths f fontsize = | ||||||
|   let w = fontsize *. (600. /. 1000.) in |   let stdfont = | ||||||
|     Array.make 256 w |     match f with Pdftext.StandardFont (sf, _) -> sf | _ -> failwith "not a standard font" | ||||||
|  |   in | ||||||
|  |     Array.init | ||||||
|  |       256 | ||||||
|  |       (fun x -> | ||||||
|  |            fontsize | ||||||
|  |         *. float_of_int (Pdfstandard14.textwidth false Pdftext.WinAnsiEncoding stdfont (string_of_char (char_of_int x))) | ||||||
|  |         /. 1000.) | ||||||
|  |  | ||||||
| let width_of_string ws s = | let width_of_string ws s = | ||||||
|   let w = ref 0. in |   let w = ref 0. in | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user