From 9516784ece47fbcb719d875f76bc6b611a540843 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Mon, 6 Dec 2021 15:55:46 -0800 Subject: [PATCH] more --- Makefile | 2 +- cpdfcommand.ml | 46 ++++------------------------------------------ cpdftexttopdf.ml | 39 +++++++++++++++++++++++++++++++++++++++ cpdftexttopdf.mli | 1 + cpdftoc.ml | 1 - 5 files changed, 45 insertions(+), 44 deletions(-) create mode 100644 cpdftexttopdf.ml create mode 100644 cpdftexttopdf.mli diff --git a/Makefile b/Makefile index b340916..f9cd91c 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # Build the cpdf command line tools and top level MODS = cpdfyojson cpdfxmlm \ cpdfunicodedata cpdferror cpdfjson cpdfstrftime cpdfcoord cpdfattach \ - cpdfpagespec cpdfposition cpdf cpdffont cpdftype cpdftoc cpdfcommand + cpdfpagespec cpdfposition cpdf cpdffont cpdftype cpdftexttopdf cpdftoc cpdfcommand SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml diff --git a/cpdfcommand.ml b/cpdfcommand.ml index d77a831..a40aeb8 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -2918,47 +2918,6 @@ let collate (names, pdfs, ranges) = done; split3 (rev !nis) -let of_utf8 (f, fontsize) t = - Pdftext.codepoints_of_utf8 t - |> option_map (Pdftext.charcode_extractor_of_font_real f) - |> map char_of_int - |> implode - -let of_pdfdocencoding (f, fontsize) t = - of_utf8 (f, fontsize) (Pdftext.utf8_of_pdfdocstring t) - -let rec of_utf8_with_newlines t = - let items = ref [] in - let buf = Buffer.create 256 in - String.iter - (function - | '\n' -> - let c = Buffer.contents buf in - if c <> "" then items := Cpdftype.Text (explode c)::!items; - items := Cpdftype.NewLine::!items; - Buffer.clear buf - | x -> - Buffer.add_char buf x) - t; - (* Do last one *) - let c = Buffer.contents buf in - if c <> "" then items := Text (explode c)::!items; - rev !items - -(* FIXME margins, hyphenation of too-long words, efficiency *) -let typeset text = - let pdf = Pdf.empty () in - let f = - (begin match args.font with StandardFont sf -> Pdftext.StandardFont (sf, Pdftext.WinAnsiEncoding) | _ -> failwith "typeset bad font" end, - args.fontsize) - in - let pages = - Cpdftype.typeset - 20. 20. 20. 20. Pdfpaper.a4 pdf ([Cpdftype.Font f] @ of_utf8_with_newlines (string_of_bytes text)) - in - let pdf, pageroot = Pdfpage.add_pagetree pages pdf in - Pdfpage.add_root pageroot [] pdf - (* Main function *) let go () = match args.op with @@ -3888,7 +3847,10 @@ let go () = write_pdf false pdf | Some (Typeset filename) -> let text = Pdfio.bytes_of_input_channel (open_in filename) in - let pdf = typeset text in + let font = + match args.font with StandardFont f -> f | _ -> error "text to PDF: not a standard font" + in + let pdf = Cpdftexttopdf.typeset ~font ~fontsize:args.fontsize text in write_pdf false pdf (* Advise the user if a combination of command line flags makes little sense, diff --git a/cpdftexttopdf.ml b/cpdftexttopdf.ml new file mode 100644 index 0000000..020795b --- /dev/null +++ b/cpdftexttopdf.ml @@ -0,0 +1,39 @@ +open Pdfutil + +let of_utf8 (f, fontsize) t = + Pdftext.codepoints_of_utf8 t + |> option_map (Pdftext.charcode_extractor_of_font_real f) + |> map char_of_int + |> implode + +let of_pdfdocencoding (f, fontsize) t = + of_utf8 (f, fontsize) (Pdftext.utf8_of_pdfdocstring t) + +let rec of_utf8_with_newlines t = + let items = ref [] in + let buf = Buffer.create 256 in + String.iter + (function + | '\n' -> + let c = Buffer.contents buf in + if c <> "" then items := Cpdftype.Text (explode c)::!items; + items := Cpdftype.NewLine::!items; + Buffer.clear buf + | x -> + Buffer.add_char buf x) + t; + (* Do last one *) + let c = Buffer.contents buf in + if c <> "" then items := Text (explode c)::!items; + rev !items + +(* FIXME margins, hyphenation of too-long words, efficiency *) +let typeset ~font ~fontsize text = + let pdf = Pdf.empty () in + let f = (Pdftext.StandardFont (font, Pdftext.WinAnsiEncoding), fontsize) in + let pages = + Cpdftype.typeset + 20. 20. 20. 20. Pdfpaper.a4 pdf ([Cpdftype.Font f] @ of_utf8_with_newlines (Pdfio.string_of_bytes text)) + in + let pdf, pageroot = Pdfpage.add_pagetree pages pdf in + Pdfpage.add_root pageroot [] pdf diff --git a/cpdftexttopdf.mli b/cpdftexttopdf.mli new file mode 100644 index 0000000..5deb16a --- /dev/null +++ b/cpdftexttopdf.mli @@ -0,0 +1 @@ +val typeset : font:Pdftext.standard_font -> fontsize:float -> Pdfio.bytes -> Pdf.t diff --git a/cpdftoc.ml b/cpdftoc.ml index bfcc6db..0fa4f94 100644 --- a/cpdftoc.ml +++ b/cpdftoc.ml @@ -93,4 +93,3 @@ let typeset_table_of_contents ~font ~fontsize ~title pdf = let labels' = label::map (fun l -> {l with Pdfpagelabels.startpage = l.Pdfpagelabels.startpage + toc_pages_len}) labels in Pdfpagelabels.write pdf labels'; pdf -