This commit is contained in:
John Whitington 2021-12-06 15:46:52 -08:00
parent 10fc61f98a
commit 9f776cb6c6
4 changed files with 99 additions and 90 deletions

View File

@ -1,7 +1,7 @@
# Build the cpdf command line tools and top level # Build the cpdf command line tools and top level
MODS = cpdfyojson cpdfxmlm \ MODS = cpdfyojson cpdfxmlm \
cpdfunicodedata cpdferror cpdfjson cpdfstrftime cpdfcoord cpdfattach \ cpdfunicodedata cpdferror cpdfjson cpdfstrftime cpdfcoord cpdfattach \
cpdfpagespec cpdfposition cpdf cpdffont cpdftype cpdfcommand cpdfpagespec cpdfposition cpdf cpdffont cpdftype cpdftoc cpdfcommand
SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml

View File

@ -2959,94 +2959,6 @@ let typeset text =
let pdf, pageroot = Pdfpage.add_pagetree pages pdf in let pdf, pageroot = Pdfpage.add_pagetree pages pdf in
Pdfpage.add_root pageroot [] pdf Pdfpage.add_root pageroot [] pdf
(* FIXME: Calculate margins based on page size (+ cropbox!) *)
(* Break a title, given as a list of characters, into lines. A line ends at
   each literal backslash followed by 'n' (two characters, not a newline
   character). [a] is the reversed start of the first line; callers normally
   pass []. The result always has at least one line. *)
let split_toc_title a chars =
  let rec scan line_rev lines_rev = function
    | [] -> rev (rev line_rev :: lines_rev)
    | '\\'::'n'::rest -> scan [] (rev line_rev :: lines_rev) rest
    | c::rest -> scan (c::line_rev) lines_rev rest
  in
  scan a [] chars
(* Typeset a table of contents from the bookmarks of [pdf] and put it in front
   of the existing pages. The body font size and the title are read from the
   global [args] record ([args.fontsize], [args.toc_title]); the title is set
   at twice the body size. If there are no bookmarks, a message is printed on
   stderr and [pdf] is returned unchanged. *)
let typeset_table_of_contents ~font pdf =
let marks = Pdfmarks.read_bookmarks pdf in
if marks = [] then (Printf.eprintf "No bookmarks, not making table of contents\n%!"; pdf) else
(* [f] is the body font and size, [big] the title font at double size. *)
let f = (Pdftext.StandardFont (font, Pdftext.WinAnsiEncoding), args.fontsize) in
let big = (Pdftext.StandardFont (font, Pdftext.WinAnsiEncoding), args.fontsize *. 2.) in
let firstpage = hd (Pdfpage.pages_of_pagetree pdf) in
(* Paper size for the contents pages is the first page's media box; its upper
   right corner is kept for the margin calculation below. *)
let firstpage_papersize, pmaxx, pmaxy =
let width, height, xmax, ymax =
match Pdf.parse_rectangle firstpage.Pdfpage.mediabox with
xmin, ymin, xmax, ymax -> xmax -. xmin, ymax -. ymin, xmax, ymax
in
Pdfpaper.make Pdfunits.PdfPoint width height, xmax, ymax
in
(* The first page's crop box, if it has one. *)
let firstpage_cropbox =
match Pdf.lookup_direct pdf "/CropBox" firstpage.Pdfpage.rest with
| Some r -> Some (Pdf.parse_rectangle r)
| None -> None
in
let labels = Pdfpagelabels.read pdf in
(* One list of typesetting elements per bookmark. *)
let lines =
let refnums = Pdf.page_reference_numbers pdf in
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
map
(fun mark ->
(* Page label text of the bookmark's target page, falling back to the plain
   page number when no label is found. *)
let label =
let pnum = Pdfpage.pagenumber_of_target ~fastrefnums pdf mark.Pdfmarks.target in
try Pdfpagelabels.pagelabeltext_of_pagenumber pnum labels with Not_found -> string_of_int pnum
in
(* Indent by bookmark level, then the bookmark text, a fixed 100-unit gap and
   the page label, all between BeginDest/EndDest for the bookmark's target. *)
[Cpdftype.BeginDest mark.Pdfmarks.target;
Cpdftype.HGlue {Cpdftype.glen = float mark.Pdfmarks.level *. args.fontsize *. 2.; Cpdftype.gstretch = 0.};
Cpdftype.Text (explode (of_pdfdocencoding f mark.Pdfmarks.text));
Cpdftype.HGlue {Cpdftype.glen = 100.; Cpdftype.gstretch = 0.};
Cpdftype.Text (explode (of_pdfdocencoding f label));
(*Cpdftype.Text [' '];
Cpdftype.Text (explode (of_pdfdocencoding f label));*)
Cpdftype.EndDest;
Cpdftype.NewLine])
(Pdfmarks.read_bookmarks pdf)
in
let toc_pages =
(* The title, split into lines at each literal backslash-n. *)
let title =
flatten
(map
(fun l -> [Cpdftype.Text l; Cpdftype.NewLine])
(split_toc_title [] (explode args.toc_title)))
in
(* Margins: 50 on every side, measured from the crop box when there is one.
   NOTE(review): tm is derived from cminy and bm from cmaxy; confirm this
   matches the margin convention of Cpdftype.typeset. *)
let lm, rm, tm, bm =
match firstpage_cropbox with
| None -> (50., 50., 50., 50.)
| Some (cminx, cminy, cmaxx, cmaxy) ->
(cminx +. 50., (pmaxx -. cmaxx) +. 50., cminy +. 50., (pmaxy -. cmaxy) +. 50.)
in
Cpdftype.typeset lm rm tm bm firstpage_papersize pdf
([Cpdftype.Font big] @ title @
[Cpdftype.VGlue {glen = args.fontsize *. 2.; gstretch = 0.};
Cpdftype.Font f] @ flatten lines)
in
(* Copy the first page's crop box, if any, onto every contents page. *)
let toc_pages =
match firstpage_cropbox with
| Some (a, b, c, d) ->
let rect =
Pdf.Array [Pdf.Real a; Pdf.Real b; Pdf.Real c; Pdf.Real d]
in
map
(fun p -> {p with Pdfpage.rest = Pdf.add_dict_entry p.Pdfpage.rest "/CropBox" rect})
toc_pages
| None -> toc_pages
in
(* Prepend the contents pages; [changes] maps each old page number to its new
   position after the inserted pages. *)
let original_pages = Pdfpage.pages_of_pagetree pdf in
let toc_pages_len = length toc_pages in
let changes = map (fun n -> (n, n + toc_pages_len)) (indx original_pages) in
let pdf = Pdfpage.change_pages ~changes true pdf (toc_pages @ original_pages) in
(* A prefix-only label with no prefix for the contents pages, followed by the
   existing labels moved along by the number of inserted pages. *)
let label =
{Pdfpagelabels.labelstyle = NoLabelPrefixOnly;
Pdfpagelabels.labelprefix = None;
Pdfpagelabels.startpage = 1;
Pdfpagelabels.startvalue = 1}
in
let labels' = label::map (fun l -> {l with Pdfpagelabels.startpage = l.Pdfpagelabels.startpage + toc_pages_len}) labels in
Pdfpagelabels.write pdf labels';
pdf
(* Main function *) (* Main function *)
let go () = let go () =
match args.op with match args.op with
@ -3972,7 +3884,7 @@ let go () =
let font = let font =
match args.font with StandardFont f -> f | _ -> error "TOC requires standard font only" match args.font with StandardFont f -> f | _ -> error "TOC requires standard font only"
in in
let pdf = typeset_table_of_contents ~font pdf in let pdf = Cpdftoc.typeset_table_of_contents ~font ~fontsize:args.fontsize ~title:args.toc_title pdf in
write_pdf false pdf write_pdf false pdf
| Some (Typeset filename) -> | Some (Typeset filename) ->
let text = Pdfio.bytes_of_input_channel (open_in filename) in let text = Pdfio.bytes_of_input_channel (open_in filename) in

96
cpdftoc.ml Normal file
View File

@ -0,0 +1,96 @@
open Pdfutil
(* Split a title (a list of characters) into its lines. The separator is the
   two-character sequence backslash, 'n' as typed by the user, not a newline
   character. [a] holds the characters already collected for the first line,
   in reverse order; pass [] to start afresh. At least one (possibly empty)
   line is always returned. *)
let split_toc_title a chars =
  let rec go current finished = function
    | [] -> rev (rev current :: finished)
    | '\\'::'n'::rest -> go [] (rev current :: finished) rest
    | c::rest -> go (c::current) finished rest
  in
  go a [] chars
(* Convert the UTF-8 string [t] into a string of character codes for font
   [f]. Codepoints for which the font's extractor returns None are dropped.
   The second component of the pair (the font size) is not used. *)
let of_utf8 (f, _) t =
  let codepoints = Pdftext.codepoints_of_utf8 t in
  let charcodes = option_map (Pdftext.charcode_extractor_of_font_real f) codepoints in
  implode (map char_of_int charcodes)
(* As [of_utf8], but [t] is a PDF document string: it is first converted to
   UTF-8 and then to character codes for the given font. *)
let of_pdfdocencoding font_and_size t =
  let (f, fontsize) = font_and_size in
  Pdftext.utf8_of_pdfdocstring t |> of_utf8 (f, fontsize)
(* Build a table of contents from the bookmarks of [pdf] and insert it in
   front of the existing pages.

   [font] is the standard font used throughout. Entries are set at [fontsize]
   and the title at twice that size. [title] may contain the literal
   two-character sequence backslash-n to break it over several lines.

   The paper size comes from the first page's media box; if that page has a
   /CropBox it is copied onto every contents page. Each bookmark gives one
   line: an indent proportional to its level, its text, a fixed gap and the
   page label of its target, all between BeginDest/EndDest for that target.

   Page labels: the contents pages get a blank (prefix-only, no prefix) label
   and the existing labels are moved along by the number of inserted pages.

   If [pdf] has no bookmarks a message is printed on stderr and [pdf] is
   returned unchanged. *)
let typeset_table_of_contents ~font ~fontsize ~title pdf =
  match Pdfmarks.read_bookmarks pdf with
  | [] ->
      Printf.eprintf "No bookmarks, not making table of contents\n%!";
      pdf
  | marks ->
      (* Body font, and the title font at double size. *)
      let f = (Pdftext.StandardFont (font, Pdftext.WinAnsiEncoding), fontsize) in
      let big = (Pdftext.StandardFont (font, Pdftext.WinAnsiEncoding), fontsize *. 2.) in
      let firstpage = hd (Pdfpage.pages_of_pagetree pdf) in
      (* Paper size from the first page's media box; its upper-right corner
         is kept for the margin calculation. *)
      let firstpage_papersize, pmaxx, pmaxy =
        let xmin, ymin, xmax, ymax = Pdf.parse_rectangle firstpage.Pdfpage.mediabox in
        Pdfpaper.make Pdfunits.PdfPoint (xmax -. xmin) (ymax -. ymin), xmax, ymax
      in
      let firstpage_cropbox =
        match Pdf.lookup_direct pdf "/CropBox" firstpage.Pdfpage.rest with
        | Some r -> Some (Pdf.parse_rectangle r)
        | None -> None
      in
      let labels = Pdfpagelabels.read pdf in
      let lines =
        let refnums = Pdf.page_reference_numbers pdf in
        let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
        map
          (fun mark ->
             (* Label text of the target page; plain page number if the
                document has no label covering it. *)
             let label =
               let pnum = Pdfpage.pagenumber_of_target ~fastrefnums pdf mark.Pdfmarks.target in
               try Pdfpagelabels.pagelabeltext_of_pagenumber pnum labels with Not_found -> string_of_int pnum
             in
             [Cpdftype.BeginDest mark.Pdfmarks.target;
              Cpdftype.HGlue {Cpdftype.glen = float mark.Pdfmarks.level *. fontsize *. 2.; Cpdftype.gstretch = 0.};
              Cpdftype.Text (explode (of_pdfdocencoding f mark.Pdfmarks.text));
              Cpdftype.HGlue {Cpdftype.glen = 100.; Cpdftype.gstretch = 0.};
              Cpdftype.Text (explode (of_pdfdocencoding f label));
              Cpdftype.EndDest;
              Cpdftype.NewLine])
          (* Reuse the bookmarks already read above instead of reading them
             from the document a second time. *)
          marks
      in
      let toc_pages =
        let title =
          flatten
            (map
               (fun l -> [Cpdftype.Text l; Cpdftype.NewLine])
               (split_toc_title [] (explode title)))
        in
        (* Margins of 50 on every side, measured from the crop box when there
           is one.
           NOTE(review): tm is derived from cminy and bm from cmaxy, which
           looks swapped for PDF's bottom-left origin; left unchanged here,
           confirm against the margin convention of Cpdftype.typeset. *)
        let lm, rm, tm, bm =
          match firstpage_cropbox with
          | None -> (50., 50., 50., 50.)
          | Some (cminx, cminy, cmaxx, cmaxy) ->
              (cminx +. 50., (pmaxx -. cmaxx) +. 50., cminy +. 50., (pmaxy -. cmaxy) +. 50.)
        in
        Cpdftype.typeset lm rm tm bm firstpage_papersize pdf
          ([Cpdftype.Font big] @ title @
           [Cpdftype.VGlue {glen = fontsize *. 2.; gstretch = 0.};
            Cpdftype.Font f] @ flatten lines)
      in
      (* Give the contents pages the same crop box as the first page. *)
      let toc_pages =
        match firstpage_cropbox with
        | Some (a, b, c, d) ->
            let rect =
              Pdf.Array [Pdf.Real a; Pdf.Real b; Pdf.Real c; Pdf.Real d]
            in
            map
              (fun p -> {p with Pdfpage.rest = Pdf.add_dict_entry p.Pdfpage.rest "/CropBox" rect})
              toc_pages
        | None -> toc_pages
      in
      (* Prepend the contents pages; [changes] maps each old page number to
         its position after the inserted pages. *)
      let original_pages = Pdfpage.pages_of_pagetree pdf in
      let toc_pages_len = length toc_pages in
      let changes = map (fun n -> (n, n + toc_pages_len)) (indx original_pages) in
      let pdf = Pdfpage.change_pages ~changes true pdf (toc_pages @ original_pages) in
      (* Blank label range for the contents pages themselves. *)
      let toc_label =
        {Pdfpagelabels.labelstyle = Pdfpagelabels.NoLabelPrefixOnly;
         Pdfpagelabels.labelprefix = None;
         Pdfpagelabels.startpage = 1;
         Pdfpagelabels.startvalue = 1}
      in
      let shifted =
        map
          (fun l -> {l with Pdfpagelabels.startpage = l.Pdfpagelabels.startpage + toc_pages_len})
          labels
      in
      (* With no explicit labels in the document, the blank label above would
         otherwise extend over every page, so the original pages would lose
         their implicit 1, 2, 3... numbering and no longer agree with the
         page numbers printed in the contents. Restart decimal numbering at 1
         on the first original page in that case. *)
      let body_labels =
        match shifted with
        | [] ->
            [{Pdfpagelabels.labelstyle = Pdfpagelabels.DecimalArabic;
              Pdfpagelabels.labelprefix = None;
              Pdfpagelabels.startpage = toc_pages_len + 1;
              Pdfpagelabels.startvalue = 1}]
        | _ -> shifted
      in
      Pdfpagelabels.write pdf (toc_label :: body_labels);
      pdf

1
cpdftoc.mli Normal file
View File

@ -0,0 +1 @@
(** [typeset_table_of_contents ~font ~fontsize ~title pdf] builds a table of
    contents from the bookmarks of [pdf] and inserts it before the existing
    pages. Entries are set in [font] at [fontsize]; the title is set at twice
    that size and is split into lines at each literal backslash-n in [title].
    The paper size and any /CropBox are taken from the first page. Existing
    page labels are moved along by the number of inserted pages. If [pdf] has
    no bookmarks, a message is printed on stderr and [pdf] is returned
    unchanged. *)
val typeset_table_of_contents : font:Pdftext.standard_font -> fontsize:float -> title:string -> Pdf.t -> Pdf.t