Clean up content streams
This commit is contained in:
parent
cb858ec5dd
commit
845511161e
24
cpdfdraw.ml
24
cpdfdraw.ml
|
@ -169,7 +169,6 @@ let process_specials pdf endpage filename bates batespad num page s =
|
||||||
in
|
in
|
||||||
Cpdfaddtext.process_text (res ()).time s pairs
|
Cpdfaddtext.process_text (res ()).time s pairs
|
||||||
|
|
||||||
(* FIXME cache (just for paragraph) *)
|
|
||||||
let font_widths f fontsize =
|
let font_widths f fontsize =
|
||||||
match f with
|
match f with
|
||||||
| Pdftext.StandardFont (sf, encoding) ->
|
| Pdftext.StandardFont (sf, encoding) ->
|
||||||
|
@ -191,9 +190,6 @@ let runs_of_utf8 s =
|
||||||
let triples = option_map (Cpdfembed.get_char fontpack) codepoints in
|
let triples = option_map (Cpdfembed.get_char fontpack) codepoints in
|
||||||
let collated = Cpdfembed.collate_runs triples in
|
let collated = Cpdfembed.collate_runs triples in
|
||||||
let font_widths fontnum font font_size =
|
let font_widths fontnum font font_size =
|
||||||
(* Need to cache font widths here. TODO need to cache futher up too for
|
|
||||||
more speed. Check -typeset speed now we need widths. Or, make width
|
|
||||||
calculation optional? *)
|
|
||||||
match Hashtbl.find_opt widthcache (fontnum, font_size) with
|
match Hashtbl.find_opt widthcache (fontnum, font_size) with
|
||||||
| Some table -> table
|
| Some table -> table
|
||||||
| None ->
|
| None ->
|
||||||
|
@ -290,6 +286,24 @@ let structdata = ref []
|
||||||
|
|
||||||
(* TODO: Tagging in XObjects, move tag state into res () etc. *)
|
(* TODO: Tagging in XObjects, move tag state into res () etc. *)
|
||||||
|
|
||||||
|
let rec remove_tfs prev = function
|
||||||
|
| [] -> []
|
||||||
|
| Pdfops.Op_Tf (f, _)::t when f = prev -> remove_tfs prev t
|
||||||
|
| Pdfops.Op_Tf (f, s) as h::t -> h::remove_tfs f t
|
||||||
|
| h::t -> h::remove_tfs prev t
|
||||||
|
|
||||||
|
let rec merge_adjacent_tjs ops =
|
||||||
|
let merge_tjs l =
|
||||||
|
Pdfops.Op_Tj (String.concat "" (map (function Pdfops.Op_Tj s -> s | _ -> assert false) l))
|
||||||
|
in
|
||||||
|
match cleavewhile (function Pdfops.Op_Tj _ -> true | _ -> false) ops with
|
||||||
|
| [], h::t -> h::merge_adjacent_tjs t
|
||||||
|
| [], [] -> []
|
||||||
|
| l, t -> merge_tjs l::merge_adjacent_tjs t
|
||||||
|
|
||||||
|
let clean_up ops =
|
||||||
|
merge_adjacent_tjs (remove_tfs "" ops)
|
||||||
|
|
||||||
(* TODO: Use Uuseg for proper unicode segmentation. *)
|
(* TODO: Use Uuseg for proper unicode segmentation. *)
|
||||||
let format_paragraph indent j w s =
|
let format_paragraph indent j w s =
|
||||||
let ss = String.split_on_char ' ' s in
|
let ss = String.split_on_char ' ' s in
|
||||||
|
@ -334,7 +348,7 @@ let format_paragraph indent j w s =
|
||||||
end;
|
end;
|
||||||
done;
|
done;
|
||||||
allops =| rev (Pdfops.Op_T'::justify !ops);
|
allops =| rev (Pdfops.Op_T'::justify !ops);
|
||||||
flatten (rev !allops)
|
clean_up (flatten (rev !allops))
|
||||||
|
|
||||||
let current_eltinfo = null_hash ()
|
let current_eltinfo = null_hash ()
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue