mirror of
https://github.com/johnwhitington/cpdf-source.git
synced 2025-01-06 22:12:32 +01:00
Truetype cmap matterhorns
This commit is contained in:
parent
9609100383
commit
707578d724
@ -504,6 +504,9 @@ let find_main encoding subset =
|
|||||||
in
|
in
|
||||||
(first, splitinto 224 rest)
|
(first, splitinto 224 rest)
|
||||||
|
|
||||||
|
(* Set by [cmaps] while it runs [parse] purely to gather cmap subtable
   identifiers. While set, a font with no OS/2 table gets zero metrics from
   [parse] instead of raising an error. *)
let collecting_cmaps = ref false
|
||||||
|
(* The (platform_id, encoding_id) pair of each cmap subtable seen by [parse],
   consed on as encountered, so the most recently read subtable comes first.
   NOTE(review): the push in [parse] looks unconditional, so this list also
   grows during ordinary parses until [cmaps] resets it - confirm. *)
let collected_cmaps = ref []
|
||||||
|
|
||||||
let parse ~subset data encoding =
|
let parse ~subset data encoding =
|
||||||
let mk_b byte_offset = bitbytes_of_input (let i = input_of_bytes data in i.seek_in byte_offset; i) in
|
let mk_b byte_offset = bitbytes_of_input (let i = input_of_bytes data in i.seek_in byte_offset; i) in
|
||||||
let b = mk_b 0 in
|
let b = mk_b 0 in
|
||||||
@ -547,7 +550,9 @@ let parse ~subset data encoding =
|
|||||||
in
|
in
|
||||||
let ascent, descent, capheight, xheight, avgwidth =
|
let ascent, descent, capheight, xheight, avgwidth =
|
||||||
match os2 with
|
match os2 with
|
||||||
| None -> raise (Pdf.PDFError "No os/2 table found in truetype font")
|
| None ->
|
||||||
|
if !collecting_cmaps then (0, 0, 0, 0, 0) else
|
||||||
|
raise (Pdf.PDFError "No os/2 table found in truetype font")
|
||||||
| Some (o, l) -> let b = mk_b (i32toi o) in read_os2_table unitsPerEm b (i32toi l)
|
| Some (o, l) -> let b = mk_b (i32toi o) in read_os2_table unitsPerEm b (i32toi l)
|
||||||
in
|
in
|
||||||
let italicangle =
|
let italicangle =
|
||||||
@ -579,6 +584,7 @@ let parse ~subset data encoding =
|
|||||||
let subtable_offset = read_ulong b in
|
let subtable_offset = read_ulong b in
|
||||||
if !dbg then Printf.printf "subtable %i. platform_id = %i, encoding_id = %i, subtable_offset = %li\n"
|
if !dbg then Printf.printf "subtable %i. platform_id = %i, encoding_id = %i, subtable_offset = %li\n"
|
||||||
x platform_id encoding_id subtable_offset;
|
x platform_id encoding_id subtable_offset;
|
||||||
|
collected_cmaps := (platform_id, encoding_id)::!collected_cmaps;
|
||||||
let b = mk_b (i32toi cmapoffset + i32toi subtable_offset) in
|
let b = mk_b (i32toi cmapoffset + i32toi subtable_offset) in
|
||||||
let fmt = read_ushort b in
|
let fmt = read_ushort b in
|
||||||
let lngth = read_ushort b in
|
let lngth = read_ushort b in
|
||||||
@ -684,3 +690,11 @@ let parse ~subset data encoding =
|
|||||||
(* Public entry point. Runs the parser defined earlier under the same name and
   turns any exception it raises into a Cpdferror.error whose message embeds
   the printed form of the original exception. *)
let parse ~subset data encoding =
  match parse ~subset data encoding with
  | fonts -> fonts
  | exception exn ->
      raise
        (Cpdferror.error
           ("Failed to parse TrueType font: " ^ Printexc.to_string exn))
|
||||||
|
|
||||||
|
(** Return the (platform_id, encoding_id) pair of each cmap subtable found in
    a font file (used for PDF/UA verification). Pairs are listed most recently
    read first. Parse failures are ignored: whatever was collected before the
    failure is returned, possibly the empty list. *)
let cmaps data =
  collecting_cmaps := true;
  collected_cmaps := [];
  (* Best effort: the parse result is thrown away and every exception is
     swallowed, because only the side effect on [collected_cmaps] matters. *)
  (try ignore (parse ~subset:[] data Pdftext.WinAnsiEncoding) with _ -> ());
  collecting_cmaps := false;
  !collected_cmaps
|
||||||
|
@ -27,3 +27,6 @@ type t =
|
|||||||
additional characters in the font. You should supply a subset (a list of
|
additional characters in the font. You should supply a subset (a list of
|
||||||
unicode codepoints whose corresponding glyphs are required). *)
|
unicode codepoints whose corresponding glyphs are required). *)
|
||||||
val parse : subset:int list -> Pdfio.bytes -> Pdftext.encoding -> t list
|
val parse : subset:int list -> Pdfio.bytes -> Pdftext.encoding -> t list
|
||||||
|
|
||||||
|
(** Return the (platform ID, encoding ID) pair of each cmap subtable in a font file (used for PDF/UA verification). *)
|
||||||
|
val cmaps : Pdfio.bytes -> (int * int) list
|
||||||
|
96
cpdfua.ml
96
cpdfua.ml
@ -925,19 +925,6 @@ let matterhorn_31_015 _ _ pdf =
|
|||||||
let matterhorn_31_016 _ _ pdf =
|
let matterhorn_31_016 _ _ pdf =
|
||||||
unimpl ()
|
unimpl ()
|
||||||
|
|
||||||
(* A non-symbolic TrueType font is used for rendering, but none of the cmap
|
|
||||||
entries in the embedded font program is a non-symbolic cmap. *)
|
|
||||||
let matterhorn_31_017 _ _ pdf =
|
|
||||||
unimpl ()
|
|
||||||
|
|
||||||
(* A non-symbolic TrueType font is used for rendering, but for at least one
|
|
||||||
glyph to be rendered the glyph cannot be looked up by any of the
|
|
||||||
non-symbolic cmap entries in the embedded font program. *)
|
|
||||||
let matterhorn_31_018 _ _ pdf =
|
|
||||||
unimpl ()
|
|
||||||
|
|
||||||
(* The font dictionary for a non-symbolic TrueType font does not contain an
|
|
||||||
Encoding entry. *)
|
|
||||||
let is_non_symbolic pdf o =
|
let is_non_symbolic pdf o =
|
||||||
match Pdf.lookup_direct pdf "/FontDescriptor" o with
|
match Pdf.lookup_direct pdf "/FontDescriptor" o with
|
||||||
| Some fd ->
|
| Some fd ->
|
||||||
@ -947,6 +934,43 @@ let is_non_symbolic pdf o =
|
|||||||
end
|
end
|
||||||
| None -> true
|
| None -> true
|
||||||
|
|
||||||
|
(* Fetch the embedded TrueType font program of font dictionary [o], i.e. the
   stream found at /FontDescriptor -> /FontFile2, as bytes. Gives [None] when
   there is no such stream, or when its data is not available after the
   decoding attempt. *)
let truetype_fontfile pdf o =
  match Pdf.lookup_chain pdf o ["/FontDescriptor"; "/FontFile2"] with
  | Some (Pdf.Stream stream as fontfile) ->
      (* Presumably decodes in place: the stream reference is read back
         immediately afterwards to pick up the resulting data. *)
      Pdfcodec.decode_pdfstream_until_unknown pdf fontfile;
      (match !stream with
       | (_, Pdf.Got data) -> Some data
       | _ -> None)
  | _ -> None
|
||||||
|
|
||||||
|
(* A non-symbolic TrueType font is used for rendering, but none of the cmap
   entries in the embedded font program is a non-symbolic cmap.

   Every /TrueType font object which is non-symbolic and has an embedded font
   program is examined; the check fails via [merror] when all of the program's
   cmap subtables are symbolic ones. Fonts with no embedded program are
   skipped. *)
let matterhorn_31_017 _ _ pdf =
  (* (1, 8) and (3, 0) are the (platform, encoding) pairs this check treats as
     symbolic cmaps. *)
  let is_symbolic_cmap = function
    | (1, 8) | (3, 0) -> true
    | _ -> false
  in
    Pdf.objiter
      (fun _ o ->
         match Pdf.lookup_direct pdf "/Subtype" o with
         (* The checkpoint concerns non-symbolic fonts, so symbolic ones are
            excluded here. *)
         | Some (Pdf.Name "/TrueType") when is_non_symbolic pdf o ->
             begin match truetype_fontfile pdf o with
             | None -> ()
             | Some fontfile ->
                 (* Fails when no cmap is non-symbolic. An empty list (no
                    cmaps, or an unparseable font program) also counts. *)
                 if List.for_all is_symbolic_cmap (Cpdftruetype.cmaps fontfile)
                 then merror ()
             end
         | _ -> ())
      pdf
|
||||||
|
|
||||||
|
(* A non-symbolic TrueType font is used for rendering, but for at least one
|
||||||
|
glyph to be rendered the glyph cannot be looked up by any of the
|
||||||
|
non-symbolic cmap entries in the embedded font program. *)
|
||||||
|
let matterhorn_31_018 _ _ pdf =
|
||||||
|
unimpl ()
|
||||||
|
|
||||||
|
(* The font dictionary for a non-symbolic TrueType font does not contain an
|
||||||
|
Encoding entry. *)
|
||||||
let matterhorn_31_019 _ _ pdf =
|
let matterhorn_31_019 _ _ pdf =
|
||||||
Pdf.objiter
|
Pdf.objiter
|
||||||
(fun _ o ->
|
(fun _ o ->
|
||||||
@ -1025,7 +1049,19 @@ let matterhorn_31_022 _ _ pdf =
|
|||||||
TrueType font dictionary but the embedded font program does not contain a
|
TrueType font dictionary but the embedded font program does not contain a
|
||||||
(3,1) Microsoft Unicode cmap. *)
|
(3,1) Microsoft Unicode cmap. *)
|
||||||
let matterhorn_31_023 _ _ pdf =
  (* Only non-symbolic /TrueType fonts whose /Encoding carries a /Differences
     entry are examined. Those with no embedded font program are skipped; the
     rest must offer a (3, 1) cmap or the check fails via [merror]. *)
  let check o =
    match
      Pdf.lookup_direct pdf "/Subtype" o,
      Pdf.lookup_chain pdf o ["/Encoding"; "/Differences"]
    with
    | Some (Pdf.Name "/TrueType"), Some _ ->
        if is_non_symbolic pdf o then
          begin match truetype_fontfile pdf o with
          | None -> ()
          | Some fontfile ->
              if not (mem (3, 1) (Cpdftruetype.cmaps fontfile)) then merror ()
          end
    | _ -> ()
  in
    Pdf.objiter (fun _ o -> check o) pdf
|
||||||
|
|
||||||
(* The Encoding entry is present in the font dictionary for a symbolic TrueType
|
(* The Encoding entry is present in the font dictionary for a symbolic TrueType
|
||||||
font. *)
|
font. *)
|
||||||
@ -1043,12 +1079,38 @@ let matterhorn_31_024 _ _ pdf =
|
|||||||
|
|
||||||
(* The embedded font program for a symbolic TrueType font contains no cmap.

   Each symbolic /TrueType font with an embedded font program is examined and
   the check fails via [merror] when no cmap subtable is reported for it.
   Fonts with no embedded program are skipped. *)
let matterhorn_31_025 _ _ pdf =
  let check o =
    match Pdf.lookup_direct pdf "/Subtype" o with
    | Some (Pdf.Name "/TrueType") ->
        if not (is_non_symbolic pdf o) then
          begin match truetype_fontfile pdf o with
          | None -> ()
          | Some fontfile ->
              begin match Cpdftruetype.cmaps fontfile with
              | [] -> merror ()
              | _ -> ()
              end
          end
    | _ -> ()
  in
    Pdf.objiter (fun _ o -> check o) pdf
|
||||||
|
|
||||||
(* The embedded font program for a symbolic TrueType font contains more than
   one cmap, but none of the cmap entries is a (3,0) Microsoft Symbol cmap.

   Each symbolic /TrueType font with an embedded font program is examined and
   the check fails via [merror] when the program reports two or more cmap
   subtables and (3, 0) is not among them. Fonts with no embedded program are
   skipped. *)
let matterhorn_31_026 _ _ pdf =
  Pdf.objiter
    (fun _ o ->
       match Pdf.lookup_direct pdf "/Subtype" o with
       (* The checkpoint concerns symbolic fonts only, so the symbolic test
          that had been temporarily replaced by [true] is back in force. *)
       | Some (Pdf.Name "/TrueType") when not (is_non_symbolic pdf o) ->
           begin match truetype_fontfile pdf o with
           | None -> ()
           | Some fontfile ->
               let cmaps = Cpdftruetype.cmaps fontfile in
                 if length cmaps > 1 && not (mem (3, 0) cmaps) then merror ()
           end
       | _ -> ())
    pdf
|
||||||
|
|
||||||
(* A font dictionary does not contain the ToUnicode entry and none of the
|
(* A font dictionary does not contain the ToUnicode entry and none of the
|
||||||
following is true: the font uses MacRomanEncoding, MacExpertEncoding or
|
following is true: the font uses MacRomanEncoding, MacExpertEncoding or
|
||||||
@ -1228,6 +1290,8 @@ let matterhorn =
|
|||||||
("31-030", "One or more characters used in text showing operators reference the .notdef glyph.", "UA1:7.21.8-1", matterhorn_31_030);
|
("31-030", "One or more characters used in text showing operators reference the .notdef glyph.", "UA1:7.21.8-1", matterhorn_31_030);
|
||||||
]
|
]
|
||||||
|
|
||||||
|
(* FIXME Allow the use of just a single test, and expose it in cpdf command line *)
|
||||||
|
|
||||||
let test_matterhorn pdf =
|
let test_matterhorn pdf =
|
||||||
(* A circularity in the role map prevents all structure checks, so we do it first and stop if it fails. *)
|
(* A circularity in the role map prevents all structure checks, so we do it first and stop if it fails. *)
|
||||||
let circularity_error =
|
let circularity_error =
|
||||||
|
Loading…
Reference in New Issue
Block a user