(* UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE

Unicode Data Files include all data files under the directories
http://www.unicode.org/Public/, http://www.unicode.org/reports/, and
http://www.unicode.org/cldr/data/. Unicode Data Files do not include PDF online
code charts under the directory http://www.unicode.org/Public/. Software
includes any source code published in the Unicode Standard or under the
directories http://www.unicode.org/Public/, http://www.unicode.org/reports/,
and http://www.unicode.org/cldr/data/.

NOTICE TO USER: Carefully read the following legal agreement. BY DOWNLOADING,
INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S DATA FILES ("DATA
FILES"), AND/OR SOFTWARE ("SOFTWARE"), YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO
BE BOUND BY, ALL OF THE TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT
AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR
SOFTWARE.

COPYRIGHT AND PERMISSION NOTICE

Copyright © 1991-2015 Unicode, Inc. All rights reserved. Distributed under the
Terms of Use in http://www.unicode.org/copyright.html.

Permission is hereby granted, free of charge, to any person obtaining a copy of
the Unicode data files and any associated documentation (the "Data Files") or
Unicode software and any associated documentation (the "Software") to deal in
the Data Files or Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, and/or sell copies
of the Data Files or Software, and to permit persons to whom the Data Files or
Software are furnished to do so, provided that

(a) this copyright and permission notice appear with all copies of the Data
Files or Software, (b) this copyright and permission notice appear in
associated documentation, and (c) there is clear notice in each modified Data
File or in the Software as well as in the documentation associated with the
Data File(s) or Software that the data or software has been modified. THE DATA
FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN
NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE
LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY
DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA FILES OR SOFTWARE.

Except as contained in this notice, the name of a copyright holder shall not be
used in advertising or otherwise to promote the sale, use or other dealings in
these Data Files or Software without prior written authorization of the
copyright holder. *)
open Pdfutil
let unicodedata_source = "x\001\164\253[\147\228:r%\140\190\199\175\200\1999\015\219\190\224\157\180\148\141\025H\130\025\172\140[\197\165jg\189\233h4\199>\155n\245\152.\255\255\000NF\004\t\172\005\178\182\166$\140\186s-\007\224pw\000\014\016\177\221n\183\239\255\244/\255\248\183\255\252\247\127\252\237\127\1907\255\242\190}\175\143\239\255\252\254\255}\255\151\247\255\245~|?\222\247\251\247\127}\255\223\239\255\191\247\141\193F\000l\255\223\241\253zS\151\219\219\169{\219i\213\246\199\015\251\223ZF\188\204\184\233?o\015xB\225\250\216\186\224t\017|Q\199\235\161\191^\251\211\241A\202\002\164\239\247\254\242\245\000\230\020\168\154\207\227\233\231^\183\031\250\001.(\184\214F\131#\170\228(#\243zV\205S`\229B\175#\178\217\169\139jn\250\242vS\245}\175n\147\190)O\254H\218\247G\253\214i\221\190\253\143}\247\255y\192kV\135\192}\241\141\139\255\249 t\167\203a\148\223\189\228\183\1729\141\186\\z\245\161\223.\250v\191\028\223\254Gsy\14647\152]\223\025\131\185?\199\191[\128\246\143\150G[\138l\213M\189\153\254~\190\233k\163\206\015\253G\220\210[\253\163o\244[s:\222.\167\253\219\233\248\228\196k9\183\159\167\007'Y\205\217]\244\179\166t-\171;\221/\015\018\183\252\163\2540\003\253C\191\249\150\029q7\184~\029\155\221\229t<\221\175o}\187\1272\138\223q\203\183z\127j>\031T\238 
\141:6\250\225HQ\181T\197A\183\253\253\240\128+\222\135{}\189\245\183\251\237\217\250\154\139\158\025H\195\140\187?Zo\016\199y\187\234\179\241\214\219\2332\027\136\246\247\168\179\145\215\191\201}YZ\199\188\0293_v\029\027\168\132\166_\2551\245\249\199_\163w\253g\179W\163\128\131\186|\190\159\255a\128\167\163\003\140\223\191\223O\183EXb&\157Cm\226\219\181\2558\014\024}s0\233{{\218\239\213\136\185\254\011\194d\239g}i\244\241\022\018\148\191\171\131\129]\213\177%\205)\222\213\249t5\206t\222i\007\242\250\195\031\182gO}\149\239{m\194\143\209\164\169}\167\175\253\245\253\252\031/\226\215\251\233\172\143f\142\156!Fn\245~\233?v\014\249_\167\228f\127\186\018\178zWW33\244W\166\219\250\253\1887\222:h\237\239V!\238h6\239\205\233pP\003\191q\255\218\190\239\190Lw\143\127\028\250\227\2214\236\127!\017\250\1893\171\134\183\235\237tv\196\024M\247\167\246\129\235\222\175\167}\223Z93\212u\175\174\187\017\148l\223\219\254\163\191\189\253\210\151\211\251Q\23435m\229\223\179\202$\026Q\214h\159\160H\254\189@\241\b\178>\241\004\197\242\239\005J\030 q\186',\145\127/X:\194\196\173\159\168T\254\189P\217\003eb\235\011\149\201\191\023*\031Q\215\254\207\023(\151\127/P\241\000\233\031\230\207OX!\255^\176r\132ikD/X)\255^\176j\132\025+\1564\172\146\127/\1482\182\176?\029\177-$\245\251U\031\250\t\194\181\182\1641~p\189\254q\219\169\227\196\228\158\134\252\128\181\239\250\251]\237\175>\230%J\191\127\\\1802\182\189(\1733QF_\023\130L\186\02137\241\161W\2517u#\168\232\221.\128\142o&\238\2477\003\220\235\155]x\169\247\253\127\025\248\254\1336\171\197H\0241f\212\136\017\011#\193\140\0061\018a\164\152\209\"F*\140\01234bd\194\2001\163C\140\\\024\005f| 
F!\140\0183v\136Q\n\163\194\140\0301*a(\204\248\134\024J\0245f|\"F-\140\0063\246\136\209\b\163\197\140\003b\180\194\208\152qD\012-\140\0143N\136\209YF\182\197\1403`\020[a\016\255\248\142\024\226\031\025\241\143\011b\136\127d\196?\174\136!\254\145\017\255\184!\134\248GF\252\227\142\024\226\031\025\241\143\031\136!\254\145\017\255\248\137\024\226\031\025\241\143?\017C\252##\254\241\133\024\226\031\025\241\143_\136!\254\145\213\195\226\230jB\246E\191\213f/\250iVUx}\227\128\222%\170f\205\251\197Lb\151\171~\155\205\255\167\217^x\178\006\200\218qM\228\214\t\151E\184N\253\222\244\151\230~\232\246\250O\179\203\177\203\194\247\235\255\153\214j\151\184\150\255\194=\184\198\147N?\237\030\209\172\255\254\005Q\238\199\214t\1679]\030+\192|k\166+%\251\169@M\002y0\030\174t=\168\253t\162\249\219l\024dN\146r#\019\b\224\212\128\019\015\229F\166\016\192i\000'\025\202\141L\"\128\211\002N:\148\027\153F\000G\003N6\148\027\153H\000\167\003\156|(72\149\000\206\007\224\020C\185\145\201\004pv\128S\014\229F\166\019\192\233\001\167\026\202\141L(\128\243\rp\212PndJ\001\156O\192\169\135r#\147\n\224\236\001\167\025\202\141L+\128s\000\156v(72\177\000\206\017p\244Pndj\001\156\019\224tC\185\145\20
(* One parsed entry of the embedded Unicode data table.

   Every field holds the raw text read from the data: [parse_entry] fills the
   fields in the order they are declared here, one per ';'- or
   newline-terminated column, and does not interpret them (numbers, code
   points and flags all stay strings).

   NOTE(review): the field names look like the column names of the Unicode
   Character Database's UnicodeData.txt -- confirm against the data file. *)
type t =
  {code_value : string;
   character_name : string;
   general_category : string;
   canonical_combining_classes : string;
   bidirectional_category : string;
   character_decomposition_mapping : string;
   decimal_digit_value : string;
   digit_value : string;
   numeric_value : string;
   mirrored : string;
   unicode_10_name : string;
   iso_10646_comment_field : string;
   uppercase_mapping : string;
   lowercase_mapping : string;
   titlecase_mapping : string}
(* Read one field from the input [i] and return it as a string.

   Characters are collected with [Pdfread.getuntil] until a ';' or a newline
   character satisfies the delimiter test, and the collected characters are
   joined with [implode]. [Pdfio.nudge] is then called on [i] before
   returning; presumably this moves the input past the delimiter so the next
   call starts on the following field -- confirm against Pdfio.

   NOTE(review): the meaning of the [true] flag passed to [Pdfread.getuntil]
   is not visible from this file. *)
let get_single_field i =
  let is_delimiter = function
    | ';' | '\n' -> true
    | _ -> false
  in
  let field = implode (Pdfread.getuntil true is_delimiter i) in
  Pdfio.nudge i;
  field
(* Read the fifteen fields of one entry from the input [i] and pack them into
   a [t].

   Each field is read by its own [get_single_field] call, in the order the
   columns appear in the data. The reads are deliberately sequenced with
   [let ... in] rather than written inside the record expression: every call
   consumes input, and OCaml does not specify the evaluation order of record
   field expressions. *)
let parse_entry i =
  let code_value = get_single_field i in
  let character_name = get_single_field i in
  let general_category = get_single_field i in
  let canonical_combining_classes = get_single_field i in
  let bidirectional_category = get_single_field i in
  let character_decomposition_mapping = get_single_field i in
  let decimal_digit_value = get_single_field i in
  let digit_value = get_single_field i in
  let numeric_value = get_single_field i in
  let mirrored = get_single_field i in
  let unicode_10_name = get_single_field i in
  let iso_10646_comment_field = get_single_field i in
  let uppercase_mapping = get_single_field i in
  let lowercase_mapping = get_single_field i in
  let titlecase_mapping = get_single_field i in
  {code_value;
   character_name;
   general_category;
   canonical_combining_classes;
   bidirectional_category;
   character_decomposition_mapping;
   decimal_digit_value;
   digit_value;
   numeric_value;
   mirrored;
   unicode_10_name;
   iso_10646_comment_field;
   uppercase_mapping;
   lowercase_mapping;
   titlecase_mapping}
(* Parse entries from the input [i] until the end of the data and return them
   in the order they were read.

   [a] accumulates the entries read so far, most recent first; it is reversed
   once on completion. The recursive call is in tail position, so the length
   of the table does not affect stack use.

   The end test is an exact comparison of the current position with the input
   length plus two; per the original note, the position has "been nudged" by
   then. NOTE(review): because the test is an equality, it relies on the data
   ending in exactly the shape the embedded table has -- confirm before
   reusing this on other input. *)
let rec parse_unicodedata a i =
  if i.Pdfio.pos_in () = i.Pdfio.in_channel_length + 2 (* it's been nudged *)
  then rev a
  else parse_unicodedata (parse_entry i :: a) i
(* The parsed Unicode data table, obtained by calling [unicodedata ()].

   The embedded [unicodedata_source] string is converted to bytes, passed
   through [Pdfcodec.decode_flate], converted back to a string, wrapped as an
   input and handed to [parse_unicodedata] with an empty accumulator.

   The whole computation is wrapped in [memoize], which is not defined in this
   file (presumably it comes from the opened Pdfutil and caches the result
   after the first call -- confirm). *)
let unicodedata =
  memoize
    (fun () ->
       unicodedata_source
       |> Pdfio.bytes_of_string
       |> Pdfcodec.decode_flate
       |> Pdfio.string_of_bytes
       |> Pdfio.input_of_string
       |> parse_unicodedata [])