1
1
mirror of https://github.com/OpenVoiceOS/OpenVoiceOS synced 2025-06-05 22:19:21 +02:00

[WIP] Pushed for backup.

... Do not build this as of yet ...
This commit is contained in:
j1nx
2023-06-01 15:16:04 +02:00
parent 5c7af8b058
commit c6460b9307
417 changed files with 43487 additions and 76 deletions

View File

@ -0,0 +1,5 @@
# Package root: re-export the two public finder classes.
from timezonefinder.timezonefinder import TimezoneFinder, TimezoneFinderL

# https://docs.python.org/3/tutorial/modules.html#importing-from-a-package
# determines which objects will be imported with "import *"
__all__ = ("TimezoneFinder", "TimezoneFinderL")

View File

@ -0,0 +1,48 @@
""" 'transparent' numba functionality replacements
njit decorator
data types
dtype_2int_tuple = typeof((1, 1))
@njit(b1(i4, i4, i4[:, :]), cache=True)
@njit(dtype_2int_tuple(f8, f8), cache=True)
"""
def njit(*args, **kwargs):
    """Transparent no-op replacement for the ``numba.njit`` decorator.

    Hands the decorated function back unchanged, so numba-annotated code
    also runs when numba is not installed.

    Supports both decorator usages (like the real ``numba.njit``):
      * ``@njit`` (bare decorator)
      * ``@njit(signature, cache=True)`` (parametrized decorator)
    """
    def wrapper(f):
        # no compilation: return the function untouched
        return f

    # bare usage: @njit applied directly to a function -> args == (func,)
    # (signature objects like b1(i4, ...) are plain instances, not callable,
    # so they cannot be confused with the decorated function)
    if len(args) == 1 and callable(args[0]) and not kwargs:
        return args[0]
    return wrapper
class SubscriptAndCallable:
    """Stand-in for a numba dtype object: tolerates calls and subscripting.

    Lets numba-style decorator arguments such as ``b1(i4, i4, i4[:, :])``
    evaluate without numba being installed (the results are never used).
    """

    def __init__(self, *args, **kwargs):
        # accept any call signature, e.g. i4(...) used as signature constructor
        pass

    def __class_getitem__(cls, item):
        # accept any subscript, e.g. i4[:, :] used as array type syntax
        return None
# DTYPES
# placeholder classes mirroring the numba dtype names used in signatures, e.g.:
# @njit(b1(i4, i4, i4[:, :]), cache=True)
class b1(SubscriptAndCallable):
    """Placeholder for numba's 1-byte boolean dtype."""
    pass

class f8(SubscriptAndCallable):
    """Placeholder for numba's 8-byte float dtype."""
    pass

class i2(SubscriptAndCallable):
    """Placeholder for numba's 2-byte signed integer dtype."""
    pass

class i4(SubscriptAndCallable):
    """Placeholder for numba's 4-byte signed integer dtype."""
    pass

class u2(SubscriptAndCallable):
    """Placeholder for numba's 2-byte unsigned integer dtype."""
    pass

View File

@ -0,0 +1,60 @@
import argparse
from typing import Callable
from timezonefinder import TimezoneFinder, TimezoneFinderL
from timezonefinder.timezonefinder import AbstractTimezoneFinder
def get_timezone_function(function_id: int) -> Callable:
    """Return the bound lookup method selected by *function_id*.

    Note: the script is called for each point individually, so caching
    TimezoneFinder() instances would be useless
    -> only construct the one instance that is actually needed.
    """
    tf: AbstractTimezoneFinder
    if function_id in (0, 1, 5):
        tf = TimezoneFinder()
        lookup = {
            0: tf.timezone_at,
            1: tf.certain_timezone_at,
            5: tf.timezone_at_land,
        }
    else:
        tf = TimezoneFinderL()
        lookup = {
            3: tf.timezone_at,
            4: tf.timezone_at_land,
        }
    return lookup[function_id]
def main():
    """CLI entry point: parse a coordinate, run the selected lookup, print the result."""
    # human-readable names for the verbose output, keyed by function id
    function_names = {
        0: "TimezoneFinder.timezone_at()",
        1: "TimezoneFinder.certain_timezone_at()",
        3: "TimezoneFinderL.timezone_at()",
        4: "TimezoneFinderL.timezone_at_land()",
        5: "TimezoneFinder.timezone_at_land()",
    }
    parser = argparse.ArgumentParser(description="parse TimezoneFinder parameters")
    parser.add_argument("lng", type=float, help="longitude to be queried")
    parser.add_argument("lat", type=float, help="latitude to be queried")
    parser.add_argument("-v", action="store_true", help="verbosity flag")
    parser.add_argument(
        "-f",
        "--function",
        type=int,
        choices=[0, 1, 3, 4, 5],
        default=0,
        help="function to be called:"
        "0: TimezoneFinder.timezone_at(), "
        "1: TimezoneFinder.certain_timezone_at(), "
        "2: removed, "
        "3: TimezoneFinderL.timezone_at(), "
        "4: TimezoneFinderL.timezone_at_land(), "
        "5: TimezoneFinder.timezone_at_land(), ",
    )
    parsed_args = parser.parse_args()  # takes input from sys.argv
    timezone_function = get_timezone_function(parsed_args.function)
    tz = timezone_function(lng=parsed_args.lng, lat=parsed_args.lat)
    if parsed_args.v:
        print("Looking for TZ at lat=", parsed_args.lat, " lng=", parsed_args.lng)
        # BUGFIX: previously a 2-element list was indexed with the function id,
        # which raised IndexError for id 5 and mislabeled ids 3 and 4
        print("Function:", function_names[parsed_args.function])
        print("Timezone=", tz)
    else:
        print(tz)

View File

@ -0,0 +1,109 @@
# NOTE: Changes in the global settings might not immediately affect
# the functions in utils.py due to numba compilation and caching!
from typing import Dict, List, Tuple

import numpy as np

# SHORTCUT SETTINGS
# h3 library
# resolution of the h3 hexagons used as spatial index ("shortcuts")
SHORTCUT_H3_RES: int = 3
SHORTCUT_FILE = "shortcuts.bin"
# zone names with this prefix are considered ocean timezones
OCEAN_TIMEZONE_PREFIX = r"Etc/GMT"

# DATA FILES
# BINARY
BINARY_FILE_ENDING = ".bin"
# attribute names (= binary file name stems) of the per-polygon data:
POLY_ZONE_IDS = "poly_zone_ids"
POLY_COORD_AMOUNT = "poly_coord_amount"
POLY_ADR2DATA = "poly_adr2data"
POLY_MAX_VALUES = "poly_bounds"
POLY_DATA = "poly_data"
POLY_NR2ZONE_ID = "poly_nr2zone_id"
HOLE_COORD_AMOUNT = "hole_coord_amount"
HOLE_ADR2DATA = "hole_adr2data"
HOLE_DATA = "hole_data"
BINARY_DATA_ATTRIBUTES = [
    POLY_ZONE_IDS,
    POLY_COORD_AMOUNT,
    POLY_ADR2DATA,
    POLY_MAX_VALUES,
    POLY_DATA,
    POLY_NR2ZONE_ID,
    HOLE_COORD_AMOUNT,
    HOLE_ADR2DATA,
    HOLE_DATA,
]

# JSON
JSON_FILE_ENDING = ".json"
HOLE_REGISTRY = "hole_registry"
TIMEZONE_NAMES_FILE = "timezone_names" + JSON_FILE_ENDING
HOLE_REGISTRY_FILE = HOLE_REGISTRY + JSON_FILE_ENDING
DATA_ATTRIBUTE_NAMES = BINARY_DATA_ATTRIBUTES + [HOLE_REGISTRY]
# all data files that should be included in the build:
ALL_BINARY_FILES = [specifier + BINARY_FILE_ENDING for specifier in BINARY_DATA_ATTRIBUTES]
ALL_JSON_FILES = [TIMEZONE_NAMES_FILE, HOLE_REGISTRY_FILE]
PACKAGE_DATA_FILES = ALL_BINARY_FILES + ALL_JSON_FILES

# TODO create variables for used dtype for each type of data (polygon address, coordinate...)
# little-endian struct format strings and byte sizes of the used data types:
# B = unsigned char (1byte = 8bit Integer)
NR_BYTES_B = 1
DTYPE_FORMAT_B = b"<B"
# NOTE(review): "<i1" is numpy's *signed* 1-byte dtype although B is unsigned
# ("<u1" would match) — confirm no stored value exceeds 127 before changing
DTYPE_FORMAT_B_NUMPY = "<i1"
# first value NOT representable in one unsigned byte (= 256)
THRES_DTYPE_B = 2 ** (NR_BYTES_B * 8)
# H = unsigned short (2 byte integer)
NR_BYTES_H = 2
DTYPE_FORMAT_H = b"<H"
DTYPE_FORMAT_H_NUMPY = "<u2"
THRES_DTYPE_H = 2 ** (NR_BYTES_H * 8)  # = 65536
# value to write for representing an invalid zone (e.g. no shortcut polygon)
# = 65535 = highest possible value with H (2 byte unsigned integer)
INVALID_VALUE_DTYPE_H = THRES_DTYPE_H - 1
# i = signed 4byte integer
NR_BYTES_I = 4
DTYPE_FORMAT_SIGNED_I = b"<i"
DTYPE_FORMAT_SIGNED_I_NUMPY = "<i4"
THRES_DTYPE_SIGNED_I_UPPER = 2 ** ((NR_BYTES_I * 8) - 1)
THRES_DTYPE_SIGNED_I_LOWER = -THRES_DTYPE_SIGNED_I_UPPER
# I = unsigned 4byte integer
DTYPE_FORMAT_I = b"<I"
THRES_DTYPE_I = 2 ** (NR_BYTES_I * 8)
# Q = unsigned 8byte integer
NR_BYTES_Q = 8
DTYPE_FORMAT_Q = b"<Q"
# f = 8byte signed float
DTYPE_FORMAT_F_NUMPY = "<f8"

# IMPORTANT: all values between -180 and 180 degree must fit into the domain of i4!
# is the same as testing if 360 fits into the domain of I4 (unsigned!)
MAX_ALLOWED_COORD_VAL = 2 ** (8 * NR_BYTES_I - 1)
# coordinates are stored as fixed-point int32 values, shifted by 7 decimal places:
# from math import floor,log10
# DECIMAL_PLACES_SHIFT = floor(log10(MAX_ALLOWED_COORD_VAL/180.0)) # == 7
DECIMAL_PLACES_SHIFT = 7
INT2COORD_FACTOR = 10 ** (-DECIMAL_PLACES_SHIFT)
COORD2INT_FACTOR = 10**DECIMAL_PLACES_SHIFT
MAX_LNG_VAL = 180.0
MAX_LAT_VAL = 90.0
MAX_INT_VAL = int(MAX_LNG_VAL * COORD2INT_FACTOR)
# sanity check at import time: the int32 representation must not overflow
assert MAX_INT_VAL < MAX_ALLOWED_COORD_VAL

# TYPES
# hexagon id to list of polygon ids
ShortcutMapping = Dict[int, np.ndarray]
# list of (lng, lat) pairs
CoordPairs = List[Tuple[float, float]]
CoordLists = List[List[float]]
IntLists = List[List[int]]

View File

@ -0,0 +1,64 @@
import struct
from pathlib import Path
from typing import Dict, List
import numpy as np
from h3.api import numpy_int as h3
from timezonefinder.configs import (
DTYPE_FORMAT_B,
DTYPE_FORMAT_H,
DTYPE_FORMAT_H_NUMPY,
DTYPE_FORMAT_Q,
NR_BYTES_B,
NR_BYTES_I,
NR_BYTES_Q,
THRES_DTYPE_B,
ShortcutMapping,
)
def export_shortcuts_binary(global_mapping: Dict[int, List[int]], path2shortcuts: Path) -> int:
    """Write the shortcut mapping to a binary file.

    binary format:
    for every shortcut entry:
      - the hex id (uint64)
      - the amount of contained polygons n (uint8)
      - n polygon ids (uint16)

    :param global_mapping: mapping from h3 hex id to the ids of the polygons in that cell
    :param path2shortcuts: output file path
    :return: the total amount of bytes written
    :raises ValueError: if an entry holds more polygon ids than fit into a uint8
    """
    # size of one stored polygon id (uint16 = 2 bytes)
    nr_bytes_per_poly_id = struct.calcsize(DTYPE_FORMAT_H)
    shortcut_space = 0
    with open(path2shortcuts, "wb") as fp:
        for hex_id, poly_ids in global_mapping.items():
            fp.write(struct.pack(DTYPE_FORMAT_Q, hex_id))
            nr_polys = len(poly_ids)
            # BUGFIX: the maximum value storable in one unsigned byte is
            # THRES_DTYPE_B - 1 (= 255), so equality must be rejected too
            # (was: nr_polys > THRES_DTYPE_B, letting 256 slip through)
            if nr_polys >= THRES_DTYPE_B:
                raise ValueError("value overflow: more polys than data type supports")
            fp.write(struct.pack(DTYPE_FORMAT_B, nr_polys))
            for poly_id in poly_ids:
                fp.write(struct.pack(DTYPE_FORMAT_H, poly_id))
            # BUGFIX: polygon ids are written as 2-byte H values, so account
            # 2 bytes per id (was: NR_BYTES_I = 4, overstating the space used)
            shortcut_space += NR_BYTES_Q + NR_BYTES_B + (nr_polys * nr_bytes_per_poly_id)
    return shortcut_space
def read_shortcuts_binary(path2shortcuts: Path) -> ShortcutMapping:
    """Parse the shortcut binary file back into a hex id -> polygon id array mapping."""
    mapping: ShortcutMapping = {}
    with open(path2shortcuts, "rb") as fp:
        while True:
            header = fp.read(NR_BYTES_Q)
            try:
                hex_id: int = struct.unpack(DTYPE_FORMAT_Q, header)[0]
            except struct.error:
                # EOF reached: remaining buffer too short to hold another hex id
                break
            nr_polys: int = struct.unpack(DTYPE_FORMAT_B, fp.read(NR_BYTES_B))[0]
            mapping[hex_id] = np.fromfile(fp, dtype=DTYPE_FORMAT_H_NUMPY, count=nr_polys)
    return mapping
def lies_in_h3_cell(h: int, lng: float, lat: float) -> bool:
    """True if the point (lng, lat) falls into the h3 cell with id *h*."""
    resolution = h3.h3_get_resolution(h)
    cell_of_point = h3.geo_to_h3(lat, lng, resolution)
    return cell_of_point == h

View File

@ -0,0 +1,374 @@
{
"1116": [
2,
236
],
"1137": [
15,
238
],
"1141": [
44,
253
],
"1142": [
2,
297
],
"1143": [
37,
299
],
"1144": [
20,
336
],
"1147": [
1,
356
],
"1150": [
28,
357
],
"1151": [
3,
385
],
"1155": [
1,
388
],
"1156": [
23,
389
],
"1159": [
4,
412
],
"1164": [
3,
416
],
"1168": [
1,
419
],
"1169": [
2,
420
],
"1170": [
5,
422
],
"1171": [
3,
427
],
"1172": [
14,
430
],
"1177": [
12,
444
],
"1178": [
2,
456
],
"1181": [
1,
458
],
"1186": [
1,
459
],
"1187": [
7,
460
],
"1192": [
3,
467
],
"1197": [
21,
470
],
"1206": [
1,
491
],
"1210": [
1,
492
],
"1213": [
2,
493
],
"1221": [
5,
495
],
"1223": [
2,
500
],
"1226": [
2,
502
],
"1227": [
4,
504
],
"1229": [
13,
508
],
"1231": [
17,
521
],
"1234": [
2,
538
],
"1236": [
1,
540
],
"1237": [
11,
541
],
"1241": [
2,
552
],
"1245": [
13,
554
],
"1246": [
1,
567
],
"1247": [
4,
568
],
"1250": [
6,
572
],
"1251": [
2,
578
],
"1257": [
6,
580
],
"1260": [
4,
586
],
"1263": [
31,
590
],
"1267": [
34,
621
],
"1271": [
36,
655
],
"1273": [
1,
691
],
"1274": [
21,
692
],
"16": [
8,
1
],
"199": [
92,
23
],
"239": [
1,
115
],
"249": [
2,
116
],
"270": [
1,
118
],
"281": [
3,
119
],
"30": [
1,
9
],
"325": [
1,
122
],
"348": [
1,
123
],
"350": [
2,
124
],
"360": [
1,
126
],
"37": [
1,
10
],
"375": [
1,
127
],
"395": [
1,
128
],
"4": [
1,
0
],
"443": [
1,
129
],
"446": [
6,
130
],
"461": [
1,
136
],
"481": [
16,
137
],
"493": [
2,
153
],
"511": [
1,
155
],
"520": [
1,
156
],
"530": [
2,
157
],
"54": [
2,
11
],
"558": [
22,
159
],
"591": [
2,
181
],
"639": [
5,
183
],
"716": [
21,
188
],
"755": [
1,
209
],
"756": [
1,
210
],
"764": [
6,
211
],
"765": [
1,
217
],
"771": [
6,
218
],
"805": [
2,
224
],
"817": [
1,
226
],
"820": [
1,
227
],
"822": [
1,
228
],
"831": [
1,
229
],
"840": [
2,
230
],
"844": [
1,
232
],
"848": [
1,
233
],
"868": [
2,
234
],
"88": [
10,
13
]
}

View File

@ -0,0 +1,69 @@
#include "inside_polygon_int.h"

#include <stdint.h>
#include <stdio.h>
// Point-in-polygon test (crossing number / even-odd rule) on integer
// coordinates. A point lying exactly on a polygon edge is reported as inside.
// The naive formulation compares x against the edge/scanline intersection
// computed with a division; here the comparison is cross-multiplied instead,
// which avoids the division but requires 64-bit intermediates.
bool inside_polygon_int(int x, int y, int nr_coords, int x_coords[],
                        int y_coords[]) {
  bool inside, y_gt_y1, y_gt_y2, x_le_x1, x_le_x2;
  // BUGFIX: use int64_t instead of long. long is only 32 bit on LLP64
  // platforms (e.g. 64-bit Windows), so the slope products below could
  // overflow despite the "int64 precision" intent.
  int64_t y1, y2, x1, x2, slope1, slope2;
  int i, j;
  inside = false;
  // the edge from the last to the first point is checked first
  j = nr_coords - 1;
  y_gt_y1 = y > y_coords[j];
  for (i = 0; i < nr_coords; j = i++) {
    y_gt_y2 = y > y_coords[i];
    if (y_gt_y1 ^ y_gt_y2) { // XOR
      // [p1-p2] crosses horizontal line in p
      // only count crossings "right" of the point ( >= x)
      x_le_x1 = x <= x_coords[j];
      x_le_x2 = x <= x_coords[i];
      if (x_le_x1 || x_le_x2) {
        if (x_le_x1 && x_le_x2) {
          // p1 and p2 are both to the right -> valid crossing
          inside = !inside;
        } else {
          // compare the slope of the line [p1-p2] and [p-p2]
          // depending on the position of p2 this determines whether
          // the polygon edge is right or left of the point
          // to avoid expensive division the divisors (of the slope dy/dx)
          // are brought to the other side ( dy/dx > a == dy > a * dx )
          // only one of the points is to the right
          // NOTE: 64 bit precision required to prevent overflow
          y1 = y_coords[j];
          y2 = y_coords[i];
          x1 = x_coords[j];
          x2 = x_coords[i];
          slope1 = (y2 - y) * (x2 - x1);
          slope2 = (y2 - y1) * (x2 - x);
          // NOTE: accept slope equality to also detect if p lies directly
          // on an edge
          if (y_gt_y1) {
            if (slope1 <= slope2) {
              inside = !inside;
            }
          } else { // NOT y_gt_y1
            if (slope1 >= slope2) {
              inside = !inside;
            }
          }
        }
      }
    }
    // next point
    y_gt_y1 = y_gt_y2;
  }
  return inside;
}

View File

@ -0,0 +1,4 @@
#include <stdbool.h>
/* Even-odd rule point-in-polygon test on integer coordinates.
 * Returns true if (x, y) lies inside the polygon given by nr_coords
 * vertices in x_coords/y_coords; points exactly on an edge are reported
 * as inside (cf. the slope-equality handling in the implementation). */
bool inside_polygon_int(int x, int y, int nr_coords, int x_coords[],
                        int y_coords[]);

View File

@ -0,0 +1,7 @@
"""modify PYTHONPATH to make parent package discoverable."""
import sys
from os.path import abspath, join, pardir

# __file__/../.. resolves to the parent of the directory containing this file
# (i.e. the location of the parent package)
package_path = abspath(join(__file__, pardir, pardir))
# prepend so the parent package shadows any installed version
sys.path.insert(0, package_path)

Binary file not shown.

After

Width:  |  Height:  |  Size: 890 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.5 KiB

View File

@ -0,0 +1,446 @@
[
"Africa/Abidjan",
"Africa/Accra",
"Africa/Addis_Ababa",
"Africa/Algiers",
"Africa/Asmara",
"Africa/Bamako",
"Africa/Bangui",
"Africa/Banjul",
"Africa/Bissau",
"Africa/Blantyre",
"Africa/Brazzaville",
"Africa/Bujumbura",
"Africa/Cairo",
"Africa/Casablanca",
"Africa/Ceuta",
"Africa/Conakry",
"Africa/Dakar",
"Africa/Dar_es_Salaam",
"Africa/Djibouti",
"Africa/Douala",
"Africa/El_Aaiun",
"Africa/Freetown",
"Africa/Gaborone",
"Africa/Harare",
"Africa/Johannesburg",
"Africa/Juba",
"Africa/Kampala",
"Africa/Khartoum",
"Africa/Kigali",
"Africa/Kinshasa",
"Africa/Lagos",
"Africa/Libreville",
"Africa/Lome",
"Africa/Luanda",
"Africa/Lubumbashi",
"Africa/Lusaka",
"Africa/Malabo",
"Africa/Maputo",
"Africa/Maseru",
"Africa/Mbabane",
"Africa/Mogadishu",
"Africa/Monrovia",
"Africa/Nairobi",
"Africa/Ndjamena",
"Africa/Niamey",
"Africa/Nouakchott",
"Africa/Ouagadougou",
"Africa/Porto-Novo",
"Africa/Sao_Tome",
"Africa/Tripoli",
"Africa/Tunis",
"Africa/Windhoek",
"America/Adak",
"America/Anchorage",
"America/Anguilla",
"America/Antigua",
"America/Aruba",
"America/Araguaina",
"America/Argentina/Buenos_Aires",
"America/Argentina/Catamarca",
"America/Argentina/Cordoba",
"America/Argentina/Jujuy",
"America/Argentina/La_Rioja",
"America/Argentina/Mendoza",
"America/Argentina/Rio_Gallegos",
"America/Argentina/Salta",
"America/Argentina/San_Juan",
"America/Argentina/San_Luis",
"America/Argentina/Tucuman",
"America/Argentina/Ushuaia",
"America/Asuncion",
"America/Atikokan",
"America/Bahia",
"America/Bahia_Banderas",
"America/Barbados",
"America/Belem",
"America/Belize",
"America/Blanc-Sablon",
"America/Boa_Vista",
"America/Bogota",
"America/Boise",
"America/Cambridge_Bay",
"America/Campo_Grande",
"America/Cancun",
"America/Caracas",
"America/Cayenne",
"America/Cayman",
"America/Chicago",
"America/Chihuahua",
"America/Ciudad_Juarez",
"America/Costa_Rica",
"America/Creston",
"America/Cuiaba",
"America/Curacao",
"America/Danmarkshavn",
"America/Dawson",
"America/Dawson_Creek",
"America/Denver",
"America/Detroit",
"America/Dominica",
"America/Edmonton",
"America/Eirunepe",
"America/El_Salvador",
"America/Fort_Nelson",
"America/Fortaleza",
"America/Glace_Bay",
"America/Goose_Bay",
"America/Grand_Turk",
"America/Grenada",
"America/Guadeloupe",
"America/Guatemala",
"America/Guayaquil",
"America/Guyana",
"America/Halifax",
"America/Havana",
"America/Hermosillo",
"America/Indiana/Indianapolis",
"America/Indiana/Knox",
"America/Indiana/Marengo",
"America/Indiana/Petersburg",
"America/Indiana/Tell_City",
"America/Indiana/Vevay",
"America/Indiana/Vincennes",
"America/Indiana/Winamac",
"America/Inuvik",
"America/Iqaluit",
"America/Jamaica",
"America/Juneau",
"America/Kentucky/Louisville",
"America/Kentucky/Monticello",
"America/Kralendijk",
"America/La_Paz",
"America/Lima",
"America/Los_Angeles",
"America/Lower_Princes",
"America/Maceio",
"America/Managua",
"America/Manaus",
"America/Marigot",
"America/Martinique",
"America/Matamoros",
"America/Mazatlan",
"America/Miquelon",
"America/Menominee",
"America/Merida",
"America/Metlakatla",
"America/Mexico_City",
"America/Moncton",
"America/Monterrey",
"America/Montevideo",
"America/Montserrat",
"America/Nassau",
"America/New_York",
"America/Nome",
"America/Noronha",
"America/North_Dakota/Beulah",
"America/North_Dakota/Center",
"America/North_Dakota/New_Salem",
"America/Nuuk",
"America/Ojinaga",
"America/Panama",
"America/Paramaribo",
"America/Phoenix",
"America/Port-au-Prince",
"America/Port_of_Spain",
"America/Porto_Velho",
"America/Puerto_Rico",
"America/Punta_Arenas",
"America/Rankin_Inlet",
"America/Recife",
"America/Regina",
"America/Resolute",
"America/Rio_Branco",
"America/Santarem",
"America/Santiago",
"America/Santo_Domingo",
"America/Sao_Paulo",
"America/Scoresbysund",
"America/Sitka",
"America/St_Barthelemy",
"America/St_Johns",
"America/St_Kitts",
"America/St_Lucia",
"America/St_Thomas",
"America/St_Vincent",
"America/Swift_Current",
"America/Tegucigalpa",
"America/Thule",
"America/Tijuana",
"America/Toronto",
"America/Tortola",
"America/Vancouver",
"America/Whitehorse",
"America/Winnipeg",
"America/Yakutat",
"Antarctica/Casey",
"Antarctica/Davis",
"Antarctica/DumontDUrville",
"Antarctica/Macquarie",
"Antarctica/Mawson",
"Antarctica/McMurdo",
"Antarctica/Palmer",
"Antarctica/Rothera",
"Antarctica/Syowa",
"Antarctica/Troll",
"Antarctica/Vostok",
"Arctic/Longyearbyen",
"Asia/Aden",
"Asia/Almaty",
"Asia/Amman",
"Asia/Anadyr",
"Asia/Aqtau",
"Asia/Aqtobe",
"Asia/Ashgabat",
"Asia/Atyrau",
"Asia/Baghdad",
"Asia/Bahrain",
"Asia/Baku",
"Asia/Bangkok",
"Asia/Barnaul",
"Asia/Beirut",
"Asia/Bishkek",
"Asia/Brunei",
"Asia/Chita",
"Asia/Choibalsan",
"Asia/Colombo",
"Asia/Damascus",
"Asia/Dhaka",
"Asia/Dili",
"Asia/Dubai",
"Asia/Dushanbe",
"Asia/Famagusta",
"Asia/Gaza",
"Asia/Hebron",
"Asia/Ho_Chi_Minh",
"Asia/Hong_Kong",
"Asia/Hovd",
"Asia/Irkutsk",
"Asia/Jakarta",
"Asia/Jayapura",
"Asia/Jerusalem",
"Asia/Kabul",
"Asia/Kamchatka",
"Asia/Karachi",
"Asia/Kathmandu",
"Asia/Khandyga",
"Asia/Kolkata",
"Asia/Krasnoyarsk",
"Asia/Kuala_Lumpur",
"Asia/Kuching",
"Asia/Kuwait",
"Asia/Macau",
"Asia/Magadan",
"Asia/Makassar",
"Asia/Manila",
"Asia/Muscat",
"Asia/Nicosia",
"Asia/Novokuznetsk",
"Asia/Novosibirsk",
"Asia/Omsk",
"Asia/Oral",
"Asia/Phnom_Penh",
"Asia/Pontianak",
"Asia/Pyongyang",
"Asia/Qatar",
"Asia/Qostanay",
"Asia/Qyzylorda",
"Asia/Riyadh",
"Asia/Sakhalin",
"Asia/Samarkand",
"Asia/Seoul",
"Asia/Shanghai",
"Asia/Singapore",
"Asia/Srednekolymsk",
"Asia/Taipei",
"Asia/Tashkent",
"Asia/Tbilisi",
"Asia/Tehran",
"Asia/Thimphu",
"Asia/Tokyo",
"Asia/Tomsk",
"Asia/Ulaanbaatar",
"Asia/Urumqi",
"Asia/Ust-Nera",
"Asia/Vientiane",
"Asia/Vladivostok",
"Asia/Yakutsk",
"Asia/Yangon",
"Asia/Yekaterinburg",
"Asia/Yerevan",
"Atlantic/Azores",
"Atlantic/Bermuda",
"Atlantic/Canary",
"Atlantic/Cape_Verde",
"Atlantic/Faroe",
"Atlantic/Madeira",
"Atlantic/Reykjavik",
"Atlantic/South_Georgia",
"Atlantic/St_Helena",
"Atlantic/Stanley",
"Australia/Adelaide",
"Australia/Brisbane",
"Australia/Broken_Hill",
"Australia/Darwin",
"Australia/Eucla",
"Australia/Hobart",
"Australia/Lindeman",
"Australia/Lord_Howe",
"Australia/Melbourne",
"Australia/Perth",
"Australia/Sydney",
"Etc/UTC",
"Europe/Amsterdam",
"Europe/Andorra",
"Europe/Astrakhan",
"Europe/Athens",
"Europe/Belgrade",
"Europe/Berlin",
"Europe/Bratislava",
"Europe/Brussels",
"Europe/Bucharest",
"Europe/Budapest",
"Europe/Busingen",
"Europe/Chisinau",
"Europe/Copenhagen",
"Europe/Dublin",
"Europe/Gibraltar",
"Europe/Guernsey",
"Europe/Helsinki",
"Europe/Isle_of_Man",
"Europe/Istanbul",
"Europe/Jersey",
"Europe/Kaliningrad",
"Europe/Kyiv",
"Europe/Kirov",
"Europe/Lisbon",
"Europe/Ljubljana",
"Europe/London",
"Europe/Luxembourg",
"Europe/Madrid",
"Europe/Malta",
"Europe/Mariehamn",
"Europe/Minsk",
"Europe/Monaco",
"Europe/Moscow",
"Europe/Oslo",
"Europe/Paris",
"Europe/Podgorica",
"Europe/Prague",
"Europe/Riga",
"Europe/Rome",
"Europe/Samara",
"Europe/San_Marino",
"Europe/Sarajevo",
"Europe/Saratov",
"Europe/Simferopol",
"Europe/Skopje",
"Europe/Sofia",
"Europe/Stockholm",
"Europe/Tallinn",
"Europe/Tirane",
"Europe/Ulyanovsk",
"Europe/Vaduz",
"Europe/Vatican",
"Europe/Vienna",
"Europe/Vilnius",
"Europe/Volgograd",
"Europe/Warsaw",
"Europe/Zagreb",
"Europe/Zurich",
"Indian/Antananarivo",
"Indian/Chagos",
"Indian/Christmas",
"Indian/Cocos",
"Indian/Comoro",
"Indian/Kerguelen",
"Indian/Mahe",
"Indian/Maldives",
"Indian/Mauritius",
"Indian/Mayotte",
"Indian/Reunion",
"Pacific/Apia",
"Pacific/Auckland",
"Pacific/Bougainville",
"Pacific/Chatham",
"Pacific/Chuuk",
"Pacific/Easter",
"Pacific/Efate",
"Pacific/Fakaofo",
"Pacific/Fiji",
"Pacific/Funafuti",
"Pacific/Galapagos",
"Pacific/Gambier",
"Pacific/Guadalcanal",
"Pacific/Guam",
"Pacific/Honolulu",
"Pacific/Kanton",
"Pacific/Kiritimati",
"Pacific/Kosrae",
"Pacific/Kwajalein",
"Pacific/Majuro",
"Pacific/Marquesas",
"Pacific/Midway",
"Pacific/Nauru",
"Pacific/Niue",
"Pacific/Norfolk",
"Pacific/Noumea",
"Pacific/Pago_Pago",
"Pacific/Palau",
"Pacific/Pitcairn",
"Pacific/Pohnpei",
"Pacific/Port_Moresby",
"Pacific/Rarotonga",
"Pacific/Saipan",
"Pacific/Tahiti",
"Pacific/Tarawa",
"Pacific/Tongatapu",
"Pacific/Wake",
"Pacific/Wallis",
"Etc/GMT-12",
"Etc/GMT-11",
"Etc/GMT-10",
"Etc/GMT-9",
"Etc/GMT-8",
"Etc/GMT-7",
"Etc/GMT-6",
"Etc/GMT-5",
"Etc/GMT-4",
"Etc/GMT-3",
"Etc/GMT-2",
"Etc/GMT-1",
"Etc/GMT",
"Etc/GMT+1",
"Etc/GMT+2",
"Etc/GMT+3",
"Etc/GMT+4",
"Etc/GMT+5",
"Etc/GMT+6",
"Etc/GMT+7",
"Etc/GMT+8",
"Etc/GMT+9",
"Etc/GMT+10",
"Etc/GMT+11",
"Etc/GMT+12"
]

View File

@ -0,0 +1,491 @@
import json
from abc import ABC, abstractmethod
from io import BytesIO
from pathlib import Path
from struct import unpack
from typing import List, Optional, Union
import numpy as np
from h3.api import numpy_int as h3
from timezonefinder import utils
from timezonefinder.configs import (
BINARY_DATA_ATTRIBUTES,
BINARY_FILE_ENDING,
DATA_ATTRIBUTE_NAMES,
DTYPE_FORMAT_H,
DTYPE_FORMAT_H_NUMPY,
DTYPE_FORMAT_I,
DTYPE_FORMAT_SIGNED_I_NUMPY,
HOLE_ADR2DATA,
HOLE_COORD_AMOUNT,
HOLE_DATA,
HOLE_REGISTRY,
HOLE_REGISTRY_FILE,
NR_BYTES_H,
NR_BYTES_I,
POLY_ADR2DATA,
POLY_COORD_AMOUNT,
POLY_DATA,
POLY_MAX_VALUES,
POLY_NR2ZONE_ID,
POLY_ZONE_IDS,
SHORTCUT_FILE,
SHORTCUT_H3_RES,
TIMEZONE_NAMES_FILE,
CoordLists,
CoordPairs,
)
from timezonefinder.hex_helpers import read_shortcuts_binary
from timezonefinder.utils import inside_polygon
class AbstractTimezoneFinder(ABC):
    """Base class holding the shared shortcut-index logic of the finder classes.

    Owns the shortcut mapping (h3 hex id -> candidate polygon ids), the list
    of timezone names and the opened ``poly_zone_ids`` binary file.
    """

    # TODO document attributes in all classes
    # prevent dynamic attribute assignment (-> safe memory)
    __slots__ = [
        "bin_file_location",
        "shortcut_mapping",
        "in_memory",
        "_fromfile",
        "timezone_names",
        POLY_ZONE_IDS,
    ]
    # names of the binary data files opened in __init__ (subclasses extend this)
    binary_data_attributes: List[str] = [POLY_ZONE_IDS]

    def __init__(
        self,
        bin_file_location: Optional[Union[str, Path]] = None,
        in_memory: bool = False,
    ):
        """
        :param bin_file_location: path to the binary data files to use,
            ``None`` if the data shipped with the package should be used
        :param in_memory: whether to completely read and keep the binary files in memory
        """
        self.in_memory = in_memory
        if self.in_memory:
            self._fromfile = utils.fromfile_memory
        else:
            self._fromfile = np.fromfile
        # open all the files in binary reading mode
        # for more info on what is stored in which .bin file, please read the comments in file_converter.py
        if bin_file_location is None:
            bin_file_location = Path(__file__).parent
        else:
            bin_file_location = Path(bin_file_location)
        self.bin_file_location: Path = bin_file_location
        with open(self.bin_file_location / TIMEZONE_NAMES_FILE) as json_file:
            self.timezone_names = json.loads(json_file.read())
        path2shortcut_bin = self.bin_file_location / SHORTCUT_FILE
        self.shortcut_mapping = read_shortcuts_binary(path2shortcut_bin)
        for attribute_name in self.binary_data_attributes:
            file_name = attribute_name + BINARY_FILE_ENDING
            path2file = self.bin_file_location / file_name
            if self.in_memory:
                # read the whole file once, serve all further reads from RAM
                with open(path2file, mode="rb") as bin_file:
                    bf_in_mem = BytesIO(bin_file.read())
                    bf_in_mem.seek(0)
                setattr(self, attribute_name, bf_in_mem)
            else:
                # keep an open file handle (closed again in __del__)
                bin_file = open(path2file, mode="rb")
                setattr(self, attribute_name, bin_file)

    def __del__(self):
        # close the file handles opened in __init__
        # NOTE(review): raises AttributeError if __init__ failed before
        # opening all files — consider guarding with getattr(..., None)
        for attribute_name in self.binary_data_attributes:
            getattr(self, attribute_name).close()

    @property
    def nr_of_zones(self):
        # amount of timezones in the loaded data set
        return len(self.timezone_names)

    @staticmethod
    def using_numba() -> bool:
        """
        :return: True if Numba is being used to JIT compile helper functions
        """
        return utils.using_numba

    @staticmethod
    def using_clang_pip() -> bool:
        """
        :return: True if the compiled C implementation of the point in polygon algorithm is being used
        """
        return utils.inside_polygon == utils.pt_in_poly_clang

    def zone_id_of(self, poly_id: int) -> int:
        """reads the zone id (uint16) stored for the polygon with index *poly_id*"""
        poly_zone_ids = getattr(self, POLY_ZONE_IDS)
        # one 2-byte entry per polygon
        poly_zone_ids.seek(NR_BYTES_H * poly_id)
        return unpack(DTYPE_FORMAT_H, poly_zone_ids.read(NR_BYTES_H))[0]

    def zone_ids_of(self, poly_ids: np.ndarray) -> np.ndarray:
        """looks up the zone ids of all given polygon ids"""
        poly_zone_ids = getattr(self, POLY_ZONE_IDS)
        id_array = np.empty(shape=len(poly_ids), dtype=DTYPE_FORMAT_H_NUMPY)
        for i, poly_id in enumerate(poly_ids):
            poly_zone_ids.seek(NR_BYTES_H * poly_id)
            id_array[i] = unpack(DTYPE_FORMAT_H, poly_zone_ids.read(NR_BYTES_H))[0]
        return id_array

    def zone_name_from_id(self, zone_id: int) -> str:
        """translates a zone id into the corresponding timezone name

        :raises ValueError: if the zone id is out of range
        """
        try:
            return self.timezone_names[zone_id]
        except IndexError:
            raise ValueError("timezone could not be found. index error.")

    def zone_name_from_poly_id(self, poly_id: int) -> str:
        """translates a polygon id into the name of the timezone it belongs to"""
        zone_id = self.zone_id_of(poly_id)
        return self.zone_name_from_id(zone_id)

    def get_shortcut_polys(self, *, lng: float, lat: float) -> np.ndarray:
        """returns the ids of all polygons stored in the shortcut of the point's h3 cell"""
        hex_id = h3.geo_to_h3(lat, lng, SHORTCUT_H3_RES)
        shortcut_poly_ids = self.shortcut_mapping[hex_id]
        return shortcut_poly_ids

    def most_common_zone_id(self, *, lng: float, lat: float) -> Optional[int]:
        """returns the zone id of the biggest zone in the shortcut of this point, or None if empty"""
        polys = self.get_shortcut_polys(lng=lng, lat=lat)
        if len(polys) == 0:
            return None
        # Note: polygons are sorted from small to big in the shortcuts (grouped by zone)
        # -> the polygons of the biggest zone come last
        poly_of_biggest_zone = polys[-1]
        return self.zone_id_of(poly_of_biggest_zone)

    def unique_zone_id(self, *, lng: float, lat: float) -> Optional[int]:
        """
        :return: the zone id at the coordinate if there is exactly one possible zone, else `None`
        """
        polys = self.get_shortcut_polys(lng=lng, lat=lat)
        if len(polys) == 0:
            return None
        if len(polys) == 1:
            return self.zone_id_of(polys[0])
        zones = self.zone_ids_of(polys)
        zones_unique = np.unique(zones)
        if len(zones_unique) == 1:
            return zones_unique[0]
        # more than one zone in this shortcut
        return None

    @abstractmethod
    def timezone_at(self, *, lng: float, lat: float) -> Optional[str]:
        """looks up in which timezone the given coordinate is included in

        :param lng: longitude of the point in degree (-180.0 to 180.0)
        :param lat: latitude in degree (90.0 to -90.0)
        :return: the timezone name of a matching polygon or None
        """
        ...

    def timezone_at_land(self, *, lng: float, lat: float) -> Optional[str]:
        """computes in which land timezone a point is included in

        Especially for large polygons it is expensive to check if a point is really included.
        To speed things up there are "shortcuts" being used (stored in a binary file),
        which have been precomputed and store which timezone polygons have to be checked.

        :param lng: longitude of the point in degree (-180.0 to 180.0)
        :param lat: latitude in degree (90.0 to -90.0)
        :return: the timezone name of a matching polygon or
            ``None`` when an ocean timezone ("Etc/GMT+-XX") has been matched.
        """
        tz_name = self.timezone_at(lng=lng, lat=lat)
        if tz_name is not None and utils.is_ocean_timezone(tz_name):
            return None
        return tz_name

    def unique_timezone_at(self, *, lng: float, lat: float) -> Optional[str]:
        """returns the name of a unique zone within the corresponding shortcut

        :param lng: longitude of the point in degree (-180.0 to 180.0)
        :param lat: latitude in degree (90.0 to -90.0)
        :return: the timezone name of the unique zone or ``None`` if there are no or multiple zones in this shortcut
        """
        lng, lat = utils.validate_coordinates(lng, lat)
        unique_id = self.unique_zone_id(lng=lng, lat=lat)
        if unique_id is None:
            return None
        return self.zone_name_from_id(unique_id)
class TimezoneFinderL(AbstractTimezoneFinder):
    """Lightweight timezone finder: shortcut lookup only, no polygon tests.

    Unlike ``TimezoneFinder`` this class never reads the timezone polygon
    data itself; it merely suggests a probable result from the precomputed
    'shortcut': the most common zone in a rectangle of half a degree of
    latitude and one degree of longitude.
    """

    def timezone_at(self, *, lng: float, lat: float) -> Optional[str]:
        """Instantly return the name of the most common zone in the matching shortcut.

        Note: 'most common' means that the polygons with the most coordinates
        in sum occurring in the corresponding shortcut belong to this zone.

        :param lng: longitude of the point in degree (-180.0 to 180.0)
        :param lat: latitude in degree (90.0 to -90.0)
        :return: the name of the most common zone, or ``None`` when the
            shortcut holds no timezone polygons at all
        """
        lng, lat = utils.validate_coordinates(lng, lat)
        zone_id = self.most_common_zone_id(lng=lng, lat=lat)
        return None if zone_id is None else self.zone_name_from_id(zone_id)
class TimezoneFinder(AbstractTimezoneFinder):
"""Class for quickly finding the timezone of a point on earth offline.
Because of indexing ("shortcuts"), not all timezone polygons have to be tested during a query.
Opens the required timezone polygon data in binary files to enable fast access.
For a detailed documentation of data management please refer to the code documentation of
`file_converter.py <https://github.com/jannikmi/timezonefinder/blob/master/scripts/file_converter.py>`__
:ivar binary_data_attributes: the names of all attributes which store the opened binary data files
:param bin_file_location: path to the binary data files to use, None if native package data should be used
:param in_memory: whether to completely read and keep the binary files in memory
"""
# __slots__ declared in parents are available in child classes. However, child subclasses will get a __dict__
# and __weakref__ unless they also define __slots__ (which should only contain names of any additional slots).
__slots__ = DATA_ATTRIBUTE_NAMES
binary_data_attributes = BINARY_DATA_ATTRIBUTES
    def __init__(self, bin_file_location: Optional[Union[str, Path]] = None, in_memory: bool = False):
        """
        :param bin_file_location: path to the binary data files to use, None if native package data should be used
        :param in_memory: whether to completely read and keep the binary files in memory
        """
        super().__init__(bin_file_location, in_memory)
        # stores for which polygons (how many) holes exist and the id of the first of those holes
        # since there are very few it is feasible to keep them in the memory
        with open(self.bin_file_location / HOLE_REGISTRY_FILE) as json_file:
            hole_registry_tmp = json.loads(json_file.read())
        # convert the json string keys to int
        hole_registry = {int(k): v for k, v in hole_registry_tmp.items()}
        # setattr: the attribute name is given by the HOLE_REGISTRY constant (cf. __slots__)
        setattr(self, HOLE_REGISTRY, hole_registry)
    @property
    def nr_of_polygons(self) -> int:
        """amount of polygons in the data set, derived from the size of the zone id file"""
        poly_zone_ids = getattr(self, POLY_ZONE_IDS)
        # the file holds exactly one uint16 zone id entry per polygon
        return utils.get_file_size_byte(poly_zone_ids) // NR_BYTES_H
def coords_of(self, polygon_nr: int = 0) -> np.ndarray:
poly_coord_amount = getattr(self, POLY_COORD_AMOUNT)
poly_adr2data = getattr(self, POLY_ADR2DATA)
poly_data = getattr(self, POLY_DATA)
# how many coordinates are stored in this polygon
poly_coord_amount.seek(NR_BYTES_I * polygon_nr)
nr_of_values = unpack(DTYPE_FORMAT_I, poly_coord_amount.read(NR_BYTES_I))[0]
poly_adr2data.seek(NR_BYTES_I * polygon_nr)
poly_data.seek(unpack(DTYPE_FORMAT_I, poly_adr2data.read(NR_BYTES_I))[0])
return np.stack(
(
self._fromfile(poly_data, dtype=DTYPE_FORMAT_SIGNED_I_NUMPY, count=nr_of_values),
self._fromfile(poly_data, dtype=DTYPE_FORMAT_SIGNED_I_NUMPY, count=nr_of_values),
)
)
    def _holes_of_poly(self, polygon_nr: int):
        """generator yielding the coordinates (2xN int arrays) of all holes of one polygon

        :param polygon_nr: index of the polygon
        """
        hole_coord_amount = getattr(self, HOLE_COORD_AMOUNT)
        hole_adr2data = getattr(self, HOLE_ADR2DATA)
        hole_data = getattr(self, HOLE_DATA)
        hole_registry = getattr(self, HOLE_REGISTRY)
        try:
            amount_of_holes, first_hole_id = hole_registry[polygon_nr]
        except KeyError:
            # this polygon has no holes
            return
        # the hole entries of one polygon are stored consecutively:
        # seek to the first one, then read the files sequentially
        hole_coord_amount.seek(NR_BYTES_H * first_hole_id)
        hole_adr2data.seek(NR_BYTES_I * first_hole_id)
        for _ in range(amount_of_holes):
            nr_of_values = unpack(DTYPE_FORMAT_H, hole_coord_amount.read(NR_BYTES_H))[0]
            hole_data.seek(unpack(DTYPE_FORMAT_I, hole_adr2data.read(NR_BYTES_I))[0])
            # all x values are stored first, followed by all y values
            x_coords = self._fromfile(hole_data, dtype=DTYPE_FORMAT_SIGNED_I_NUMPY, count=nr_of_values)
            y_coords = self._fromfile(hole_data, dtype=DTYPE_FORMAT_SIGNED_I_NUMPY, count=nr_of_values)
            yield np.array(
                [
                    x_coords,
                    y_coords,
                ]
            )
def get_polygon(self, polygon_nr: int, coords_as_pairs: bool = False) -> List[Union[CoordPairs, CoordLists]]:
list_of_converted_polygons = []
if coords_as_pairs:
conversion_method = utils.convert2coord_pairs
else:
conversion_method = utils.convert2coords
list_of_converted_polygons.append(conversion_method(self.coords_of(polygon_nr=polygon_nr)))
for hole in self._holes_of_poly(polygon_nr):
list_of_converted_polygons.append(conversion_method(hole))
return list_of_converted_polygons
def get_geometry(
    self,
    tz_name: Optional[str] = "",
    tz_id: Optional[int] = 0,
    use_id: bool = False,
    coords_as_pairs: bool = False,
):
    """retrieves the geometry of a timezone polygon

    :param tz_name: one of the names in ``timezone_names.json`` or ``self.timezone_names``
    :param tz_id: the id of the timezone (=index in ``self.timezone_names``)
    :param use_id: if ``True`` uses ``tz_id`` instead of ``tz_name``
    :param coords_as_pairs: determines the structure of the polygon representation
    :raises TypeError: if ``use_id`` is set but ``tz_id`` is not an int
    :raises ValueError: if the given zone id or zone name is invalid
    :return: a data structure representing the multipolygon of this timezone
        output format: ``[ [polygon1, hole1, hole2...], [polygon2, ...], ...]``
        and each polygon and hole is itself formatted like: ``([longitudes], [latitudes])``
        or ``[(lng1,lat1), (lng2,lat2),...]`` if ``coords_as_pairs=True``.
    """
    if use_id:
        if not isinstance(tz_id, int):
            raise TypeError("the zone id must be given as int.")
        if tz_id < 0 or tz_id >= self.nr_of_zones:
            # FIX: added the missing closing parenthesis in the error message
            raise ValueError(f"the given zone id {tz_id} is invalid (value range: 0 - {self.nr_of_zones - 1}).")
    else:
        try:
            tz_id = self.timezone_names.index(tz_name)
        except ValueError:
            # FIX: format one message string instead of passing several positional args
            # to ValueError (which made the exception carry a tuple instead of a readable message)
            raise ValueError(f"The timezone '{tz_name}' does not exist.")
    if tz_id is None:
        # defensive check (both branches above assign a valid int in practice)
        raise ValueError("no timezone id given.")
    poly_id2zone_id = getattr(self, POLY_NR2ZONE_ID)
    # the first polygon ids of consecutive zones are stored consecutively:
    # entry tz_id belongs to this zone, entry tz_id+1 to the following zone
    poly_id2zone_id.seek(NR_BYTES_H * tz_id)
    # read poly_id of the first polygon of that zone
    this_zone_poly_id = unpack(DTYPE_FORMAT_H, poly_id2zone_id.read(NR_BYTES_H))[0]
    # read poly_id of the first polygon of the consequent zone
    # (also exists for the last zone, cf. file_converter.py)
    next_zone_poly_id = unpack(DTYPE_FORMAT_H, poly_id2zone_id.read(NR_BYTES_H))[0]
    # read and return all polygons from this zone:
    return [self.get_polygon(poly_id, coords_as_pairs) for poly_id in range(this_zone_poly_id, next_zone_poly_id)]
def outside_the_boundaries_of(self, poly_id: int, x: int, y: int) -> bool:
    """cheap bounding-box pre-check for the expensive point-in-polygon test

    :param poly_id: the id of the polygon
    :param x: longitude in the int32 representation
    :param y: latitude in the int32 representation
    :return: ``True`` if (x, y) lies outside the bounding box of the polygon
    """
    # boundaries are stored as 4 int32 values per polygon: (lng_max, lng_min, lat_max, lat_min)
    poly_max_values = getattr(self, POLY_MAX_VALUES)
    poly_max_values.seek(4 * NR_BYTES_I * poly_id)
    xmax, xmin, ymax, ymin = self._fromfile(
        poly_max_values,
        dtype=DTYPE_FORMAT_SIGNED_I_NUMPY,
        count=4,
    )
    # inside the box means within both coordinate ranges; outside is the negation
    return not (xmin <= x <= xmax and ymin <= y <= ymax)
def inside_of_polygon(self, poly_id: int, x: int, y: int) -> bool:
    """tests whether a point is truly included in a polygon (boundary minus its holes)

    only reads the polygon (hole) data on demand and
    only runs the expensive point-in-polygon algorithm if the point is within the bounding box.

    :param poly_id: the id of the polygon
    :param x: longitude in the int32 representation
    :param y: latitude in the int32 representation
    :return: ``True`` if (x, y) is inside the boundary polygon but not inside any of its holes
    """
    if self.outside_the_boundaries_of(poly_id, x, y):
        return False
    if not inside_polygon(x, y, self.coords_of(polygon_nr=poly_id)):
        return False
    # when the point is within a hole of the polygon, this timezone must not be returned
    # FIX: removed the redundant iter() wrapper around the generator expression
    # (any() consumes any iterable directly)
    if any(inside_polygon(x, y, hole) for hole in self._holes_of_poly(poly_id)):
        return False
    # the query point is included in this polygon, but not in any hole
    return True
def timezone_at(self, *, lng: float, lat: float) -> Optional[str]:
    """computes in which ocean OR land timezone a point is included in

    Checking whether a point is really included in a (large) polygon is expensive.
    This function therefore uses precomputed "shortcuts" to narrow down the candidate
    polygons and skips the inclusion test whenever only one zone remains possible.

    .. note:: Since ocean timezones span the whole globe, some timezone will always be matched!
        `None` can only be returned when you have compiled timezone data without such "full coverage".

    :param lng: longitude of the point in degree (-180.0 to 180.0)
    :param lat: latitude in degree (90.0 to -90.0)
    :return: the timezone name of the matched timezone polygon. possibly "Etc/GMT+-XX" in case of an ocean timezone.
    """
    lng, lat = utils.validate_coordinates(lng, lat)
    candidate_polys = self.get_shortcut_polys(lng=lng, lat=lat)
    nr_candidates = len(candidate_polys)
    if nr_candidates == 0:
        # hypothetical case: with ocean data every shortcut maps to at least one polygon
        return None
    if nr_candidates == 1:
        # unambiguous: skip the expensive inclusion test entirely
        return self.zone_name_from_poly_id(candidate_polys[0])
    # timezone ids of all candidate polygons
    zone_ids = self.zone_ids_of(candidate_polys)
    last_zone_change_idx = utils.get_last_change_idx(zone_ids)
    if last_zone_change_idx == 0:
        # all candidates belong to the same zone
        return self.zone_name_from_id(zone_ids[0])
    # the polygons are stored in int32 representation ->
    # convert the query point the same way to match the data format
    x = utils.coord2int(lng)
    y = utils.coord2int(lat)
    # test candidates in order until one includes the point;
    # the polygons of the last possible zone never need to be checked:
    # if no earlier polygon matched, that zone is the only remaining option
    for idx in range(last_zone_change_idx):
        if self.inside_of_polygon(candidate_polys[idx], x, y):
            return self.zone_name_from_id(zone_ids[idx])
    return self.zone_name_from_id(zone_ids[-1])
def certain_timezone_at(self, *, lng: float, lat: float) -> Optional[str]:
    """checks in which timezone polygon the point is certainly included in

    Unlike ``timezone_at()`` this tests every candidate polygon for true inclusion,
    which is only meaningful for self-compiled timezone data with coverage gaps
    (with full ocean coverage some zone always matches and ``timezone_at()`` is equivalent).

    .. note:: using this function is less performant than ``timezone_at()``

    :param lng: longitude of the point in degree
    :param lat: latitude in degree
    :return: the timezone name of the polygon the point is included in or ``None``
    """
    lng, lat = utils.validate_coordinates(lng, lat)
    candidate_polys = self.get_shortcut_polys(lng=lng, lat=lat)
    if len(candidate_polys) == 0:
        # hypothetical case: with ocean data every shortcut maps to at least one polygon
        return None
    # match the int32 storage format of the polygon data
    x = utils.coord2int(lng)
    y = utils.coord2int(lat)
    # first polygon truly containing the point (None if there is none)
    matched_poly = next(
        (poly_id for poly_id in candidate_polys if self.inside_of_polygon(poly_id, x, y)),
        None,
    )
    if matched_poly is None:
        return None
    return self.zone_name_from_id(self.zone_id_of(matched_poly))

View File

@ -0,0 +1,280 @@
""" utility functions
JIT compiled for efficiency in case `numba` is installed
Pending:
Numba Ahead-Of-Time Compilation:
cc = CC('precompiled_helpers', )
# Uncomment the following line to print out the compilation steps
cc.verbose = True
if __name__ == "__main__":
cc.compile()
"""
import io
import re
from typing import Callable, Tuple
import cffi
import numpy as np
from numpy import int64
from timezonefinder.configs import (
COORD2INT_FACTOR,
INT2COORD_FACTOR,
OCEAN_TIMEZONE_PREFIX,
CoordLists,
CoordPairs,
IntLists,
)
# optional C acceleration: try to load the cffi-built point-in-polygon extension;
# fall back gracefully when the extension has not been compiled
try:
    # Note: IDE might complain as this import comes from a cffi C extension
    from timezonefinder import inside_polygon_ext  # type: ignore

    clang_extension_loaded = True
    ffi = cffi.FFI()
except ImportError:
    # extension unavailable -> a python implementation will be used instead
    clang_extension_loaded = False
    inside_polygon_ext = None
    ffi = None

# optional JIT acceleration: use numba when it is installed
try:
    from numba import b1, f8, i2, i4, njit, u2

    using_numba = True
except ImportError:
    using_numba = False
    # replace Numba functionality with "transparent" implementations
    # (no-op njit decorator and dummy type objects, cf. _numba_replacements.py)
    from timezonefinder._numba_replacements import b1, f8, i2, i4, njit, u2
# @cc.export('inside_polygon', 'b1(i4, i4, i4[:, :])')
@njit(b1(i4, i4, i4[:, :]), cache=True)
def pt_in_poly_python(x: int, y: int, coords: np.ndarray) -> bool:
    """
    Implementing the ray casting point in polygon test algorithm
    cf. https://en.wikipedia.org/wiki/Point_in_polygon#Ray_casting_algorithm

    :param x: x (longitude) value of the query point in int32 representation
    :param y: y (latitude) value of the query point in int32 representation
    :param coords: a polygon represented by a list containing two lists (x and y coordinates):
        [ [x1,x2,x3...], [y1,y2,y3...]]
        those lists are actually numpy arrays which are being read directly from a binary file
    :return: true if the point (x,y) lies within the polygon

    Some overflow considerations for the critical part of comparing the line segment slopes:

        (y2 - y) * (x2 - x1) <= delta_y_max * delta_x_max
        (y2 - y1) * (x2 - x) <= delta_y_max * delta_x_max
        delta_y_max * delta_x_max = 180 * 360 < 65 x10^3

    Instead of calculating with float I decided using just ints (by multiplying with 10^7). That gives us:

        delta_y_max * delta_x_max = 180x10^7 * 360x10^7
        delta_y_max * delta_x_max <= 65x10^17

    So these numbers need up to log_2(65 x10^17) ~ 63 bits to be represented! Even though values this big should never
    occur in practice (timezone polygons do not span the whole lng lat coordinate space),
    32bit accuracy hence is not safe to use here!
    pure Python automatically uses the appropriate int data type preventing overflow
    (cf. https://www.python.org/dev/peps/pep-0237/),
    but here the data types are numpy internal static data types. The data is stored as int32
    -> use int64 when comparing slopes!

    slower naive implementation:

        j = nr_coords - 1
        for i in range(nr_coords):
            if ((y_coords[i] > y) != (y_coords[j] > y)) and (
                x
                < (int64(x_coords[j]) - int64(x_coords[i]))
                * (int64(y) - int64(y_coords[i]))
                / (int64(y_coords[j]) - int64(y_coords[i]))
                + int64(x_coords[i])
            ):
                inside = not inside
            j = i
            i += 1
    """
    x_coords = coords[0]
    y_coords = coords[1]
    nr_coords = len(x_coords)
    # parity of the crossing count: flipped once per valid edge crossing
    inside = False
    # the edge from the last to the first point is checked first
    y1 = y_coords[-1]
    y_gt_y1 = y > y1
    for i in range(nr_coords):
        y2 = y_coords[i]
        y_gt_y2 = y > y2
        if y_gt_y1 ^ y_gt_y2:  # XOR
            # [p1-p2] crosses horizontal line in p
            x1 = x_coords[i - 1]
            x2 = x_coords[i]
            # only count crossings "right" of the point ( >= x)
            x_le_x1 = x <= x1
            x_le_x2 = x <= x2
            if x_le_x1 or x_le_x2:
                if x_le_x1 and x_le_x2:
                    # p1 and p2 are both to the right -> valid crossing
                    inside = not inside
                else:
                    # compare the slope of the line [p1-p2] and [p-p2]
                    # depending on the position of p2 this determines whether
                    # the polygon edge is right or left of the point
                    # to avoid expensive division the divisors (of the slope dy/dx) are brought to the other side
                    # ( dy/dx > a == dy > a * dx )
                    # only one of the points is to the right
                    # NOTE: int64 precision required to prevent overflow (cf. docstring)
                    y_64 = int64(y)
                    y1_64 = int64(y1)
                    y2_64 = int64(y2)
                    x_64 = int64(x)
                    x1_64 = int64(x1)
                    x2_64 = int64(x2)
                    slope1 = (y2_64 - y_64) * (x2_64 - x1_64)
                    slope2 = (y2_64 - y1_64) * (x2_64 - x_64)
                    # NOTE: accept slope equality to also detect if p lies directly on an edge
                    if y_gt_y1:
                        if slope1 <= slope2:
                            inside = not inside
                    elif slope1 >= slope2:  # NOT y_gt_y1
                        inside = not inside
        # next point: the current end point becomes the next start point
        y1 = y2
        y_gt_y1 = y_gt_y2
    return inside
def pt_in_poly_clang(x: int, y: int, coords: np.ndarray) -> bool:
    """wrapper of the point in polygon test algorithm C extension

    ATTENTION: the input numpy arrays must have a C_CONTIGUOUS memory layout
    https://numpy.org/doc/stable/reference/generated/numpy.ascontiguousarray.html?highlight=ascontiguousarray#numpy.ascontiguousarray

    :param x: x (longitude) value of the query point in int32 representation
    :param y: y (latitude) value of the query point in int32 representation
    :param coords: the polygon as a (2, nr_of_coords) array (row 0: x values, row 1: y values)
    :return: truthy if the point (x, y) lies within the polygon
    """
    nr_coords = len(coords[0])
    # enforce the contiguous layout required by the C function and hand over raw int buffers
    x_buffer = ffi.from_buffer("int []", np.ascontiguousarray(coords[0]))
    y_buffer = ffi.from_buffer("int []", np.ascontiguousarray(coords[1]))
    return inside_polygon_ext.lib.inside_polygon_int(x, y, nr_coords, x_buffer, y_buffer)
# function pointer to the point-in-polygon implementation chosen below
inside_polygon: Callable[[int, int, np.ndarray], bool]
# at import time fix which "point-in-polygon" implementation will be used
if clang_extension_loaded and not using_numba:
    # use the C implementation if Numba is not present
    inside_polygon = pt_in_poly_clang
else:
    # use the (JIT compiled) python function if Numba is present or the C extension cannot be loaded
    # NOTE: when Numba is available the JIT compiled python version is preferred over the C extension
    inside_polygon = pt_in_poly_python
@njit(i2(u2[:]), cache=True)
def get_last_change_idx(lst: np.ndarray) -> int:
    """
    :param lst: array of entries
    :return: the index of the first element of the equal "tail" of the array,
        i.e. all elements from this index onward are equal.
        0 means all entries are equal (or there is at most one entry).
    """
    nr_entries = lst.shape[0]
    if nr_entries <= 1:
        return 0
    # at least 2 elements: scan backwards with a plain index loop (numba-friendly)
    # for the first element that differs from the tail value
    tail_value = lst[-1]
    for idx in range(nr_entries - 2, -1, -1):
        if lst[idx] != tail_value:
            # idx holds the last differing element -> the tail starts right after it
            return idx + 1
    # all entries are the same
    return 0
# @cc.export('int2coord', f8(i4))
@njit(f8(i4), cache=True)
def int2coord(i4: int) -> float:
    # converts the internal scaled int representation back into a degree coordinate (float)
    # NOTE(review): the parameter name `i4` shadows the imported numba type `i4` within this
    # function body; harmless here, but consider renaming (kept as-is for API stability)
    return float(i4 * INT2COORD_FACTOR)
# @cc.export('coord2int', i4(f8))
@njit(i4(f8), cache=True)
def coord2int(double: float) -> int:
    """converts a degree coordinate (float) into the scaled int representation used in the binary data"""
    scaled = double * COORD2INT_FACTOR
    return int(scaled)
@njit(cache=True)
def convert2coords(polygon_data: np.ndarray) -> CoordLists:
    """converts an int-encoded polygon into two lists of degree coordinates: [[x...], [y...]]"""
    xs = [int2coord(value) for value in polygon_data[0]]
    ys = [int2coord(value) for value in polygon_data[1]]
    return [xs, ys]
@njit(cache=True)
def convert2coord_pairs(polygon_data: np.ndarray) -> CoordPairs:
    """converts an int-encoded polygon into a list of (x, y) degree coordinate tuples"""
    x_ints = polygon_data[0]
    y_ints = polygon_data[1]
    pairs = []
    # plain index loop (numba-friendly) pairing up the i-th x and y values
    for i in range(len(x_ints)):
        pairs.append((int2coord(x_ints[i]), int2coord(y_ints[i])))
    return pairs
@njit(cache=True)
def convert2ints(polygon_data: np.ndarray) -> IntLists:
    """converts a degree coordinate polygon into two lists of scaled int values: [[x...], [y...]]"""
    xs = [coord2int(value) for value in polygon_data[0]]
    ys = [coord2int(value) for value in polygon_data[1]]
    return [xs, ys]
@njit(cache=True)
def any_pt_in_poly(coords1: np.ndarray, coords2: np.ndarray) -> bool:
    """returns True if at least one point of the first polygon lies within the second polygon"""
    # iterate over the points (= columns) of the first polygon
    nr_points = coords1.shape[1]
    for col in range(nr_points):
        if pt_in_poly_python(coords1[0, col], coords1[1, col], coords2):
            return True
    return False
@njit(cache=True)
def fully_contained_in_hole(poly: np.ndarray, hole: np.ndarray) -> bool:
    """returns True only if every point of the polygon lies within the hole"""
    # iterate over the points (= columns) of the polygon
    for col in range(poly.shape[1]):
        if not pt_in_poly_python(poly[0, col], poly[1, col], hole):
            return False
    return True
def validate_coordinates(lng: float, lat: float) -> Tuple[float, float]:
    """checks the query coordinates against the valid degree ranges

    :param lng: longitude in degree, must be within [-180.0, 180.0]
    :param lat: latitude in degree, must be within [-90.0, 90.0]
    :raises ValueError: if one of the values is out of bounds
    :return: the validated coordinates converted to float
    """
    # longitude is checked before latitude (only the first violation is reported)
    for name, value, bound in (("longitude", lng, 180.0), ("latitude", lat, 90.0)):
        if not -bound <= value <= bound:
            raise ValueError(f"The given {name} {value} is out of bounds")
    return float(lng), float(lat)
def get_file_size_byte(file) -> int:
    """returns the total size of a file-like object in bytes

    NOTE: moves the file position to the end of the file.
    """
    file.seek(0, io.SEEK_END)
    size_in_bytes = file.tell()
    return size_in_bytes
def fromfile_memory(file, dtype: str, count: int, **kwargs):
    """reads ``count`` values of ``dtype`` from an in-memory file (e.g. BytesIO)

    replacement for ``np.fromfile()`` on in-memory buffers:
    ``np.frombuffer()`` does not advance the file position, so it is moved manually.

    :return: numpy array of the values read
    """
    start_offset = file.tell()
    values = np.frombuffer(file.getbuffer(), offset=start_offset, dtype=dtype, count=count, **kwargs)
    # advance the position past the consumed bytes (frombuffer leaves it untouched)
    file.seek(start_offset + np.dtype(dtype).itemsize * count)
    return values
def is_ocean_timezone(timezone_name: str) -> bool:
    """returns True if the given timezone name matches the ocean timezone naming pattern"""
    return re.match(OCEAN_TIMEZONE_PREFIX, timezone_name) is not None