Merge pull request #2595 from dalf/update-wikidata-units

[mod] update wikidata_units.json and fetch_wikidata_units.py
This commit is contained in:
Alexandre Flament 2021-02-23 17:22:37 +01:00 committed by GitHub
commit 5f4a085fc4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 170 additions and 63 deletions

View File

@ -1,7 +1,67 @@
{
"Q199": "1",
"Q100036106": "int nmi",
"Q100149279": "°We",
"Q100995": "lb",
"Q101194838": "GHz/V",
"Q101463141": "ym²",
"Q101463237": "zm²",
"Q101463321": "am²",
"Q101463409": "fm²",
"Q101463496": "pm²",
"Q101463679": "hm²",
"Q101464050": "Mm²",
"Q101464215": "Gm²",
"Q101464369": "Tm²",
"Q101464499": "Pm²",
"Q101464624": "Em²",
"Q101464753": "Zm²",
"Q101464875": "Ym²",
"Q101515060": "g/J",
"Q101875087": "cd/cm²",
"Q101877596": "g/ml",
"Q101879174": "dm/s",
"Q102068844": "cm⁻³",
"Q102129339": "min⁻¹",
"Q102129428": "h⁻¹",
"Q102129592": "d⁻¹",
"Q102130673": "ym/s",
"Q102130674": "zm/s",
"Q102130677": "am/s",
"Q102130679": "fm/s",
"Q102130681": "pm/s",
"Q102130684": "nm/s",
"Q102130686": "μm/s",
"Q102130688": "mm/s",
"Q102130690": "dam/s",
"Q102130692": "hm/s",
"Q102130694": "Mm/s",
"Q102130696": "Gm/s",
"Q102130698": "Tm/s",
"Q102130700": "Pm/s",
"Q102130702": "Em/s",
"Q102130704": "Zm/s",
"Q102130706": "Ym/s",
"Q102130743": "ym/s²",
"Q102130745": "zm/s²",
"Q102130747": "am/s²",
"Q102130748": "fm/s²",
"Q102130751": "pm/s²",
"Q102130753": "nm/s²",
"Q102130755": "μm/s²",
"Q102130756": "mm/s²",
"Q102130758": "dm/s²",
"Q102130759": "dam/s²",
"Q102130761": "hm/s²",
"Q102130762": "km/s²",
"Q102130765": "Mm/s²",
"Q102130767": "Gm/s²",
"Q102130769": "Tm/s²",
"Q102130771": "Pm/s²",
"Q102130773": "Em/s²",
"Q102130775": "Zm/s²",
"Q102130777": "Ym/s²",
"Q102178883": "dm³/h",
"Q1022113": "cm³",
"Q102573": "Bq",
"Q103246": "Sv",
@ -9,12 +69,16 @@
"Q10380431": "TJ",
"Q1040401": "das",
"Q1040427": "hs",
"Q1042866": "Zibit",
"Q104117265": "Bi",
"Q1042866": "Zib",
"Q104907398": "μN m",
"Q104907399": "mN m",
"Q1050958": "inHg",
"Q1051665": "m/s²",
"Q1052397": "rad",
"Q1054140": "Mm",
"Q10543042": "Ym",
"Q105519288": "B SPL",
"Q1057069": "hg",
"Q1063756": "rad/s",
"Q1063786": "in²",
@ -22,33 +86,34 @@
"Q1066138": "Ps",
"Q1067722": "Fg",
"Q1069725": "p.",
"Q1072404": "K",
"Q1084321": "Tb/s",
"Q1086691": "fg",
"Q1091257": "tex",
"Q1092296": "a",
"Q1104069": "CAD$",
"Q1104069": "$",
"Q11061003": "μm²",
"Q11061005": "nm²",
"Q1131660": "st",
"Q1137675": "cr",
"Q1140444": "Zbit",
"Q1140577": "Ybit",
"Q1152074": "Pbit",
"Q1152323": "Tbit",
"Q1140444": "Zb",
"Q1140577": "Yb",
"Q1152074": "Pb",
"Q1152323": "Tb",
"Q1165799": "mil",
"Q11776930": "Mg",
"Q11830636": "psf",
"Q11929860": "kpc",
"Q1194225": "lbf",
"Q1194580": "Mibit",
"Q1195111": "Ebit",
"Q1194580": "Mib",
"Q1195111": "Eb",
"Q1196837": "ω_P",
"Q1197459": "Ms",
"Q11982285": "Em³",
"Q11982288": "Zm³",
"Q11982289": "Tm³",
"Q12011178": "Zs",
"Q1204894": "Gibit",
"Q1204894": "Gib",
"Q12257695": "Eb/s",
"Q12257696": "EB/s",
"Q12261466": "kB/s",
@ -59,7 +124,7 @@
"Q12269308": "Zb/s",
"Q12269309": "ZB/s",
"Q1247300": "cm H₂O",
"Q12714022": "sh cwt",
"Q12714022": "cwt",
"Q12789864": "GeV",
"Q12874593": "W h",
"Q128822": "kn",
@ -71,9 +136,9 @@
"Q1323615": "oz t",
"Q132643": "kr",
"Q13400897": "g",
"Q13479685": "mm wg",
"Q1351253": "Eibit",
"Q1351334": "Pibit",
"Q13479685": "mm H2O",
"Q1351253": "Eib",
"Q1351334": "Pib",
"Q13542672": "Ry",
"Q13548586": "THz",
"Q13582667": "kgf/cm²",
@ -88,13 +153,15 @@
"Q14158377": "A_P",
"Q14623803": "MDa",
"Q14623804": "kDa",
"Q1472674": "Sv",
"Q1472674": "S",
"Q14754979": "Zg",
"Q14786969": "MJ",
"Q14850704": "℧",
"Q14913554": "Ys",
"Q14914907": "th",
"Q14916719": "Gpc",
"Q14923662": "Pm³",
"Q1501273": "HU",
"Q1511773": "LSd",
"Q15120301": "l atm",
"Q1542309": "xu",
@ -110,7 +177,7 @@
"Q163354": "H",
"Q1640501": "hyl",
"Q1645498": "μg",
"Q16859309": "lb·ft",
"Q16859309": "lb ft",
"Q169893": "S",
"Q170804": "Wb",
"Q17093295": "m/h",
@ -140,7 +207,7 @@
"Q182429": "m/s",
"Q1826195": "dl",
"Q18413919": "cm/s",
"Q184172": "FF",
"Q184172": "F",
"Q185078": "a",
"Q185153": "erg",
"Q185648": "Torr",
@ -171,11 +238,10 @@
"Q2029519": "hl",
"Q203567": "₦",
"Q2042279": "m H₂O",
"Q204737": "៛",
"Q2051195": "GWh",
"Q2055118": "ppb",
"Q2064166": "fc",
"Q206600": "ރ",
"Q206600": "MRF",
"Q20706220": "cmm",
"Q20706221": "dmm",
"Q2080811": "vol%",
@ -196,9 +262,11 @@
"Q21075844": "ml/l",
"Q21077820": "mg/m³",
"Q21091747": "mg/kg",
"Q211256": "mph",
"Q211256": "mi/h",
"Q21154419": "PD",
"Q211580": "BTU (th)",
"Q212120": "A h",
"Q213005": "G$",
"Q2140397": "in³",
"Q214377": "ell",
"Q2143992": "kHz",
@ -211,7 +279,7 @@
"Q215571": "N m",
"Q21604951": "g/m³",
"Q2165290": "yd³",
"Q216880": "kp",
"Q216880": "kgf",
"Q217208": "a",
"Q2175964": "dm³",
"Q218593": "in",
@ -229,11 +297,14 @@
"Q229354": "Ci",
"Q232291": "mi²",
"Q2332346": "ml",
"Q235729": "y (365 days)",
"Q23808021": "oz (ap.)",
"Q23823681": "TW",
"Q23925410": "gal (UK)",
"Q23925413": "gal (US)",
"Q23931040": "dam²",
"Q23931103": "nmi²",
"Q240468": "syr£",
"Q2414435": "$b.",
"Q242988": "Lib$",
"Q2438073": "ag",
@ -252,7 +323,7 @@
"Q25511288": "mb",
"Q2553708": "MV",
"Q2554092": "kV",
"Q259502": "AU$",
"Q259502": "A$",
"Q260126": "rem",
"Q2612219": "Pg",
"Q261247": "ct",
@ -306,8 +377,11 @@
"Q30001831": "aV",
"Q30001832": "aW",
"Q30001833": "aWb",
"Q3013059": "kyr",
"Q3194304": "kbit",
"Q3013059": "ka",
"Q304479": "tr",
"Q305896": "DPI",
"Q31889818": "ppq",
"Q3194304": "kb",
"Q3207456": "mW",
"Q321017": "R",
"Q3221356": "ym",
@ -330,10 +404,10 @@
"Q3312063": "fL",
"Q3320608": "kW",
"Q3331719": "dm²",
"Q3332689": "ToR",
"Q3332814": "Mbit",
"Q3332689": "RT",
"Q3332814": "Mb",
"Q3396758": "daa",
"Q3414243": "rps",
"Q3414243": "qps",
"Q3421309": "R_J",
"Q3495543": "mbar",
"Q355198": "px",
@ -343,11 +417,11 @@
"Q376660": "nat",
"Q37732658": "°R",
"Q3773454": "Mpc",
"Q3815076": "Kibit",
"Q3815076": "Kib",
"Q3833309": "£",
"Q3858002": "mA h",
"Q3867152": "ft/s²",
"Q389062": "Tibit",
"Q389062": "Tib",
"Q3902688": "pl",
"Q3902709": "ps",
"Q39360235": "US lea",
@ -359,7 +433,7 @@
"Q39462789": "µin²",
"Q39467934": "kgf/m²",
"Q39469927": "N/m²",
"Q39617688": "cwt long",
"Q39617688": "cwt",
"Q39617818": "t lb",
"Q39628023": "y",
"Q39699418": "cm/s²",
@ -367,14 +441,14 @@
"Q39709980": "bd",
"Q39710113": "bhp EDR",
"Q3972226": "kL",
"Q4041686": "iwg",
"Q4041686": "in H20",
"Q4068266": "Ʒ",
"Q4176683": "aC",
"Q420266": "oz. fl.",
"Q420266": "fl oz",
"Q42319606": "people/m²",
"Q4243638": "km³",
"Q4456994": "mF",
"Q469356": "tn. sh.",
"Q469356": "T",
"Q476572": "Ha",
"Q482798": "yd",
"Q483261": "Da",
@ -390,15 +464,18 @@
"Q514845": "pz",
"Q5195628": "hm³",
"Q5198770": "dam³",
"Q524410": "byr",
"Q524410": "Ga",
"Q5299480": "DPCm",
"Q53393488": "PHz",
"Q53393490": "EHz",
"Q53393494": "ZHz",
"Q53393498": "YHz",
"Q53393659": "ML",
"Q53393664": "GL",
"Q53393669": "El",
"Q53393674": "ZL",
"Q53393678": "YL",
"Q53393768": "zl",
"Q53393771": "yL",
"Q53393868": "GJ",
"Q53393886": "PJ",
@ -492,7 +569,7 @@
"Q54083813": "Zkat",
"Q5409016": "MVA",
"Q5465723": "ft-pdl",
"Q549389": "bit/s",
"Q549389": "b/s",
"Q550341": "V A",
"Q552299": "ch",
"Q55442349": "U/L",
@ -523,6 +600,8 @@
"Q6170164": "yg",
"Q6171168": "zg",
"Q61756607": "yd",
"Q61771602": "ft",
"Q61771670": "in",
"Q61793198": "rd",
"Q61794766": "ch (US survey)",
"Q61994988": "Wth",
@ -534,13 +613,12 @@
"Q6414556": "kip",
"Q648908": "bya",
"Q64996135": "gal (US)/min",
"Q65028392": "mm/yr",
"Q65028392": "mm/a",
"Q651336": "M_J",
"Q6517513": "dag",
"Q667419": "UK t",
"Q681996": "M⊕",
"Q685662": "p_P",
"Q6859652": "mm Hg",
"Q686163": "$",
"Q68725821": "°Rø",
"Q68726230": "°De",
@ -582,20 +660,23 @@
"Q70444514": "Ymol",
"Q70444609": "Pmol",
"Q712226": "km²",
"Q717310": "Mg",
"Q72081071": "MeV",
"Q723733": "ms",
"Q730251": "ft·lbf",
"Q732707": "MHz",
"Q73408": "K",
"Q7350781": "Mb/s",
"Q7398951": "PPI",
"Q743895": "bpm",
"Q748716": "ft/s",
"Q750178": "‱",
"Q752079": "RT",
"Q752197": "kJ/mol",
"Q7672057": "TU",
"Q777017": "dBm",
"Q78754556": "rot",
"Q78756901": "rev",
"Q78756901": "r",
"Q78757683": "windings",
"Q79726": "kB",
"Q79735": "MB",
@ -637,14 +718,16 @@
"Q848856": "dam",
"Q851872": "o",
"Q854546": "Gm",
"Q855161": "Yibit",
"Q855161": "Yib",
"Q856240": "ft³/min",
"Q857027": "ft²",
"Q85854198": "MN",
"Q864818": "abA",
"Q87262709": "kΩ",
"Q87416053": "MΩ",
"Q88296091": "tsp",
"Q89473028": "bu (UK)",
"Q89662131": "pt (UK)",
"Q901492": "ph",
"Q9026416": "MWth",
"Q9048643": "nl",
"Q905912": "L",
@ -653,7 +736,9 @@
"Q911730": "nx",
"Q914151": "P_P",
"Q915169": "F_P",
"Q93318": "nmi",
"Q93318": "M",
"Q93678895": "gill (US)",
"Q93679498": "gill (UK)",
"Q940052": "q",
"Q94076025": "dalm",
"Q94076717": "dakat",
@ -664,6 +749,7 @@
"Q94415255": "GC",
"Q94415438": "Yrad",
"Q94415526": "YC",
"Q94415561": "krad",
"Q94415782": "Mrad",
"Q94416260": "GN",
"Q94416535": "cN",
@ -943,6 +1029,7 @@
"Q96106385": "h°C",
"Q96106393": "M°C",
"Q96236286": "G°C",
"Q96312779": "μas",
"Q97059641": "p°C",
"Q97059652": "T°C",
"Q97143826": "P°C",
@ -953,9 +1040,21 @@
"Q97143843": "z°C",
"Q97143849": "Y°C",
"Q97143851": "a°C",
"Q98492214": "den",
"Q98538634": "eV/m²",
"Q98635536": "eV/m",
"Q98642859": "eV m²/kg",
"Q98793302": "qt (UK)",
"Q98793408": "liq qt (US)",
"Q98793687": "dry qt (US)",
"Q99476928": "gf",
"Q99487704": "ppt",
"Q99490009": "BTU (IT)",
"Q99490479": "BTU (39 °F)",
"Q99490986": "BTU (59 °F)",
"Q99491193": "BTU (60 °F)",
"Q99491447": "BTU (mean)",
"Q99492167": "m Hg",
"Q11229": "%",
"Q11570": "kg",
"Q11573": "m",
@ -965,8 +1064,7 @@
"Q12129": "pc",
"Q12438": "N",
"Q16068": "DM",
"Q1811": "ua",
"Q20764": "Myr",
"Q20764": "Ma",
"Q2101": "e",
"Q25235": "h",
"Q25236": "W",
@ -979,25 +1077,25 @@
"Q25517": "m³",
"Q33680": "rad",
"Q35852": "ha",
"Q36384": "equiv",
"Q36384": "Eq",
"Q3710": "ft",
"Q39274": "Sv",
"Q39369": "Hz",
"Q41509": "mol",
"Q41803": "g",
"Q42289": "°F",
"Q4406": "TV$",
"Q4406": "$T",
"Q44395": "Pa",
"Q4587": "Le",
"Q4588": "WS$",
"Q4592": "F$",
"Q4596": "Rs",
"Q4597": "$",
"Q47083": "Ω",
"Q48013": "oz",
"Q4917": "US$",
"Q50094": "Np",
"Q50098": "B",
"Q531": "ly",
"Q531": "l.y.",
"Q5329": "dB",
"Q573": "d",
"Q577": "a",

View File

@ -12,31 +12,40 @@ from searx import searx_dir
from searx.engines.wikidata import send_wikidata_query
# The response contains duplicate ?item rows, one for each different ?symbol.
# "ORDER BY ?item DESC(?rank) ?symbol" provides a deterministic result
# even if an ?item has several ?symbol values of the same rank.
# A deterministic result keeps the generated file unchanged between fetches.
# see:
# * https://www.wikidata.org/wiki/Help:Ranking
# * https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format ("Statement representation" section)
# * https://w.wiki/32BT
# see the result for https://www.wikidata.org/wiki/Q11582
# there are multiple symbols with the same rank
# SPARQL query returning one row per (unit item, English unit symbol).
# The symbol is read through its statement node (p:P5061 / ps:P5061) so that
# the statement's wikibase:rank is available for the ORDER BY clause.
# NOTE: the constant is spelled "SARQL" (not "SPARQL"); the name is kept
# unchanged because get_data() refers to it.
SARQL_REQUEST = """
SELECT DISTINCT ?item ?symbol
WHERE
{
  ?item wdt:P31/wdt:P279 wd:Q47574 .
  ?item p:P5061 ?symbolP .
  ?symbolP ps:P5061 ?symbol ;
           wikibase:rank ?rank .
  FILTER(LANG(?symbol) = "en").
}
ORDER BY ?item DESC(?rank) ?symbol
"""
def get_data():
    """Fetch the unit symbols from Wikidata.

    Sends SARQL_REQUEST once and walks the returned bindings in the order
    the query produced them.

    Returns:
        collections.OrderedDict: maps the entity id (the item URI with the
        ``http://www.wikidata.org/entity/`` prefix removed) to its symbol.
        When an item appears in several bindings, only the first one is kept.
    """
    entity_prefix = 'http://www.wikidata.org/entity/'
    results = collections.OrderedDict()
    response = send_wikidata_query(SARQL_REQUEST)
    for binding in response['results']['bindings']:
        name = binding['item']['value'].replace(entity_prefix, '')
        # ignore duplicates: setdefault only stores the first symbol seen
        # for an item, later bindings of the same item are dropped.
        results.setdefault(name, binding['symbol']['value'])
    return results
def get_wikidata_units_filename():