jehanne/sys/src/cmd/ndb/idn.c

278 lines
5.3 KiB
C

/*
* This file is part of the UCB release of Plan 9. It is subject to the license
* terms in the LICENSE file found in the top-level directory of this
* distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
* part of the UCB release of Plan 9, including this file, may be copied,
* modified, propagated, or distributed except according to the terms contained
* in the LICENSE file.
*/
/* Portions of this file are Copyright (C) 2015-2018 Giacomo Tesio <giacomo@tesio.it>
* See /doc/license/gpl-2.0.txt for details about the licensing.
*/
/* Portions of this file are Copyright (C) 9front's team.
* See /doc/license/9front-mit for details about the licensing.
* See http://git.9front.org/plan9front/plan9front/HEAD/info.html for a list of authors.
*/
#include <u.h>
#include <lib9.h>
#include <ip.h>
#include "dns.h"
/*
 * Bootstring parameter values used by the Punycode
 * encoder and decoder below (see RFC 3492).
 */
enum {
	base		= 36,	/* digit values run from 0 to base-1 */
	tmin		= 1,	/* lowest threshold a digit position can have */
	tmax		= 26,	/* highest threshold a digit position can have */
	skew		= 38,	/* added to the divisor in adapt() */
	damp		= 700,	/* divisor for the first delta passed to adapt() */
	initial_bias	= 72,	/* bias before adapt() has been called */
	initial_n	= 0x80,	/* first code point that is not copied verbatim */
};
static uint maxint = ~0;
/*
 * Map the character cp to its digit value:
 * a letter of either case gives 0-25, '0'-'9' give 26-35.
 * Any other value gives base, which the decoder rejects.
 */
static uint
decode_digit(uint cp)
{
	if(cp >= 'a' && cp <= 'z')
		return cp - 'a';
	if(cp >= 'A' && cp <= 'Z')
		return cp - 'A';
	if(cp >= '0' && cp <= '9')
		return 26 + (cp - '0');
	return base;
}
/*
 * Return the character standing for digit value d.
 * Values below 26 become a letter, upper case when flag is
 * nonzero and lower case otherwise; larger values are placed
 * in the decimal digits, with 26 mapping to '0'.
 */
static char
encode_digit(uint d, int flag)
{
	char first;

	if(d >= 26)
		return '0' + (d - 26);
	first = flag ? 'A' : 'a';
	return first + d;
}
/*
 * Compute the bias to use after a delta has been coded.
 * numpoints is the count of code points handled so far
 * (the callers pass h+1 or out+1); firsttime selects
 * division by damp instead of halving for the initial scaling.
 */
static uint
adapt(uint delta, uint numpoints, int firsttime)
{
	uint k;

	if(firsttime)
		delta /= damp;
	else
		delta >>= 1;
	delta += delta / numpoints;

	/* shrink delta until it fits, counting base for every division */
	k = 0;
	while(delta > ((base - tmin) * tmax) / 2){
		delta /= base - tmin;
		k += base;
	}
	return k + (base - tmin + 1) * delta / (delta + skew);
}
/*
 * Encode the input_length runes of input into output, which has
 * room for max_out chars.  Runes below 0x80 are copied first,
 * followed by '-' when at least one was copied; the remaining
 * runes are then coded in increasing code point order as
 * variable length integers.
 *
 * Returns the number of chars stored (no terminating NUL is
 * written), or -1 when output is too small or the arithmetic
 * would overflow.
 */
static int
punyencode(uint input_length, Rune input[], uint max_out, char output[])
{
	uint n, delta, bias, out, h, b, i, m, q, k, t;

	n = initial_n;
	bias = initial_bias;
	delta = 0;
	out = 0;

	/* pass the basic code points through unchanged */
	for(i = 0; i < input_length; i++){
		if((uint)input[i] >= 0x80)
			continue;
		if(max_out - out < 2)
			return -1;
		output[out++] = input[i];
	}

	/* h counts runes already handled, b remembers how many were basic */
	h = b = out;
	if(b > 0)
		output[out++] = '-';

	while(h < input_length){
		/* find the smallest code point not yet handled */
		m = maxint;
		for(i = 0; i < input_length; i++)
			if(input[i] >= n && input[i] < m)
				m = input[i];

		/* advance the state to <m, 0>, refusing to overflow delta */
		if(m - n > (maxint - delta) / (h + 1))
			return -1;
		delta += (m - n) * (h + 1);
		n = m;

		for(i = 0; i < input_length; i++){
			if(input[i] < n && ++delta == 0)
				return -1;
			if(input[i] != n)
				continue;

			/* emit delta as a variable length integer */
			q = delta;
			for(k = base;; k += base){
				if(out >= max_out)
					return -1;
				t = k <= bias ? tmin : (k >= bias + tmax ? tmax : k - bias);
				if(q < t)
					break;
				output[out++] = encode_digit(t + (q - t) % (base - t), 0);
				q = (q - t) / (base - t);
			}
			/* the case of the last digit records the case of the rune */
			output[out++] = encode_digit(q, isupperrune(input[i]));
			bias = adapt(delta, h + 1, h == b);
			delta = 0;
			h++;
		}
		delta++;
		n++;
	}
	return (int)out;
}
/*
 * Decode the input_length chars of input into output, which has
 * room for max_out runes.  Everything before the last '-' is
 * copied as is; what follows is read as a sequence of variable
 * length integers, each of which inserts one rune into output.
 *
 * Returns the number of runes stored, or -1 when the input is
 * malformed, output is too small, or the arithmetic would overflow.
 */
static int
punydecode(uint input_length, char input[], uint max_out, Rune output[])
{
	uint n, bias, i, out, in, b, j, oldi, w, k, digit, t;

	n = initial_n;
	bias = initial_bias;
	i = 0;
	out = 0;

	/* b ends up as the index of the last '-', or 0 if there is none */
	b = 0;
	for(j = 0; j < input_length; j++)
		if(input[j] == '-')
			b = j;
	if(b > max_out)
		return -1;

	/* the part before the delimiter must have the high bit clear */
	for(j = 0; j < b; j++){
		if(input[j] & 0x80)
			return -1;
		output[out++] = input[j];
	}

	in = b > 0 ? b + 1 : 0;
	while(in < input_length){
		/* read one variable length integer, adding it to i */
		oldi = i;
		w = 1;
		for(k = base;; k += base){
			if(in >= input_length)
				return -1;
			digit = decode_digit(input[in++]);
			if(digit >= base)
				return -1;
			if(digit > (maxint - i) / w)
				return -1;
			i += digit * w;
			t = k <= bias ? tmin : (k >= bias + tmax ? tmax : k - bias);
			if(digit < t)
				break;
			if(w > maxint / (base - t))
				return -1;
			w *= base - t;
		}
		bias = adapt(i - oldi, out + 1, oldi == 0);

		/* i wraps around from out+1 to 0, bumping n each time */
		if(i / (out + 1) > maxint - n)
			return -1;
		n += i / (out + 1);
		i %= out + 1;

		/* open a gap at position i and store the new rune there */
		if(out >= max_out)
			return -1;
		memmove(output + i + 1, output + i, (out - i) * sizeof *output);
		/* an upper case final digit asks for the upper case rune */
		if(((uint)input[in-1] - 'A') < 26)
			output[i++] = toupperrune(n);
		else
			output[i++] = tolowerrune(n);
		out++;
	}
	return (int)out;
}
/*
 * convert punycode encoded internationalized
 * domain name to unicode string
 *
 * name is processed one '.' separated label at a time.  A label
 * starting with "xn--" (in any case) is run through punydecode;
 * any other label is copied through.  The result is written to
 * buf, which holds nbuf bytes, and buf is returned.
 *
 * Returns nil when a label has more than Domlen runes, when
 * punydecode rejects a label, or when buf is too small.
 */
char*
idn2utf(char *name, char *buf, int nbuf)
{
	char *dp, *de, *cp;
	Rune rb[Domlen], r;
	int nc, nr, n;

	cp = name;
	dp = buf;
	de = dp+nbuf-1;
	for(;;){
		/*
		 * never hand seprint an empty range: its result is
		 * dereferenced below and may be nil in that case
		 */
		if(dp >= de)
			return nil;

		/* collect the runes of the next label; nc counts its bytes */
		nc = nr = 0;
		while(cp[nc] != 0){
			n = chartorune(&r, cp+nc);
			if(r == '.')
				break;
			/* rb lives on the stack; refuse labels that do not fit */
			if(nr >= nelem(rb))
				return nil;
			rb[nr++] = r;
			nc += n;
		}
		if(cistrncmp(cp, "xn--", 4) == 0)
			if((nr = punydecode(nc-4, cp+4, nelem(rb), rb)) < 0)
				return nil;
		dp = seprint(dp, de, "%.*S", nr, rb);
		if(dp == nil || dp >= de)
			return nil;
		if(cp[nc] == 0)
			break;
		*dp++ = '.';
		cp += nc+1;
	}
	*dp = 0;
	return buf;
}
/*
 * convert unicode string to punycode
 * encoded internationalized domain name
 *
 * name is processed one '.' separated label at a time.  A label
 * in which every rune occupies a single byte is copied through;
 * any other label is written as "xn--" followed by the output of
 * punyencode.  The result is written to buf, which holds nbuf
 * bytes, and buf is returned.
 *
 * Returns nil when a label has more than Domlen runes, when
 * punyencode fails, or when buf is too small.
 */
char*
utf2idn(char *name, char *buf, int nbuf)
{
	char *dp, *de, *cp;
	Rune rb[Domlen], r;
	int nc, nr, n;

	dp = buf;
	de = dp+nbuf-1;
	cp = name;
	for(;;){
		/*
		 * never hand seprint an empty range: its result is
		 * dereferenced below and may be nil in that case
		 */
		if(dp >= de)
			return nil;

		/* collect the runes of the next label; nc counts its bytes */
		nc = nr = 0;
		while(cp[nc] != 0){
			n = chartorune(&r, cp+nc);
			if(r == '.')
				break;
			/*
			 * fail on an over-long label instead of cutting it
			 * short, which would split it and drop a byte
			 */
			if(nr >= nelem(rb))
				return nil;
			rb[nr++] = r;
			nc += n;
		}
		if(nc == nr)
			dp = seprint(dp, de, "%.*s", nc, cp);
		else {
			dp = seprint(dp, de, "xn--");
			/* de - dp below is only meaningful for a valid dp */
			if(dp == nil)
				return nil;
			if((n = punyencode(nr, rb, de - dp, dp)) < 0)
				return nil;
			dp += n;
		}
		if(dp == nil || dp >= de)
			return nil;
		if(cp[nc] == 0)
			break;
		*dp++ = '.';
		cp += nc+1;
	}
	*dp = 0;
	return buf;
}