1
0
Fork 0
tinmop/src/idn.lisp

125 lines
5.0 KiB
Common Lisp
Raw Normal View History

(in-package :idn)
(defgeneric basic-code-point-p (object))
(defmethod basic-code-point-p ((object character))
(basic-code-point-p (char-code object)))
(defmethod basic-code-point-p ((object integer))
(<= 0 object #x7f))
(defun digit->code-point (digit)
(code-char (cond
((<= 0 digit 25)
(+ 97 digit))
((<= 26 digit 35)
(+ (- digit 26) 48))
(t
(error "digit overflow")))))
(defun div (a b)
(floor (/ a b)))
(define-constant +base+ 36 :test #'=)
(define-constant +t-min+ 1 :test #'=)
(define-constant +t-max+ 26 :test #'=)
(define-constant +skew+ 38 :test #'=)
(define-constant +damp+ 700 :test #'=)
(define-constant +initial-bias+ 72 :test #'=)
(define-constant +delimiter+ #\- :test #'char=)
(define-constant +encoded-string-prefix+ "xn--" :test #'string=)
(define-constant +delta-overflow+ (1- (expt 2 26)) :test #'=)
(define-constant +initial-n+ #x80 :test #'=)
(defun unicode->ascii (unicode-text)
(let* ((n +initial-n+)
(delta 0)
(bias +initial-bias+)
(basic-code-points (remove-if-not #'basic-code-point-p unicode-text))
(h (length basic-code-points))
(b (length basic-code-points))
(output (if (> b 0)
(text-utils:strcat basic-code-points (string +delimiter+))
basic-code-points)))
(labels ((adapt (delta num-points first-time)
(if first-time
(setf delta (div delta +damp+))
(setf delta (div delta 2)))
(incf delta (div delta num-points))
(let ((k 0))
(loop while (> delta
(div (* (- +base+ +t-min+)
+t-max+)
2))
do
(setf delta (div delta (- +base+ +t-min+)))
(incf k +base+))
(+ k
(div (* (+ (- +base+ +t-min+) 1)
delta)
(+ delta +skew+)))))
(maybe-signal-overflow (delta)
(when (> delta +delta-overflow+)
(error "overflow detected"))))
(loop while (< h (length unicode-text)) do
(let ((minimum-code-greater-than-n (num:find-min (remove-if (lambda (a) (< a n))
(map 'list
#'char-code
unicode-text)))))
(setf delta (+ delta (* (- minimum-code-greater-than-n n)
(+ h 1))))
(maybe-signal-overflow delta)
(setf n minimum-code-greater-than-n)
(loop for character across unicode-text do
(let ((code-point (char-code character)))
(cond
((< code-point n)
(incf delta)
(maybe-signal-overflow delta))
((= code-point n)
(let ((q delta))
(loop named inner-loop for k from +base+ by +base+ do
(let ((tx (cond
((<= k bias)
+t-min+)
((>= k (+ bias +t-max+))
+t-max+)
(t
(- k bias)))))
(when (< q tx)
(return-from inner-loop))
(let ((new-char (digit->code-point (+ tx (rem (- q tx)
(- +base+ tx))))))
(setf output (text-utils:strcat output (string new-char))))
(setf q (div (- q tx)
(- +base+ tx)))))
(setf output (text-utils:strcat output (string (digit->code-point q))))
(setf bias (adapt delta (1+ h) (= h b)))
(setf delta 0)
(incf h))))))
(incf delta)
(incf n)))
(cond
((text-utils:string-empty-p output)
output)
((char= (alexandria:last-elt output) +delimiter+)
(misc:all-but-last-elt output))
(t
(text-utils:strcat +encoded-string-prefix+ output))))))
(defun host-unicode->ascii (unicode-host)
(if (find #\. unicode-host :test #'char=)
(let ((splitted (cl-ppcre:split "\\." unicode-host)))
(text-utils:join-with-strings (mapcar #'unicode->ascii splitted)
"."))
(unicode->ascii unicode-host)))