Update deps

2019-11-14 22:34:38 -05:00 · 2019-11-14 22:34:38 -05:00 · 568376ea13
parent d063a7959e
commit 568376ea13
39 changed files with 1832 additions and 1636 deletions
--- a/go.mod
+++ b/go.mod
@ -21,9 +21,9 @@ require (
 	github.com/kardianos/service v1.0.0
 	github.com/miekg/dns v1.1.22
 	github.com/powerman/check v1.2.1
-	golang.org/x/crypto v0.0.0-20191029031824-8986dd9e96cf
-	golang.org/x/net v0.0.0-20191101175033-0deb6923b6d9
-	golang.org/x/sys v0.0.0-20191104094858-e8c54fb511f6
+	golang.org/x/crypto v0.0.0-20191112222119-e1110fd1c708
+	golang.org/x/net v0.0.0-20191112182307-2180aed22343
+	golang.org/x/sys v0.0.0-20191113165036-4c7a9d0fe056
 	gopkg.in/natefinch/lumberjack.v2 v2.0.0
 	gopkg.in/yaml.v2 v2.2.2 // indirect
 )
--- a/go.sum
+++ b/go.sum
@ -63,14 +63,14 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk
 golang.org/x/crypto v0.0.0-20190909091759-094676da4a83 h1:mgAKeshyNqWKdENOnQsg+8dRTwZFIwFaO3HNl52sweA=
 golang.org/x/crypto v0.0.0-20190909091759-094676da4a83/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20190923035154-9ee001bba392/go.mod h1:/lpIB1dKB+9EgE3H3cr1v9wB50oz8l4C4h62xy7jSTY=
-golang.org/x/crypto v0.0.0-20191029031824-8986dd9e96cf h1:fnPsqIDRbCSgumaMCRpoIoF2s4qxv0xSSS0BVZUE/ss=
-golang.org/x/crypto v0.0.0-20191029031824-8986dd9e96cf/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/crypto v0.0.0-20191112222119-e1110fd1c708 h1:pXVtWnwHkrWD9ru3sDxY/qFK/bfc0egRovX91EjWjf4=
+golang.org/x/crypto v0.0.0-20191112222119-e1110fd1c708/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
 golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
 golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20190923162816-aa69164e4478/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20191101175033-0deb6923b6d9 h1:DPz9iiH3YoKiKhX/ijjoZvT0VFwK2c6CWYWQ7Zyr8TU=
-golang.org/x/net v0.0.0-20191101175033-0deb6923b6d9/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20191112182307-2180aed22343 h1:00ohfJ4K98s3m6BGUoBd8nyfp4Yl0GoIKvw5abItTjI=
+golang.org/x/net v0.0.0-20191112182307-2180aed22343/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/sync v0.0.0-20190423024810-112230192c58 h1:8gQV6CLnAEikrhgkHFbMAEhagSSnXWGV915qUMm9mrU=
 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sys v0.0.0-20190204203706-41f3e6584952/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@ -82,8 +82,8 @@ golang.org/x/sys v0.0.0-20190909082730-f460065e899a/go.mod h1:h1NjWce9XRLGQEsW7w
 golang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20190924154521-2837fb4f24fe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20191029155521-f43be2a4598c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20191104094858-e8c54fb511f6 h1:ZJUmhYTp8GbGC0ViZRc2U+MIYQ8xx9MscsdXnclfIhw=
-golang.org/x/sys v0.0.0-20191104094858-e8c54fb511f6/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20191113165036-4c7a9d0fe056 h1:dHtDnRWQtSx0Hjq9kvKFpBh9uPPKfQN70NZZmvssGwk=
+golang.org/x/sys v0.0.0-20191113165036-4c7a9d0fe056/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
--- a/vendor/golang.org/x/crypto/curve25519/const_amd64.h
+++ b/vendor/golang.org/x/crypto/curve25519/const_amd64.h
@ -1,8 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This code was translated into a form compatible with 6a from the public
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
-
-#define REDMASK51     0x0007FFFFFFFFFFFF
--- a/vendor/golang.org/x/crypto/curve25519/const_amd64.s
+++ b/vendor/golang.org/x/crypto/curve25519/const_amd64.s
@ -1,20 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This code was translated into a form compatible with 6a from the public
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
-
-// +build amd64,!gccgo,!appengine
-
-// These constants cannot be encoded in non-MOVQ immediates.
-// We access them directly from memory instead.
-
-DATA ·_121666_213(SB)/8, $996687872
-GLOBL ·_121666_213(SB), 8, $8
-
-DATA ·_2P0(SB)/8, $0xFFFFFFFFFFFDA
-GLOBL ·_2P0(SB), 8, $8
-
-DATA ·_2P1234(SB)/8, $0xFFFFFFFFFFFFE
-GLOBL ·_2P1234(SB), 8, $8
--- a/vendor/golang.org/x/crypto/curve25519/cswap_amd64.s
+++ b/vendor/golang.org/x/crypto/curve25519/cswap_amd64.s
@ -1,65 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build amd64,!gccgo,!appengine
-
-// func cswap(inout *[4][5]uint64, v uint64)
-TEXT ·cswap(SB),7,$0
-	MOVQ inout+0(FP),DI
-	MOVQ v+8(FP),SI
-
-	SUBQ $1, SI
-	NOTQ SI
-	MOVQ SI, X15
-	PSHUFD $0x44, X15, X15
-
-	MOVOU 0(DI), X0
-	MOVOU 16(DI), X2
-	MOVOU 32(DI), X4
-	MOVOU 48(DI), X6
-	MOVOU 64(DI), X8
-	MOVOU 80(DI), X1
-	MOVOU 96(DI), X3
-	MOVOU 112(DI), X5
-	MOVOU 128(DI), X7
-	MOVOU 144(DI), X9
-
-	MOVO X1, X10
-	MOVO X3, X11
-	MOVO X5, X12
-	MOVO X7, X13
-	MOVO X9, X14
-
-	PXOR X0, X10
-	PXOR X2, X11
-	PXOR X4, X12
-	PXOR X6, X13
-	PXOR X8, X14
-	PAND X15, X10
-	PAND X15, X11
-	PAND X15, X12
-	PAND X15, X13
-	PAND X15, X14
-	PXOR X10, X0
-	PXOR X10, X1
-	PXOR X11, X2
-	PXOR X11, X3
-	PXOR X12, X4
-	PXOR X12, X5
-	PXOR X13, X6
-	PXOR X13, X7
-	PXOR X14, X8
-	PXOR X14, X9
-
-	MOVOU X0, 0(DI)
-	MOVOU X2, 16(DI)
-	MOVOU X4, 32(DI)
-	MOVOU X6, 48(DI)
-	MOVOU X8, 64(DI)
-	MOVOU X1, 80(DI)
-	MOVOU X3, 96(DI)
-	MOVOU X5, 112(DI)
-	MOVOU X7, 128(DI)
-	MOVOU X9, 144(DI)
-	RET
--- a/vendor/golang.org/x/crypto/curve25519/curve25519.go
+++ b/vendor/golang.org/x/crypto/curve25519/curve25519.go
@ -1,834 +1,95 @@
-// Copyright 2013 The Go Authors. All rights reserved.
+// Copyright 2019 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// We have an implementation in amd64 assembly so this code is only run on
-// non-amd64 platforms. The amd64 assembly does not support gccgo.
-// +build !amd64 gccgo appengine
-
-package curve25519
+// Package curve25519 provides an implementation of the X25519 function, which
+// performs scalar multiplication on the elliptic curve known as Curve25519.
+// See RFC 7748.
+package curve25519 // import "golang.org/x/crypto/curve25519"

 import (
-	"encoding/binary"
+	"crypto/subtle"
+	"fmt"
 )

-// This code is a port of the public domain, "ref10" implementation of
-// curve25519 from SUPERCOP 20130419 by D. J. Bernstein.
+// ScalarMult sets dst to the product scalar * point.
+//
+// Deprecated: when provided a low-order point, ScalarMult will set dst to all
+// zeroes, irrespective of the scalar. Instead, use the X25519 function, which
+// will return an error.
+func ScalarMult(dst, scalar, point *[32]byte) {
+	scalarMult(dst, scalar, point)
+}

-// fieldElement represents an element of the field GF(2^255 - 19). An element
-// t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
-// t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on
-// context.
-type fieldElement [10]int32
+// ScalarBaseMult sets dst to the product scalar * base where base is the
+// standard generator.
+//
+// It is recommended to use the X25519 function with Basepoint instead, as
+// copying into fixed size arrays can lead to unexpected bugs.
+func ScalarBaseMult(dst, scalar *[32]byte) {
+	ScalarMult(dst, scalar, &basePoint)
+}

-func feZero(fe *fieldElement) {
-	for i := range fe {
-		fe[i] = 0
+const (
+	// ScalarSize is the size of the scalar input to X25519.
+	ScalarSize = 32
+	// PointSize is the size of the point input to X25519.
+	PointSize = 32
+)
+
+// Basepoint is the canonical Curve25519 generator.
+var Basepoint []byte
+
+var basePoint = [32]byte{9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+
+func init() { Basepoint = basePoint[:] }
+
+func checkBasepoint() {
+	if subtle.ConstantTimeCompare(Basepoint, []byte{
+		0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	}) != 1 {
+		panic("curve25519: global Basepoint value was modified")
 	}
 }

-func feOne(fe *fieldElement) {
-	feZero(fe)
-	fe[0] = 1
+// X25519 returns the result of the scalar multiplication (scalar * point),
+// according to RFC 7748, Section 5. scalar, point and the return value are
+// slices of 32 bytes.
+//
+// scalar can be generated at random, for example with crypto/rand. point should
+// be either Basepoint or the output of another X25519 call.
+//
+// If point is Basepoint (but not if it's a different slice with the same
+// contents) a precomputed implementation might be used for performance.
+func X25519(scalar, point []byte) ([]byte, error) {
+	// Outline the body of function, to let the allocation be inlined in the
+	// caller, and possibly avoid escaping to the heap.
+	var dst [32]byte
+	return x25519(&dst, scalar, point)
 }

-func feAdd(dst, a, b *fieldElement) {
-	for i := range dst {
-		dst[i] = a[i] + b[i]
+func x25519(dst *[32]byte, scalar, point []byte) ([]byte, error) {
+	var in [32]byte
+	if l := len(scalar); l != 32 {
+		return nil, fmt.Errorf("bad scalar length: %d, expected %d", l, 32)
 	}
-}
-
-func feSub(dst, a, b *fieldElement) {
-	for i := range dst {
-		dst[i] = a[i] - b[i]
-	}
-}
-
-func feCopy(dst, src *fieldElement) {
-	for i := range dst {
-		dst[i] = src[i]
-	}
-}
-
-// feCSwap replaces (f,g) with (g,f) if b == 1; replaces (f,g) with (f,g) if b == 0.
-//
-// Preconditions: b in {0,1}.
-func feCSwap(f, g *fieldElement, b int32) {
-	b = -b
-	for i := range f {
-		t := b & (f[i] ^ g[i])
-		f[i] ^= t
-		g[i] ^= t
-	}
-}
-
-// load3 reads a 24-bit, little-endian value from in.
-func load3(in []byte) int64 {
-	var r int64
-	r = int64(in[0])
-	r |= int64(in[1]) << 8
-	r |= int64(in[2]) << 16
-	return r
-}
-
-// load4 reads a 32-bit, little-endian value from in.
-func load4(in []byte) int64 {
-	return int64(binary.LittleEndian.Uint32(in))
-}
-
-func feFromBytes(dst *fieldElement, src *[32]byte) {
-	h0 := load4(src[:])
-	h1 := load3(src[4:]) << 6
-	h2 := load3(src[7:]) << 5
-	h3 := load3(src[10:]) << 3
-	h4 := load3(src[13:]) << 2
-	h5 := load4(src[16:])
-	h6 := load3(src[20:]) << 7
-	h7 := load3(src[23:]) << 5
-	h8 := load3(src[26:]) << 4
-	h9 := (load3(src[29:]) & 0x7fffff) << 2
-
-	var carry [10]int64
-	carry[9] = (h9 + 1<<24) >> 25
-	h0 += carry[9] * 19
-	h9 -= carry[9] << 25
-	carry[1] = (h1 + 1<<24) >> 25
-	h2 += carry[1]
-	h1 -= carry[1] << 25
-	carry[3] = (h3 + 1<<24) >> 25
-	h4 += carry[3]
-	h3 -= carry[3] << 25
-	carry[5] = (h5 + 1<<24) >> 25
-	h6 += carry[5]
-	h5 -= carry[5] << 25
-	carry[7] = (h7 + 1<<24) >> 25
-	h8 += carry[7]
-	h7 -= carry[7] << 25
-
-	carry[0] = (h0 + 1<<25) >> 26
-	h1 += carry[0]
-	h0 -= carry[0] << 26
-	carry[2] = (h2 + 1<<25) >> 26
-	h3 += carry[2]
-	h2 -= carry[2] << 26
-	carry[4] = (h4 + 1<<25) >> 26
-	h5 += carry[4]
-	h4 -= carry[4] << 26
-	carry[6] = (h6 + 1<<25) >> 26
-	h7 += carry[6]
-	h6 -= carry[6] << 26
-	carry[8] = (h8 + 1<<25) >> 26
-	h9 += carry[8]
-	h8 -= carry[8] << 26
-
-	dst[0] = int32(h0)
-	dst[1] = int32(h1)
-	dst[2] = int32(h2)
-	dst[3] = int32(h3)
-	dst[4] = int32(h4)
-	dst[5] = int32(h5)
-	dst[6] = int32(h6)
-	dst[7] = int32(h7)
-	dst[8] = int32(h8)
-	dst[9] = int32(h9)
-}
-
-// feToBytes marshals h to s.
-// Preconditions:
-//   |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
-//
-// Write p=2^255-19; q=floor(h/p).
-// Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
-//
-// Proof:
-//   Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
-//   Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4.
-//
-//   Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
-//   Then 0<y<1.
-//
-//   Write r=h-pq.
-//   Have 0<=r<=p-1=2^255-20.
-//   Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
-//
-//   Write x=r+19(2^-255)r+y.
-//   Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
-//
-//   Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
-//   so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
-func feToBytes(s *[32]byte, h *fieldElement) {
-	var carry [10]int32
-
-	q := (19*h[9] + (1 << 24)) >> 25
-	q = (h[0] + q) >> 26
-	q = (h[1] + q) >> 25
-	q = (h[2] + q) >> 26
-	q = (h[3] + q) >> 25
-	q = (h[4] + q) >> 26
-	q = (h[5] + q) >> 25
-	q = (h[6] + q) >> 26
-	q = (h[7] + q) >> 25
-	q = (h[8] + q) >> 26
-	q = (h[9] + q) >> 25
-
-	// Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20.
-	h[0] += 19 * q
-	// Goal: Output h-2^255 q, which is between 0 and 2^255-20.
-
-	carry[0] = h[0] >> 26
-	h[1] += carry[0]
-	h[0] -= carry[0] << 26
-	carry[1] = h[1] >> 25
-	h[2] += carry[1]
-	h[1] -= carry[1] << 25
-	carry[2] = h[2] >> 26
-	h[3] += carry[2]
-	h[2] -= carry[2] << 26
-	carry[3] = h[3] >> 25
-	h[4] += carry[3]
-	h[3] -= carry[3] << 25
-	carry[4] = h[4] >> 26
-	h[5] += carry[4]
-	h[4] -= carry[4] << 26
-	carry[5] = h[5] >> 25
-	h[6] += carry[5]
-	h[5] -= carry[5] << 25
-	carry[6] = h[6] >> 26
-	h[7] += carry[6]
-	h[6] -= carry[6] << 26
-	carry[7] = h[7] >> 25
-	h[8] += carry[7]
-	h[7] -= carry[7] << 25
-	carry[8] = h[8] >> 26
-	h[9] += carry[8]
-	h[8] -= carry[8] << 26
-	carry[9] = h[9] >> 25
-	h[9] -= carry[9] << 25
-	// h10 = carry9
-
-	// Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
-	// Have h[0]+...+2^230 h[9] between 0 and 2^255-1;
-	// evidently 2^255 h10-2^255 q = 0.
-	// Goal: Output h[0]+...+2^230 h[9].
-
-	s[0] = byte(h[0] >> 0)
-	s[1] = byte(h[0] >> 8)
-	s[2] = byte(h[0] >> 16)
-	s[3] = byte((h[0] >> 24) | (h[1] << 2))
-	s[4] = byte(h[1] >> 6)
-	s[5] = byte(h[1] >> 14)
-	s[6] = byte((h[1] >> 22) | (h[2] << 3))
-	s[7] = byte(h[2] >> 5)
-	s[8] = byte(h[2] >> 13)
-	s[9] = byte((h[2] >> 21) | (h[3] << 5))
-	s[10] = byte(h[3] >> 3)
-	s[11] = byte(h[3] >> 11)
-	s[12] = byte((h[3] >> 19) | (h[4] << 6))
-	s[13] = byte(h[4] >> 2)
-	s[14] = byte(h[4] >> 10)
-	s[15] = byte(h[4] >> 18)
-	s[16] = byte(h[5] >> 0)
-	s[17] = byte(h[5] >> 8)
-	s[18] = byte(h[5] >> 16)
-	s[19] = byte((h[5] >> 24) | (h[6] << 1))
-	s[20] = byte(h[6] >> 7)
-	s[21] = byte(h[6] >> 15)
-	s[22] = byte((h[6] >> 23) | (h[7] << 3))
-	s[23] = byte(h[7] >> 5)
-	s[24] = byte(h[7] >> 13)
-	s[25] = byte((h[7] >> 21) | (h[8] << 4))
-	s[26] = byte(h[8] >> 4)
-	s[27] = byte(h[8] >> 12)
-	s[28] = byte((h[8] >> 20) | (h[9] << 6))
-	s[29] = byte(h[9] >> 2)
-	s[30] = byte(h[9] >> 10)
-	s[31] = byte(h[9] >> 18)
-}
-
-// feMul calculates h = f * g
-// Can overlap h with f or g.
-//
-// Preconditions:
-//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
-//    |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
-//
-// Postconditions:
-//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
-//
-// Notes on implementation strategy:
-//
-// Using schoolbook multiplication.
-// Karatsuba would save a little in some cost models.
-//
-// Most multiplications by 2 and 19 are 32-bit precomputations;
-// cheaper than 64-bit postcomputations.
-//
-// There is one remaining multiplication by 19 in the carry chain;
-// one *19 precomputation can be merged into this,
-// but the resulting data flow is considerably less clean.
-//
-// There are 12 carries below.
-// 10 of them are 2-way parallelizable and vectorizable.
-// Can get away with 11 carries, but then data flow is much deeper.
-//
-// With tighter constraints on inputs can squeeze carries into int32.
-func feMul(h, f, g *fieldElement) {
-	f0 := f[0]
-	f1 := f[1]
-	f2 := f[2]
-	f3 := f[3]
-	f4 := f[4]
-	f5 := f[5]
-	f6 := f[6]
-	f7 := f[7]
-	f8 := f[8]
-	f9 := f[9]
-	g0 := g[0]
-	g1 := g[1]
-	g2 := g[2]
-	g3 := g[3]
-	g4 := g[4]
-	g5 := g[5]
-	g6 := g[6]
-	g7 := g[7]
-	g8 := g[8]
-	g9 := g[9]
-	g1_19 := 19 * g1 // 1.4*2^29
-	g2_19 := 19 * g2 // 1.4*2^30; still ok
-	g3_19 := 19 * g3
-	g4_19 := 19 * g4
-	g5_19 := 19 * g5
-	g6_19 := 19 * g6
-	g7_19 := 19 * g7
-	g8_19 := 19 * g8
-	g9_19 := 19 * g9
-	f1_2 := 2 * f1
-	f3_2 := 2 * f3
-	f5_2 := 2 * f5
-	f7_2 := 2 * f7
-	f9_2 := 2 * f9
-	f0g0 := int64(f0) * int64(g0)
-	f0g1 := int64(f0) * int64(g1)
-	f0g2 := int64(f0) * int64(g2)
-	f0g3 := int64(f0) * int64(g3)
-	f0g4 := int64(f0) * int64(g4)
-	f0g5 := int64(f0) * int64(g5)
-	f0g6 := int64(f0) * int64(g6)
-	f0g7 := int64(f0) * int64(g7)
-	f0g8 := int64(f0) * int64(g8)
-	f0g9 := int64(f0) * int64(g9)
-	f1g0 := int64(f1) * int64(g0)
-	f1g1_2 := int64(f1_2) * int64(g1)
-	f1g2 := int64(f1) * int64(g2)
-	f1g3_2 := int64(f1_2) * int64(g3)
-	f1g4 := int64(f1) * int64(g4)
-	f1g5_2 := int64(f1_2) * int64(g5)
-	f1g6 := int64(f1) * int64(g6)
-	f1g7_2 := int64(f1_2) * int64(g7)
-	f1g8 := int64(f1) * int64(g8)
-	f1g9_38 := int64(f1_2) * int64(g9_19)
-	f2g0 := int64(f2) * int64(g0)
-	f2g1 := int64(f2) * int64(g1)
-	f2g2 := int64(f2) * int64(g2)
-	f2g3 := int64(f2) * int64(g3)
-	f2g4 := int64(f2) * int64(g4)
-	f2g5 := int64(f2) * int64(g5)
-	f2g6 := int64(f2) * int64(g6)
-	f2g7 := int64(f2) * int64(g7)
-	f2g8_19 := int64(f2) * int64(g8_19)
-	f2g9_19 := int64(f2) * int64(g9_19)
-	f3g0 := int64(f3) * int64(g0)
-	f3g1_2 := int64(f3_2) * int64(g1)
-	f3g2 := int64(f3) * int64(g2)
-	f3g3_2 := int64(f3_2) * int64(g3)
-	f3g4 := int64(f3) * int64(g4)
-	f3g5_2 := int64(f3_2) * int64(g5)
-	f3g6 := int64(f3) * int64(g6)
-	f3g7_38 := int64(f3_2) * int64(g7_19)
-	f3g8_19 := int64(f3) * int64(g8_19)
-	f3g9_38 := int64(f3_2) * int64(g9_19)
-	f4g0 := int64(f4) * int64(g0)
-	f4g1 := int64(f4) * int64(g1)
-	f4g2 := int64(f4) * int64(g2)
-	f4g3 := int64(f4) * int64(g3)
-	f4g4 := int64(f4) * int64(g4)
-	f4g5 := int64(f4) * int64(g5)
-	f4g6_19 := int64(f4) * int64(g6_19)
-	f4g7_19 := int64(f4) * int64(g7_19)
-	f4g8_19 := int64(f4) * int64(g8_19)
-	f4g9_19 := int64(f4) * int64(g9_19)
-	f5g0 := int64(f5) * int64(g0)
-	f5g1_2 := int64(f5_2) * int64(g1)
-	f5g2 := int64(f5) * int64(g2)
-	f5g3_2 := int64(f5_2) * int64(g3)
-	f5g4 := int64(f5) * int64(g4)
-	f5g5_38 := int64(f5_2) * int64(g5_19)
-	f5g6_19 := int64(f5) * int64(g6_19)
-	f5g7_38 := int64(f5_2) * int64(g7_19)
-	f5g8_19 := int64(f5) * int64(g8_19)
-	f5g9_38 := int64(f5_2) * int64(g9_19)
-	f6g0 := int64(f6) * int64(g0)
-	f6g1 := int64(f6) * int64(g1)
-	f6g2 := int64(f6) * int64(g2)
-	f6g3 := int64(f6) * int64(g3)
-	f6g4_19 := int64(f6) * int64(g4_19)
-	f6g5_19 := int64(f6) * int64(g5_19)
-	f6g6_19 := int64(f6) * int64(g6_19)
-	f6g7_19 := int64(f6) * int64(g7_19)
-	f6g8_19 := int64(f6) * int64(g8_19)
-	f6g9_19 := int64(f6) * int64(g9_19)
-	f7g0 := int64(f7) * int64(g0)
-	f7g1_2 := int64(f7_2) * int64(g1)
-	f7g2 := int64(f7) * int64(g2)
-	f7g3_38 := int64(f7_2) * int64(g3_19)
-	f7g4_19 := int64(f7) * int64(g4_19)
-	f7g5_38 := int64(f7_2) * int64(g5_19)
-	f7g6_19 := int64(f7) * int64(g6_19)
-	f7g7_38 := int64(f7_2) * int64(g7_19)
-	f7g8_19 := int64(f7) * int64(g8_19)
-	f7g9_38 := int64(f7_2) * int64(g9_19)
-	f8g0 := int64(f8) * int64(g0)
-	f8g1 := int64(f8) * int64(g1)
-	f8g2_19 := int64(f8) * int64(g2_19)
-	f8g3_19 := int64(f8) * int64(g3_19)
-	f8g4_19 := int64(f8) * int64(g4_19)
-	f8g5_19 := int64(f8) * int64(g5_19)
-	f8g6_19 := int64(f8) * int64(g6_19)
-	f8g7_19 := int64(f8) * int64(g7_19)
-	f8g8_19 := int64(f8) * int64(g8_19)
-	f8g9_19 := int64(f8) * int64(g9_19)
-	f9g0 := int64(f9) * int64(g0)
-	f9g1_38 := int64(f9_2) * int64(g1_19)
-	f9g2_19 := int64(f9) * int64(g2_19)
-	f9g3_38 := int64(f9_2) * int64(g3_19)
-	f9g4_19 := int64(f9) * int64(g4_19)
-	f9g5_38 := int64(f9_2) * int64(g5_19)
-	f9g6_19 := int64(f9) * int64(g6_19)
-	f9g7_38 := int64(f9_2) * int64(g7_19)
-	f9g8_19 := int64(f9) * int64(g8_19)
-	f9g9_38 := int64(f9_2) * int64(g9_19)
-	h0 := f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38
-	h1 := f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19
-	h2 := f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38
-	h3 := f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19
-	h4 := f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38
-	h5 := f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19
-	h6 := f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38
-	h7 := f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19
-	h8 := f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38
-	h9 := f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0
-	var carry [10]int64
-
-	// |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
-	//   i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
-	// |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
-	//   i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
-
-	carry[0] = (h0 + (1 << 25)) >> 26
-	h1 += carry[0]
-	h0 -= carry[0] << 26
-	carry[4] = (h4 + (1 << 25)) >> 26
-	h5 += carry[4]
-	h4 -= carry[4] << 26
-	// |h0| <= 2^25
-	// |h4| <= 2^25
-	// |h1| <= 1.51*2^58
-	// |h5| <= 1.51*2^58
-
-	carry[1] = (h1 + (1 << 24)) >> 25
-	h2 += carry[1]
-	h1 -= carry[1] << 25
-	carry[5] = (h5 + (1 << 24)) >> 25
-	h6 += carry[5]
-	h5 -= carry[5] << 25
-	// |h1| <= 2^24; from now on fits into int32
-	// |h5| <= 2^24; from now on fits into int32
-	// |h2| <= 1.21*2^59
-	// |h6| <= 1.21*2^59
-
-	carry[2] = (h2 + (1 << 25)) >> 26
-	h3 += carry[2]
-	h2 -= carry[2] << 26
-	carry[6] = (h6 + (1 << 25)) >> 26
-	h7 += carry[6]
-	h6 -= carry[6] << 26
-	// |h2| <= 2^25; from now on fits into int32 unchanged
-	// |h6| <= 2^25; from now on fits into int32 unchanged
-	// |h3| <= 1.51*2^58
-	// |h7| <= 1.51*2^58
-
-	carry[3] = (h3 + (1 << 24)) >> 25
-	h4 += carry[3]
-	h3 -= carry[3] << 25
-	carry[7] = (h7 + (1 << 24)) >> 25
-	h8 += carry[7]
-	h7 -= carry[7] << 25
-	// |h3| <= 2^24; from now on fits into int32 unchanged
-	// |h7| <= 2^24; from now on fits into int32 unchanged
-	// |h4| <= 1.52*2^33
-	// |h8| <= 1.52*2^33
-
-	carry[4] = (h4 + (1 << 25)) >> 26
-	h5 += carry[4]
-	h4 -= carry[4] << 26
-	carry[8] = (h8 + (1 << 25)) >> 26
-	h9 += carry[8]
-	h8 -= carry[8] << 26
-	// |h4| <= 2^25; from now on fits into int32 unchanged
-	// |h8| <= 2^25; from now on fits into int32 unchanged
-	// |h5| <= 1.01*2^24
-	// |h9| <= 1.51*2^58
-
-	carry[9] = (h9 + (1 << 24)) >> 25
-	h0 += carry[9] * 19
-	h9 -= carry[9] << 25
-	// |h9| <= 2^24; from now on fits into int32 unchanged
-	// |h0| <= 1.8*2^37
-
-	carry[0] = (h0 + (1 << 25)) >> 26
-	h1 += carry[0]
-	h0 -= carry[0] << 26
-	// |h0| <= 2^25; from now on fits into int32 unchanged
-	// |h1| <= 1.01*2^24
-
-	h[0] = int32(h0)
-	h[1] = int32(h1)
-	h[2] = int32(h2)
-	h[3] = int32(h3)
-	h[4] = int32(h4)
-	h[5] = int32(h5)
-	h[6] = int32(h6)
-	h[7] = int32(h7)
-	h[8] = int32(h8)
-	h[9] = int32(h9)
-}
-
-// feSquare calculates h = f*f. Can overlap h with f.
-//
-// Preconditions:
-//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
-//
-// Postconditions:
-//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
-func feSquare(h, f *fieldElement) {
-	f0 := f[0]
-	f1 := f[1]
-	f2 := f[2]
-	f3 := f[3]
-	f4 := f[4]
-	f5 := f[5]
-	f6 := f[6]
-	f7 := f[7]
-	f8 := f[8]
-	f9 := f[9]
-	f0_2 := 2 * f0
-	f1_2 := 2 * f1
-	f2_2 := 2 * f2
-	f3_2 := 2 * f3
-	f4_2 := 2 * f4
-	f5_2 := 2 * f5
-	f6_2 := 2 * f6
-	f7_2 := 2 * f7
-	f5_38 := 38 * f5 // 1.31*2^30
-	f6_19 := 19 * f6 // 1.31*2^30
-	f7_38 := 38 * f7 // 1.31*2^30
-	f8_19 := 19 * f8 // 1.31*2^30
-	f9_38 := 38 * f9 // 1.31*2^30
-	f0f0 := int64(f0) * int64(f0)
-	f0f1_2 := int64(f0_2) * int64(f1)
-	f0f2_2 := int64(f0_2) * int64(f2)
-	f0f3_2 := int64(f0_2) * int64(f3)
-	f0f4_2 := int64(f0_2) * int64(f4)
-	f0f5_2 := int64(f0_2) * int64(f5)
-	f0f6_2 := int64(f0_2) * int64(f6)
-	f0f7_2 := int64(f0_2) * int64(f7)
-	f0f8_2 := int64(f0_2) * int64(f8)
-	f0f9_2 := int64(f0_2) * int64(f9)
-	f1f1_2 := int64(f1_2) * int64(f1)
-	f1f2_2 := int64(f1_2) * int64(f2)
-	f1f3_4 := int64(f1_2) * int64(f3_2)
-	f1f4_2 := int64(f1_2) * int64(f4)
-	f1f5_4 := int64(f1_2) * int64(f5_2)
-	f1f6_2 := int64(f1_2) * int64(f6)
-	f1f7_4 := int64(f1_2) * int64(f7_2)
-	f1f8_2 := int64(f1_2) * int64(f8)
-	f1f9_76 := int64(f1_2) * int64(f9_38)
-	f2f2 := int64(f2) * int64(f2)
-	f2f3_2 := int64(f2_2) * int64(f3)
-	f2f4_2 := int64(f2_2) * int64(f4)
-	f2f5_2 := int64(f2_2) * int64(f5)
-	f2f6_2 := int64(f2_2) * int64(f6)
-	f2f7_2 := int64(f2_2) * int64(f7)
-	f2f8_38 := int64(f2_2) * int64(f8_19)
-	f2f9_38 := int64(f2) * int64(f9_38)
-	f3f3_2 := int64(f3_2) * int64(f3)
-	f3f4_2 := int64(f3_2) * int64(f4)
-	f3f5_4 := int64(f3_2) * int64(f5_2)
-	f3f6_2 := int64(f3_2) * int64(f6)
-	f3f7_76 := int64(f3_2) * int64(f7_38)
-	f3f8_38 := int64(f3_2) * int64(f8_19)
-	f3f9_76 := int64(f3_2) * int64(f9_38)
-	f4f4 := int64(f4) * int64(f4)
-	f4f5_2 := int64(f4_2) * int64(f5)
-	f4f6_38 := int64(f4_2) * int64(f6_19)
-	f4f7_38 := int64(f4) * int64(f7_38)
-	f4f8_38 := int64(f4_2) * int64(f8_19)
-	f4f9_38 := int64(f4) * int64(f9_38)
-	f5f5_38 := int64(f5) * int64(f5_38)
-	f5f6_38 := int64(f5_2) * int64(f6_19)
-	f5f7_76 := int64(f5_2) * int64(f7_38)
-	f5f8_38 := int64(f5_2) * int64(f8_19)
-	f5f9_76 := int64(f5_2) * int64(f9_38)
-	f6f6_19 := int64(f6) * int64(f6_19)
-	f6f7_38 := int64(f6) * int64(f7_38)
-	f6f8_38 := int64(f6_2) * int64(f8_19)
-	f6f9_38 := int64(f6) * int64(f9_38)
-	f7f7_38 := int64(f7) * int64(f7_38)
-	f7f8_38 := int64(f7_2) * int64(f8_19)
-	f7f9_76 := int64(f7_2) * int64(f9_38)
-	f8f8_19 := int64(f8) * int64(f8_19)
-	f8f9_38 := int64(f8) * int64(f9_38)
-	f9f9_38 := int64(f9) * int64(f9_38)
-	h0 := f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38
-	h1 := f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38
-	h2 := f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19
-	h3 := f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38
-	h4 := f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38
-	h5 := f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38
-	h6 := f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19
-	h7 := f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38
-	h8 := f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38
-	h9 := f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2
-	var carry [10]int64
-
-	carry[0] = (h0 + (1 << 25)) >> 26
-	h1 += carry[0]
-	h0 -= carry[0] << 26
-	carry[4] = (h4 + (1 << 25)) >> 26
-	h5 += carry[4]
-	h4 -= carry[4] << 26
-
-	carry[1] = (h1 + (1 << 24)) >> 25
-	h2 += carry[1]
-	h1 -= carry[1] << 25
-	carry[5] = (h5 + (1 << 24)) >> 25
-	h6 += carry[5]
-	h5 -= carry[5] << 25
-
-	carry[2] = (h2 + (1 << 25)) >> 26
-	h3 += carry[2]
-	h2 -= carry[2] << 26
-	carry[6] = (h6 + (1 << 25)) >> 26
-	h7 += carry[6]
-	h6 -= carry[6] << 26
-
-	carry[3] = (h3 + (1 << 24)) >> 25
-	h4 += carry[3]
-	h3 -= carry[3] << 25
-	carry[7] = (h7 + (1 << 24)) >> 25
-	h8 += carry[7]
-	h7 -= carry[7] << 25
-
-	carry[4] = (h4 + (1 << 25)) >> 26
-	h5 += carry[4]
-	h4 -= carry[4] << 26
-	carry[8] = (h8 + (1 << 25)) >> 26
-	h9 += carry[8]
-	h8 -= carry[8] << 26
-
-	carry[9] = (h9 + (1 << 24)) >> 25
-	h0 += carry[9] * 19
-	h9 -= carry[9] << 25
-
-	carry[0] = (h0 + (1 << 25)) >> 26
-	h1 += carry[0]
-	h0 -= carry[0] << 26
-
-	h[0] = int32(h0)
-	h[1] = int32(h1)
-	h[2] = int32(h2)
-	h[3] = int32(h3)
-	h[4] = int32(h4)
-	h[5] = int32(h5)
-	h[6] = int32(h6)
-	h[7] = int32(h7)
-	h[8] = int32(h8)
-	h[9] = int32(h9)
-}
-
-// feMul121666 calculates h = f * 121666. Can overlap h with f.
-//
-// Preconditions:
-//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
-//
-// Postconditions:
-//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
-func feMul121666(h, f *fieldElement) {
-	h0 := int64(f[0]) * 121666
-	h1 := int64(f[1]) * 121666
-	h2 := int64(f[2]) * 121666
-	h3 := int64(f[3]) * 121666
-	h4 := int64(f[4]) * 121666
-	h5 := int64(f[5]) * 121666
-	h6 := int64(f[6]) * 121666
-	h7 := int64(f[7]) * 121666
-	h8 := int64(f[8]) * 121666
-	h9 := int64(f[9]) * 121666
-	var carry [10]int64
-
-	carry[9] = (h9 + (1 << 24)) >> 25
-	h0 += carry[9] * 19
-	h9 -= carry[9] << 25
-	carry[1] = (h1 + (1 << 24)) >> 25
-	h2 += carry[1]
-	h1 -= carry[1] << 25
-	carry[3] = (h3 + (1 << 24)) >> 25
-	h4 += carry[3]
-	h3 -= carry[3] << 25
-	carry[5] = (h5 + (1 << 24)) >> 25
-	h6 += carry[5]
-	h5 -= carry[5] << 25
-	carry[7] = (h7 + (1 << 24)) >> 25
-	h8 += carry[7]
-	h7 -= carry[7] << 25
-
-	carry[0] = (h0 + (1 << 25)) >> 26
-	h1 += carry[0]
-	h0 -= carry[0] << 26
-	carry[2] = (h2 + (1 << 25)) >> 26
-	h3 += carry[2]
-	h2 -= carry[2] << 26
-	carry[4] = (h4 + (1 << 25)) >> 26
-	h5 += carry[4]
-	h4 -= carry[4] << 26
-	carry[6] = (h6 + (1 << 25)) >> 26
-	h7 += carry[6]
-	h6 -= carry[6] << 26
-	carry[8] = (h8 + (1 << 25)) >> 26
-	h9 += carry[8]
-	h8 -= carry[8] << 26
-
-	h[0] = int32(h0)
-	h[1] = int32(h1)
-	h[2] = int32(h2)
-	h[3] = int32(h3)
-	h[4] = int32(h4)
-	h[5] = int32(h5)
-	h[6] = int32(h6)
-	h[7] = int32(h7)
-	h[8] = int32(h8)
-	h[9] = int32(h9)
-}
-
-// feInvert sets out = z^-1.
-func feInvert(out, z *fieldElement) {
-	var t0, t1, t2, t3 fieldElement
-	var i int
-
-	feSquare(&t0, z)
-	for i = 1; i < 1; i++ {
-		feSquare(&t0, &t0)
-	}
-	feSquare(&t1, &t0)
-	for i = 1; i < 2; i++ {
-		feSquare(&t1, &t1)
-	}
-	feMul(&t1, z, &t1)
-	feMul(&t0, &t0, &t1)
-	feSquare(&t2, &t0)
-	for i = 1; i < 1; i++ {
-		feSquare(&t2, &t2)
-	}
-	feMul(&t1, &t1, &t2)
-	feSquare(&t2, &t1)
-	for i = 1; i < 5; i++ {
-		feSquare(&t2, &t2)
-	}
-	feMul(&t1, &t2, &t1)
-	feSquare(&t2, &t1)
-	for i = 1; i < 10; i++ {
-		feSquare(&t2, &t2)
-	}
-	feMul(&t2, &t2, &t1)
-	feSquare(&t3, &t2)
-	for i = 1; i < 20; i++ {
-		feSquare(&t3, &t3)
-	}
-	feMul(&t2, &t3, &t2)
-	feSquare(&t2, &t2)
-	for i = 1; i < 10; i++ {
-		feSquare(&t2, &t2)
-	}
-	feMul(&t1, &t2, &t1)
-	feSquare(&t2, &t1)
-	for i = 1; i < 50; i++ {
-		feSquare(&t2, &t2)
-	}
-	feMul(&t2, &t2, &t1)
-	feSquare(&t3, &t2)
-	for i = 1; i < 100; i++ {
-		feSquare(&t3, &t3)
-	}
-	feMul(&t2, &t3, &t2)
-	feSquare(&t2, &t2)
-	for i = 1; i < 50; i++ {
-		feSquare(&t2, &t2)
-	}
-	feMul(&t1, &t2, &t1)
-	feSquare(&t1, &t1)
-	for i = 1; i < 5; i++ {
-		feSquare(&t1, &t1)
-	}
-	feMul(out, &t1, &t0)
-}
-
-func scalarMult(out, in, base *[32]byte) {
-	var e [32]byte
-
-	copy(e[:], in[:])
-	e[0] &= 248
-	e[31] &= 127
-	e[31] |= 64
-
-	var x1, x2, z2, x3, z3, tmp0, tmp1 fieldElement
-	feFromBytes(&x1, base)
-	feOne(&x2)
-	feCopy(&x3, &x1)
-	feOne(&z3)
-
-	swap := int32(0)
-	for pos := 254; pos >= 0; pos-- {
-		b := e[pos/8] >> uint(pos&7)
-		b &= 1
-		swap ^= int32(b)
-		feCSwap(&x2, &x3, swap)
-		feCSwap(&z2, &z3, swap)
-		swap = int32(b)
-
-		feSub(&tmp0, &x3, &z3)
-		feSub(&tmp1, &x2, &z2)
-		feAdd(&x2, &x2, &z2)
-		feAdd(&z2, &x3, &z3)
-		feMul(&z3, &tmp0, &x2)
-		feMul(&z2, &z2, &tmp1)
-		feSquare(&tmp0, &tmp1)
-		feSquare(&tmp1, &x2)
-		feAdd(&x3, &z3, &z2)
-		feSub(&z2, &z3, &z2)
-		feMul(&x2, &tmp1, &tmp0)
-		feSub(&tmp1, &tmp1, &tmp0)
-		feSquare(&z2, &z2)
-		feMul121666(&z3, &tmp1)
-		feSquare(&x3, &x3)
-		feAdd(&tmp0, &tmp0, &z3)
-		feMul(&z3, &x1, &z2)
-		feMul(&z2, &tmp1, &tmp0)
-	}
-
-	feCSwap(&x2, &x3, swap)
-	feCSwap(&z2, &z3, swap)
-
-	feInvert(&z2, &z2)
-	feMul(&x2, &x2, &z2)
-	feToBytes(out, &x2)
+	if l := len(point); l != 32 {
+		return nil, fmt.Errorf("bad point length: %d, expected %d", l, 32)
+	}
+	copy(in[:], scalar)
+	if &point[0] == &Basepoint[0] {
+		checkBasepoint()
+		ScalarBaseMult(dst, &in)
+	} else {
+		var base, zero [32]byte
+		copy(base[:], point)
+		ScalarMult(dst, &in, &base)
+		if subtle.ConstantTimeCompare(dst[:], zero[:]) == 1 {
+			return nil, fmt.Errorf("bad input point: low order point")
+		}
+	}
+	return dst[:], nil
 }
--- a/vendor/golang.org/x/crypto/curve25519/curve25519_amd64.go
+++ b/vendor/golang.org/x/crypto/curve25519/curve25519_amd64.go
@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// +build amd64,!gccgo,!appengine
+// +build amd64,!gccgo,!appengine,!purego

 package curve25519

--- a/vendor/golang.org/x/crypto/curve25519/curve25519_amd64.s
+++ b/vendor/golang.org/x/crypto/curve25519/curve25519_amd64.s
@ -5,9 +5,84 @@
 // This code was translated into a form compatible with 6a from the public
 // domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html

-// +build amd64,!gccgo,!appengine
+// +build amd64,!gccgo,!appengine,!purego

-#include "const_amd64.h"
+#define REDMASK51     0x0007FFFFFFFFFFFF
+
+// These constants cannot be encoded in non-MOVQ immediates.
+// We access them directly from memory instead.
+
+DATA ·_121666_213(SB)/8, $996687872
+GLOBL ·_121666_213(SB), 8, $8
+
+DATA ·_2P0(SB)/8, $0xFFFFFFFFFFFDA
+GLOBL ·_2P0(SB), 8, $8
+
+DATA ·_2P1234(SB)/8, $0xFFFFFFFFFFFFE
+GLOBL ·_2P1234(SB), 8, $8
+
+// func freeze(inout *[5]uint64)
+TEXT ·freeze(SB),7,$0-8
+	MOVQ inout+0(FP), DI
+
+	MOVQ 0(DI),SI
+	MOVQ 8(DI),DX
+	MOVQ 16(DI),CX
+	MOVQ 24(DI),R8
+	MOVQ 32(DI),R9
+	MOVQ $REDMASK51,AX
+	MOVQ AX,R10
+	SUBQ $18,R10
+	MOVQ $3,R11
+REDUCELOOP:
+	MOVQ SI,R12
+	SHRQ $51,R12
+	ANDQ AX,SI
+	ADDQ R12,DX
+	MOVQ DX,R12
+	SHRQ $51,R12
+	ANDQ AX,DX
+	ADDQ R12,CX
+	MOVQ CX,R12
+	SHRQ $51,R12
+	ANDQ AX,CX
+	ADDQ R12,R8
+	MOVQ R8,R12
+	SHRQ $51,R12
+	ANDQ AX,R8
+	ADDQ R12,R9
+	MOVQ R9,R12
+	SHRQ $51,R12
+	ANDQ AX,R9
+	IMUL3Q $19,R12,R12
+	ADDQ R12,SI
+	SUBQ $1,R11
+	JA REDUCELOOP
+	MOVQ $1,R12
+	CMPQ R10,SI
+	CMOVQLT R11,R12
+	CMPQ AX,DX
+	CMOVQNE R11,R12
+	CMPQ AX,CX
+	CMOVQNE R11,R12
+	CMPQ AX,R8
+	CMOVQNE R11,R12
+	CMPQ AX,R9
+	CMOVQNE R11,R12
+	NEGQ R12
+	ANDQ R12,AX
+	ANDQ R12,R10
+	SUBQ R10,SI
+	SUBQ AX,DX
+	SUBQ AX,CX
+	SUBQ AX,R8
+	SUBQ AX,R9
+	MOVQ SI,0(DI)
+	MOVQ DX,8(DI)
+	MOVQ CX,16(DI)
+	MOVQ R8,24(DI)
+	MOVQ R9,32(DI)
+	RET

 // func ladderstep(inout *[5][5]uint64)
 TEXT ·ladderstep(SB),0,$296-8
@ -1375,3 +1450,344 @@ TEXT ·ladderstep(SB),0,$296-8
 	MOVQ AX,104(DI)
 	MOVQ R10,112(DI)
 	RET
+
+// func cswap(inout *[4][5]uint64, v uint64)
+TEXT ·cswap(SB),7,$0
+	MOVQ inout+0(FP),DI
+	MOVQ v+8(FP),SI
+
+	SUBQ $1, SI
+	NOTQ SI
+	MOVQ SI, X15
+	PSHUFD $0x44, X15, X15
+
+	MOVOU 0(DI), X0
+	MOVOU 16(DI), X2
+	MOVOU 32(DI), X4
+	MOVOU 48(DI), X6
+	MOVOU 64(DI), X8
+	MOVOU 80(DI), X1
+	MOVOU 96(DI), X3
+	MOVOU 112(DI), X5
+	MOVOU 128(DI), X7
+	MOVOU 144(DI), X9
+
+	MOVO X1, X10
+	MOVO X3, X11
+	MOVO X5, X12
+	MOVO X7, X13
+	MOVO X9, X14
+
+	PXOR X0, X10
+	PXOR X2, X11
+	PXOR X4, X12
+	PXOR X6, X13
+	PXOR X8, X14
+	PAND X15, X10
+	PAND X15, X11
+	PAND X15, X12
+	PAND X15, X13
+	PAND X15, X14
+	PXOR X10, X0
+	PXOR X10, X1
+	PXOR X11, X2
+	PXOR X11, X3
+	PXOR X12, X4
+	PXOR X12, X5
+	PXOR X13, X6
+	PXOR X13, X7
+	PXOR X14, X8
+	PXOR X14, X9
+
+	MOVOU X0, 0(DI)
+	MOVOU X2, 16(DI)
+	MOVOU X4, 32(DI)
+	MOVOU X6, 48(DI)
+	MOVOU X8, 64(DI)
+	MOVOU X1, 80(DI)
+	MOVOU X3, 96(DI)
+	MOVOU X5, 112(DI)
+	MOVOU X7, 128(DI)
+	MOVOU X9, 144(DI)
+	RET
+
+// func mul(dest, a, b *[5]uint64)
+TEXT ·mul(SB),0,$16-24
+	MOVQ dest+0(FP), DI
+	MOVQ a+8(FP), SI
+	MOVQ b+16(FP), DX
+
+	MOVQ DX,CX
+	MOVQ 24(SI),DX
+	IMUL3Q $19,DX,AX
+	MOVQ AX,0(SP)
+	MULQ 16(CX)
+	MOVQ AX,R8
+	MOVQ DX,R9
+	MOVQ 32(SI),DX
+	IMUL3Q $19,DX,AX
+	MOVQ AX,8(SP)
+	MULQ 8(CX)
+	ADDQ AX,R8
+	ADCQ DX,R9
+	MOVQ 0(SI),AX
+	MULQ 0(CX)
+	ADDQ AX,R8
+	ADCQ DX,R9
+	MOVQ 0(SI),AX
+	MULQ 8(CX)
+	MOVQ AX,R10
+	MOVQ DX,R11
+	MOVQ 0(SI),AX
+	MULQ 16(CX)
+	MOVQ AX,R12
+	MOVQ DX,R13
+	MOVQ 0(SI),AX
+	MULQ 24(CX)
+	MOVQ AX,R14
+	MOVQ DX,R15
+	MOVQ 0(SI),AX
+	MULQ 32(CX)
+	MOVQ AX,BX
+	MOVQ DX,BP
+	MOVQ 8(SI),AX
+	MULQ 0(CX)
+	ADDQ AX,R10
+	ADCQ DX,R11
+	MOVQ 8(SI),AX
+	MULQ 8(CX)
+	ADDQ AX,R12
+	ADCQ DX,R13
+	MOVQ 8(SI),AX
+	MULQ 16(CX)
+	ADDQ AX,R14
+	ADCQ DX,R15
+	MOVQ 8(SI),AX
+	MULQ 24(CX)
+	ADDQ AX,BX
+	ADCQ DX,BP
+	MOVQ 8(SI),DX
+	IMUL3Q $19,DX,AX
+	MULQ 32(CX)
+	ADDQ AX,R8
+	ADCQ DX,R9
+	MOVQ 16(SI),AX
+	MULQ 0(CX)
+	ADDQ AX,R12
+	ADCQ DX,R13
+	MOVQ 16(SI),AX
+	MULQ 8(CX)
+	ADDQ AX,R14
+	ADCQ DX,R15
+	MOVQ 16(SI),AX
+	MULQ 16(CX)
+	ADDQ AX,BX
+	ADCQ DX,BP
+	MOVQ 16(SI),DX
+	IMUL3Q $19,DX,AX
+	MULQ 24(CX)
+	ADDQ AX,R8
+	ADCQ DX,R9
+	MOVQ 16(SI),DX
+	IMUL3Q $19,DX,AX
+	MULQ 32(CX)
+	ADDQ AX,R10
+	ADCQ DX,R11
+	MOVQ 24(SI),AX
+	MULQ 0(CX)
+	ADDQ AX,R14
+	ADCQ DX,R15
+	MOVQ 24(SI),AX
+	MULQ 8(CX)
+	ADDQ AX,BX
+	ADCQ DX,BP
+	MOVQ 0(SP),AX
+	MULQ 24(CX)
+	ADDQ AX,R10
+	ADCQ DX,R11
+	MOVQ 0(SP),AX
+	MULQ 32(CX)
+	ADDQ AX,R12
+	ADCQ DX,R13
+	MOVQ 32(SI),AX
+	MULQ 0(CX)
+	ADDQ AX,BX
+	ADCQ DX,BP
+	MOVQ 8(SP),AX
+	MULQ 16(CX)
+	ADDQ AX,R10
+	ADCQ DX,R11
+	MOVQ 8(SP),AX
+	MULQ 24(CX)
+	ADDQ AX,R12
+	ADCQ DX,R13
+	MOVQ 8(SP),AX
+	MULQ 32(CX)
+	ADDQ AX,R14
+	ADCQ DX,R15
+	MOVQ $REDMASK51,SI
+	SHLQ $13,R8,R9
+	ANDQ SI,R8
+	SHLQ $13,R10,R11
+	ANDQ SI,R10
+	ADDQ R9,R10
+	SHLQ $13,R12,R13
+	ANDQ SI,R12
+	ADDQ R11,R12
+	SHLQ $13,R14,R15
+	ANDQ SI,R14
+	ADDQ R13,R14
+	SHLQ $13,BX,BP
+	ANDQ SI,BX
+	ADDQ R15,BX
+	IMUL3Q $19,BP,DX
+	ADDQ DX,R8
+	MOVQ R8,DX
+	SHRQ $51,DX
+	ADDQ R10,DX
+	MOVQ DX,CX
+	SHRQ $51,DX
+	ANDQ SI,R8
+	ADDQ R12,DX
+	MOVQ DX,R9
+	SHRQ $51,DX
+	ANDQ SI,CX
+	ADDQ R14,DX
+	MOVQ DX,AX
+	SHRQ $51,DX
+	ANDQ SI,R9
+	ADDQ BX,DX
+	MOVQ DX,R10
+	SHRQ $51,DX
+	ANDQ SI,AX
+	IMUL3Q $19,DX,DX
+	ADDQ DX,R8
+	ANDQ SI,R10
+	MOVQ R8,0(DI)
+	MOVQ CX,8(DI)
+	MOVQ R9,16(DI)
+	MOVQ AX,24(DI)
+	MOVQ R10,32(DI)
+	RET
+
+// func square(out, in *[5]uint64)
+TEXT ·square(SB),7,$0-16
+	MOVQ out+0(FP), DI
+	MOVQ in+8(FP), SI
+
+	MOVQ 0(SI),AX
+	MULQ 0(SI)
+	MOVQ AX,CX
+	MOVQ DX,R8
+	MOVQ 0(SI),AX
+	SHLQ $1,AX
+	MULQ 8(SI)
+	MOVQ AX,R9
+	MOVQ DX,R10
+	MOVQ 0(SI),AX
+	SHLQ $1,AX
+	MULQ 16(SI)
+	MOVQ AX,R11
+	MOVQ DX,R12
+	MOVQ 0(SI),AX
+	SHLQ $1,AX
+	MULQ 24(SI)
+	MOVQ AX,R13
+	MOVQ DX,R14
+	MOVQ 0(SI),AX
+	SHLQ $1,AX
+	MULQ 32(SI)
+	MOVQ AX,R15
+	MOVQ DX,BX
+	MOVQ 8(SI),AX
+	MULQ 8(SI)
+	ADDQ AX,R11
+	ADCQ DX,R12
+	MOVQ 8(SI),AX
+	SHLQ $1,AX
+	MULQ 16(SI)
+	ADDQ AX,R13
+	ADCQ DX,R14
+	MOVQ 8(SI),AX
+	SHLQ $1,AX
+	MULQ 24(SI)
+	ADDQ AX,R15
+	ADCQ DX,BX
+	MOVQ 8(SI),DX
+	IMUL3Q $38,DX,AX
+	MULQ 32(SI)
+	ADDQ AX,CX
+	ADCQ DX,R8
+	MOVQ 16(SI),AX
+	MULQ 16(SI)
+	ADDQ AX,R15
+	ADCQ DX,BX
+	MOVQ 16(SI),DX
+	IMUL3Q $38,DX,AX
+	MULQ 24(SI)
+	ADDQ AX,CX
+	ADCQ DX,R8
+	MOVQ 16(SI),DX
+	IMUL3Q $38,DX,AX
+	MULQ 32(SI)
+	ADDQ AX,R9
+	ADCQ DX,R10
+	MOVQ 24(SI),DX
+	IMUL3Q $19,DX,AX
+	MULQ 24(SI)
+	ADDQ AX,R9
+	ADCQ DX,R10
+	MOVQ 24(SI),DX
+	IMUL3Q $38,DX,AX
+	MULQ 32(SI)
+	ADDQ AX,R11
+	ADCQ DX,R12
+	MOVQ 32(SI),DX
+	IMUL3Q $19,DX,AX
+	MULQ 32(SI)
+	ADDQ AX,R13
+	ADCQ DX,R14
+	MOVQ $REDMASK51,SI
+	SHLQ $13,CX,R8
+	ANDQ SI,CX
+	SHLQ $13,R9,R10
+	ANDQ SI,R9
+	ADDQ R8,R9
+	SHLQ $13,R11,R12
+	ANDQ SI,R11
+	ADDQ R10,R11
+	SHLQ $13,R13,R14
+	ANDQ SI,R13
+	ADDQ R12,R13
+	SHLQ $13,R15,BX
+	ANDQ SI,R15
+	ADDQ R14,R15
+	IMUL3Q $19,BX,DX
+	ADDQ DX,CX
+	MOVQ CX,DX
+	SHRQ $51,DX
+	ADDQ R9,DX
+	ANDQ SI,CX
+	MOVQ DX,R8
+	SHRQ $51,DX
+	ADDQ R11,DX
+	ANDQ SI,R8
+	MOVQ DX,R9
+	SHRQ $51,DX
+	ADDQ R13,DX
+	ANDQ SI,R9
+	MOVQ DX,AX
+	SHRQ $51,DX
+	ADDQ R15,DX
+	ANDQ SI,AX
+	MOVQ DX,R10
+	SHRQ $51,DX
+	IMUL3Q $19,DX,DX
+	ADDQ DX,CX
+	ANDQ SI,R10
+	MOVQ CX,0(DI)
+	MOVQ R8,8(DI)
+	MOVQ R9,16(DI)
+	MOVQ AX,24(DI)
+	MOVQ R10,32(DI)
+	RET
--- a/vendor/golang.org/x/crypto/curve25519/curve25519_generic.go
+++ b/vendor/golang.org/x/crypto/curve25519/curve25519_generic.go
@ -0,0 +1,828 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package curve25519
+
+import "encoding/binary"
+
+// This code is a port of the public domain, "ref10" implementation of
+// curve25519 from SUPERCOP 20130419 by D. J. Bernstein.
+
+// fieldElement represents an element of the field GF(2^255 - 19). An element
+// t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
+// t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on
+// context.
+type fieldElement [10]int32
+
+func feZero(fe *fieldElement) {
+	for i := range fe {
+		fe[i] = 0
+	}
+}
+
+func feOne(fe *fieldElement) {
+	feZero(fe)
+	fe[0] = 1
+}
+
+func feAdd(dst, a, b *fieldElement) {
+	for i := range dst {
+		dst[i] = a[i] + b[i]
+	}
+}
+
+func feSub(dst, a, b *fieldElement) {
+	for i := range dst {
+		dst[i] = a[i] - b[i]
+	}
+}
+
+func feCopy(dst, src *fieldElement) {
+	for i := range dst {
+		dst[i] = src[i]
+	}
+}
+
+// feCSwap replaces (f,g) with (g,f) if b == 1; replaces (f,g) with (f,g) if b == 0.
+//
+// Preconditions: b in {0,1}.
+func feCSwap(f, g *fieldElement, b int32) {
+	b = -b
+	for i := range f {
+		t := b & (f[i] ^ g[i])
+		f[i] ^= t
+		g[i] ^= t
+	}
+}
+
+// load3 reads a 24-bit, little-endian value from in.
+func load3(in []byte) int64 {
+	var r int64
+	r = int64(in[0])
+	r |= int64(in[1]) << 8
+	r |= int64(in[2]) << 16
+	return r
+}
+
+// load4 reads a 32-bit, little-endian value from in.
+func load4(in []byte) int64 {
+	return int64(binary.LittleEndian.Uint32(in))
+}
+
+func feFromBytes(dst *fieldElement, src *[32]byte) {
+	h0 := load4(src[:])
+	h1 := load3(src[4:]) << 6
+	h2 := load3(src[7:]) << 5
+	h3 := load3(src[10:]) << 3
+	h4 := load3(src[13:]) << 2
+	h5 := load4(src[16:])
+	h6 := load3(src[20:]) << 7
+	h7 := load3(src[23:]) << 5
+	h8 := load3(src[26:]) << 4
+	h9 := (load3(src[29:]) & 0x7fffff) << 2
+
+	var carry [10]int64
+	carry[9] = (h9 + 1<<24) >> 25
+	h0 += carry[9] * 19
+	h9 -= carry[9] << 25
+	carry[1] = (h1 + 1<<24) >> 25
+	h2 += carry[1]
+	h1 -= carry[1] << 25
+	carry[3] = (h3 + 1<<24) >> 25
+	h4 += carry[3]
+	h3 -= carry[3] << 25
+	carry[5] = (h5 + 1<<24) >> 25
+	h6 += carry[5]
+	h5 -= carry[5] << 25
+	carry[7] = (h7 + 1<<24) >> 25
+	h8 += carry[7]
+	h7 -= carry[7] << 25
+
+	carry[0] = (h0 + 1<<25) >> 26
+	h1 += carry[0]
+	h0 -= carry[0] << 26
+	carry[2] = (h2 + 1<<25) >> 26
+	h3 += carry[2]
+	h2 -= carry[2] << 26
+	carry[4] = (h4 + 1<<25) >> 26
+	h5 += carry[4]
+	h4 -= carry[4] << 26
+	carry[6] = (h6 + 1<<25) >> 26
+	h7 += carry[6]
+	h6 -= carry[6] << 26
+	carry[8] = (h8 + 1<<25) >> 26
+	h9 += carry[8]
+	h8 -= carry[8] << 26
+
+	dst[0] = int32(h0)
+	dst[1] = int32(h1)
+	dst[2] = int32(h2)
+	dst[3] = int32(h3)
+	dst[4] = int32(h4)
+	dst[5] = int32(h5)
+	dst[6] = int32(h6)
+	dst[7] = int32(h7)
+	dst[8] = int32(h8)
+	dst[9] = int32(h9)
+}
+
+// feToBytes marshals h to s.
+// Preconditions:
+//   |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+//
+// Write p=2^255-19; q=floor(h/p).
+// Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
+//
+// Proof:
+//   Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
+//   Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4.
+//
+//   Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
+//   Then 0<y<1.
+//
+//   Write r=h-pq.
+//   Have 0<=r<=p-1=2^255-20.
+//   Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
+//
+//   Write x=r+19(2^-255)r+y.
+//   Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
+//
+//   Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
+//   so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
+func feToBytes(s *[32]byte, h *fieldElement) {
+	var carry [10]int32
+
+	q := (19*h[9] + (1 << 24)) >> 25
+	q = (h[0] + q) >> 26
+	q = (h[1] + q) >> 25
+	q = (h[2] + q) >> 26
+	q = (h[3] + q) >> 25
+	q = (h[4] + q) >> 26
+	q = (h[5] + q) >> 25
+	q = (h[6] + q) >> 26
+	q = (h[7] + q) >> 25
+	q = (h[8] + q) >> 26
+	q = (h[9] + q) >> 25
+
+	// Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20.
+	h[0] += 19 * q
+	// Goal: Output h-2^255 q, which is between 0 and 2^255-20.
+
+	carry[0] = h[0] >> 26
+	h[1] += carry[0]
+	h[0] -= carry[0] << 26
+	carry[1] = h[1] >> 25
+	h[2] += carry[1]
+	h[1] -= carry[1] << 25
+	carry[2] = h[2] >> 26
+	h[3] += carry[2]
+	h[2] -= carry[2] << 26
+	carry[3] = h[3] >> 25
+	h[4] += carry[3]
+	h[3] -= carry[3] << 25
+	carry[4] = h[4] >> 26
+	h[5] += carry[4]
+	h[4] -= carry[4] << 26
+	carry[5] = h[5] >> 25
+	h[6] += carry[5]
+	h[5] -= carry[5] << 25
+	carry[6] = h[6] >> 26
+	h[7] += carry[6]
+	h[6] -= carry[6] << 26
+	carry[7] = h[7] >> 25
+	h[8] += carry[7]
+	h[7] -= carry[7] << 25
+	carry[8] = h[8] >> 26
+	h[9] += carry[8]
+	h[8] -= carry[8] << 26
+	carry[9] = h[9] >> 25
+	h[9] -= carry[9] << 25
+	// h10 = carry9
+
+	// Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
+	// Have h[0]+...+2^230 h[9] between 0 and 2^255-1;
+	// evidently 2^255 h10-2^255 q = 0.
+	// Goal: Output h[0]+...+2^230 h[9].
+
+	s[0] = byte(h[0] >> 0)
+	s[1] = byte(h[0] >> 8)
+	s[2] = byte(h[0] >> 16)
+	s[3] = byte((h[0] >> 24) | (h[1] << 2))
+	s[4] = byte(h[1] >> 6)
+	s[5] = byte(h[1] >> 14)
+	s[6] = byte((h[1] >> 22) | (h[2] << 3))
+	s[7] = byte(h[2] >> 5)
+	s[8] = byte(h[2] >> 13)
+	s[9] = byte((h[2] >> 21) | (h[3] << 5))
+	s[10] = byte(h[3] >> 3)
+	s[11] = byte(h[3] >> 11)
+	s[12] = byte((h[3] >> 19) | (h[4] << 6))
+	s[13] = byte(h[4] >> 2)
+	s[14] = byte(h[4] >> 10)
+	s[15] = byte(h[4] >> 18)
+	s[16] = byte(h[5] >> 0)
+	s[17] = byte(h[5] >> 8)
+	s[18] = byte(h[5] >> 16)
+	s[19] = byte((h[5] >> 24) | (h[6] << 1))
+	s[20] = byte(h[6] >> 7)
+	s[21] = byte(h[6] >> 15)
+	s[22] = byte((h[6] >> 23) | (h[7] << 3))
+	s[23] = byte(h[7] >> 5)
+	s[24] = byte(h[7] >> 13)
+	s[25] = byte((h[7] >> 21) | (h[8] << 4))
+	s[26] = byte(h[8] >> 4)
+	s[27] = byte(h[8] >> 12)
+	s[28] = byte((h[8] >> 20) | (h[9] << 6))
+	s[29] = byte(h[9] >> 2)
+	s[30] = byte(h[9] >> 10)
+	s[31] = byte(h[9] >> 18)
+}
+
+// feMul calculates h = f * g
+// Can overlap h with f or g.
+//
+// Preconditions:
+//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+//    |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+//
+// Postconditions:
+//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+//
+// Notes on implementation strategy:
+//
+// Using schoolbook multiplication.
+// Karatsuba would save a little in some cost models.
+//
+// Most multiplications by 2 and 19 are 32-bit precomputations;
+// cheaper than 64-bit postcomputations.
+//
+// There is one remaining multiplication by 19 in the carry chain;
+// one *19 precomputation can be merged into this,
+// but the resulting data flow is considerably less clean.
+//
+// There are 12 carries below.
+// 10 of them are 2-way parallelizable and vectorizable.
+// Can get away with 11 carries, but then data flow is much deeper.
+//
+// With tighter constraints on inputs can squeeze carries into int32.
+func feMul(h, f, g *fieldElement) {
+	f0 := f[0]
+	f1 := f[1]
+	f2 := f[2]
+	f3 := f[3]
+	f4 := f[4]
+	f5 := f[5]
+	f6 := f[6]
+	f7 := f[7]
+	f8 := f[8]
+	f9 := f[9]
+	g0 := g[0]
+	g1 := g[1]
+	g2 := g[2]
+	g3 := g[3]
+	g4 := g[4]
+	g5 := g[5]
+	g6 := g[6]
+	g7 := g[7]
+	g8 := g[8]
+	g9 := g[9]
+	g1_19 := 19 * g1 // 1.4*2^29
+	g2_19 := 19 * g2 // 1.4*2^30; still ok
+	g3_19 := 19 * g3
+	g4_19 := 19 * g4
+	g5_19 := 19 * g5
+	g6_19 := 19 * g6
+	g7_19 := 19 * g7
+	g8_19 := 19 * g8
+	g9_19 := 19 * g9
+	f1_2 := 2 * f1
+	f3_2 := 2 * f3
+	f5_2 := 2 * f5
+	f7_2 := 2 * f7
+	f9_2 := 2 * f9
+	f0g0 := int64(f0) * int64(g0)
+	f0g1 := int64(f0) * int64(g1)
+	f0g2 := int64(f0) * int64(g2)
+	f0g3 := int64(f0) * int64(g3)
+	f0g4 := int64(f0) * int64(g4)
+	f0g5 := int64(f0) * int64(g5)
+	f0g6 := int64(f0) * int64(g6)
+	f0g7 := int64(f0) * int64(g7)
+	f0g8 := int64(f0) * int64(g8)
+	f0g9 := int64(f0) * int64(g9)
+	f1g0 := int64(f1) * int64(g0)
+	f1g1_2 := int64(f1_2) * int64(g1)
+	f1g2 := int64(f1) * int64(g2)
+	f1g3_2 := int64(f1_2) * int64(g3)
+	f1g4 := int64(f1) * int64(g4)
+	f1g5_2 := int64(f1_2) * int64(g5)
+	f1g6 := int64(f1) * int64(g6)
+	f1g7_2 := int64(f1_2) * int64(g7)
+	f1g8 := int64(f1) * int64(g8)
+	f1g9_38 := int64(f1_2) * int64(g9_19)
+	f2g0 := int64(f2) * int64(g0)
+	f2g1 := int64(f2) * int64(g1)
+	f2g2 := int64(f2) * int64(g2)
+	f2g3 := int64(f2) * int64(g3)
+	f2g4 := int64(f2) * int64(g4)
+	f2g5 := int64(f2) * int64(g5)
+	f2g6 := int64(f2) * int64(g6)
+	f2g7 := int64(f2) * int64(g7)
+	f2g8_19 := int64(f2) * int64(g8_19)
+	f2g9_19 := int64(f2) * int64(g9_19)
+	f3g0 := int64(f3) * int64(g0)
+	f3g1_2 := int64(f3_2) * int64(g1)
+	f3g2 := int64(f3) * int64(g2)
+	f3g3_2 := int64(f3_2) * int64(g3)
+	f3g4 := int64(f3) * int64(g4)
+	f3g5_2 := int64(f3_2) * int64(g5)
+	f3g6 := int64(f3) * int64(g6)
+	f3g7_38 := int64(f3_2) * int64(g7_19)
+	f3g8_19 := int64(f3) * int64(g8_19)
+	f3g9_38 := int64(f3_2) * int64(g9_19)
+	f4g0 := int64(f4) * int64(g0)
+	f4g1 := int64(f4) * int64(g1)
+	f4g2 := int64(f4) * int64(g2)
+	f4g3 := int64(f4) * int64(g3)
+	f4g4 := int64(f4) * int64(g4)
+	f4g5 := int64(f4) * int64(g5)
+	f4g6_19 := int64(f4) * int64(g6_19)
+	f4g7_19 := int64(f4) * int64(g7_19)
+	f4g8_19 := int64(f4) * int64(g8_19)
+	f4g9_19 := int64(f4) * int64(g9_19)
+	f5g0 := int64(f5) * int64(g0)
+	f5g1_2 := int64(f5_2) * int64(g1)
+	f5g2 := int64(f5) * int64(g2)
+	f5g3_2 := int64(f5_2) * int64(g3)
+	f5g4 := int64(f5) * int64(g4)
+	f5g5_38 := int64(f5_2) * int64(g5_19)
+	f5g6_19 := int64(f5) * int64(g6_19)
+	f5g7_38 := int64(f5_2) * int64(g7_19)
+	f5g8_19 := int64(f5) * int64(g8_19)
+	f5g9_38 := int64(f5_2) * int64(g9_19)
+	f6g0 := int64(f6) * int64(g0)
+	f6g1 := int64(f6) * int64(g1)
+	f6g2 := int64(f6) * int64(g2)
+	f6g3 := int64(f6) * int64(g3)
+	f6g4_19 := int64(f6) * int64(g4_19)
+	f6g5_19 := int64(f6) * int64(g5_19)
+	f6g6_19 := int64(f6) * int64(g6_19)
+	f6g7_19 := int64(f6) * int64(g7_19)
+	f6g8_19 := int64(f6) * int64(g8_19)
+	f6g9_19 := int64(f6) * int64(g9_19)
+	f7g0 := int64(f7) * int64(g0)
+	f7g1_2 := int64(f7_2) * int64(g1)
+	f7g2 := int64(f7) * int64(g2)
+	f7g3_38 := int64(f7_2) * int64(g3_19)
+	f7g4_19 := int64(f7) * int64(g4_19)
+	f7g5_38 := int64(f7_2) * int64(g5_19)
+	f7g6_19 := int64(f7) * int64(g6_19)
+	f7g7_38 := int64(f7_2) * int64(g7_19)
+	f7g8_19 := int64(f7) * int64(g8_19)
+	f7g9_38 := int64(f7_2) * int64(g9_19)
+	f8g0 := int64(f8) * int64(g0)
+	f8g1 := int64(f8) * int64(g1)
+	f8g2_19 := int64(f8) * int64(g2_19)
+	f8g3_19 := int64(f8) * int64(g3_19)
+	f8g4_19 := int64(f8) * int64(g4_19)
+	f8g5_19 := int64(f8) * int64(g5_19)
+	f8g6_19 := int64(f8) * int64(g6_19)
+	f8g7_19 := int64(f8) * int64(g7_19)
+	f8g8_19 := int64(f8) * int64(g8_19)
+	f8g9_19 := int64(f8) * int64(g9_19)
+	f9g0 := int64(f9) * int64(g0)
+	f9g1_38 := int64(f9_2) * int64(g1_19)
+	f9g2_19 := int64(f9) * int64(g2_19)
+	f9g3_38 := int64(f9_2) * int64(g3_19)
+	f9g4_19 := int64(f9) * int64(g4_19)
+	f9g5_38 := int64(f9_2) * int64(g5_19)
+	f9g6_19 := int64(f9) * int64(g6_19)
+	f9g7_38 := int64(f9_2) * int64(g7_19)
+	f9g8_19 := int64(f9) * int64(g8_19)
+	f9g9_38 := int64(f9_2) * int64(g9_19)
+	h0 := f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38
+	h1 := f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19
+	h2 := f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38
+	h3 := f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19
+	h4 := f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38
+	h5 := f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19
+	h6 := f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38
+	h7 := f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19
+	h8 := f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38
+	h9 := f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0
+	var carry [10]int64
+
+	// |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
+	//   i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
+	// |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
+	//   i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
+
+	carry[0] = (h0 + (1 << 25)) >> 26
+	h1 += carry[0]
+	h0 -= carry[0] << 26
+	carry[4] = (h4 + (1 << 25)) >> 26
+	h5 += carry[4]
+	h4 -= carry[4] << 26
+	// |h0| <= 2^25
+	// |h4| <= 2^25
+	// |h1| <= 1.51*2^58
+	// |h5| <= 1.51*2^58
+
+	carry[1] = (h1 + (1 << 24)) >> 25
+	h2 += carry[1]
+	h1 -= carry[1] << 25
+	carry[5] = (h5 + (1 << 24)) >> 25
+	h6 += carry[5]
+	h5 -= carry[5] << 25
+	// |h1| <= 2^24; from now on fits into int32
+	// |h5| <= 2^24; from now on fits into int32
+	// |h2| <= 1.21*2^59
+	// |h6| <= 1.21*2^59
+
+	carry[2] = (h2 + (1 << 25)) >> 26
+	h3 += carry[2]
+	h2 -= carry[2] << 26
+	carry[6] = (h6 + (1 << 25)) >> 26
+	h7 += carry[6]
+	h6 -= carry[6] << 26
+	// |h2| <= 2^25; from now on fits into int32 unchanged
+	// |h6| <= 2^25; from now on fits into int32 unchanged
+	// |h3| <= 1.51*2^58
+	// |h7| <= 1.51*2^58
+
+	carry[3] = (h3 + (1 << 24)) >> 25
+	h4 += carry[3]
+	h3 -= carry[3] << 25
+	carry[7] = (h7 + (1 << 24)) >> 25
+	h8 += carry[7]
+	h7 -= carry[7] << 25
+	// |h3| <= 2^24; from now on fits into int32 unchanged
+	// |h7| <= 2^24; from now on fits into int32 unchanged
+	// |h4| <= 1.52*2^33
+	// |h8| <= 1.52*2^33
+
+	carry[4] = (h4 + (1 << 25)) >> 26
+	h5 += carry[4]
+	h4 -= carry[4] << 26
+	carry[8] = (h8 + (1 << 25)) >> 26
+	h9 += carry[8]
+	h8 -= carry[8] << 26
+	// |h4| <= 2^25; from now on fits into int32 unchanged
+	// |h8| <= 2^25; from now on fits into int32 unchanged
+	// |h5| <= 1.01*2^24
+	// |h9| <= 1.51*2^58
+
+	carry[9] = (h9 + (1 << 24)) >> 25
+	h0 += carry[9] * 19
+	h9 -= carry[9] << 25
+	// |h9| <= 2^24; from now on fits into int32 unchanged
+	// |h0| <= 1.8*2^37
+
+	carry[0] = (h0 + (1 << 25)) >> 26
+	h1 += carry[0]
+	h0 -= carry[0] << 26
+	// |h0| <= 2^25; from now on fits into int32 unchanged
+	// |h1| <= 1.01*2^24
+
+	h[0] = int32(h0)
+	h[1] = int32(h1)
+	h[2] = int32(h2)
+	h[3] = int32(h3)
+	h[4] = int32(h4)
+	h[5] = int32(h5)
+	h[6] = int32(h6)
+	h[7] = int32(h7)
+	h[8] = int32(h8)
+	h[9] = int32(h9)
+}
+
+// feSquare calculates h = f*f. Can overlap h with f.
+//
+// Preconditions:
+//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+//
+// Postconditions:
+//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+func feSquare(h, f *fieldElement) {
+	f0 := f[0]
+	f1 := f[1]
+	f2 := f[2]
+	f3 := f[3]
+	f4 := f[4]
+	f5 := f[5]
+	f6 := f[6]
+	f7 := f[7]
+	f8 := f[8]
+	f9 := f[9]
+	f0_2 := 2 * f0
+	f1_2 := 2 * f1
+	f2_2 := 2 * f2
+	f3_2 := 2 * f3
+	f4_2 := 2 * f4
+	f5_2 := 2 * f5
+	f6_2 := 2 * f6
+	f7_2 := 2 * f7
+	f5_38 := 38 * f5 // 1.31*2^30
+	f6_19 := 19 * f6 // 1.31*2^30
+	f7_38 := 38 * f7 // 1.31*2^30
+	f8_19 := 19 * f8 // 1.31*2^30
+	f9_38 := 38 * f9 // 1.31*2^30
+	f0f0 := int64(f0) * int64(f0)
+	f0f1_2 := int64(f0_2) * int64(f1)
+	f0f2_2 := int64(f0_2) * int64(f2)
+	f0f3_2 := int64(f0_2) * int64(f3)
+	f0f4_2 := int64(f0_2) * int64(f4)
+	f0f5_2 := int64(f0_2) * int64(f5)
+	f0f6_2 := int64(f0_2) * int64(f6)
+	f0f7_2 := int64(f0_2) * int64(f7)
+	f0f8_2 := int64(f0_2) * int64(f8)
+	f0f9_2 := int64(f0_2) * int64(f9)
+	f1f1_2 := int64(f1_2) * int64(f1)
+	f1f2_2 := int64(f1_2) * int64(f2)
+	f1f3_4 := int64(f1_2) * int64(f3_2)
+	f1f4_2 := int64(f1_2) * int64(f4)
+	f1f5_4 := int64(f1_2) * int64(f5_2)
+	f1f6_2 := int64(f1_2) * int64(f6)
+	f1f7_4 := int64(f1_2) * int64(f7_2)
+	f1f8_2 := int64(f1_2) * int64(f8)
+	f1f9_76 := int64(f1_2) * int64(f9_38)
+	f2f2 := int64(f2) * int64(f2)
+	f2f3_2 := int64(f2_2) * int64(f3)
+	f2f4_2 := int64(f2_2) * int64(f4)
+	f2f5_2 := int64(f2_2) * int64(f5)
+	f2f6_2 := int64(f2_2) * int64(f6)
+	f2f7_2 := int64(f2_2) * int64(f7)
+	f2f8_38 := int64(f2_2) * int64(f8_19)
+	f2f9_38 := int64(f2) * int64(f9_38)
+	f3f3_2 := int64(f3_2) * int64(f3)
+	f3f4_2 := int64(f3_2) * int64(f4)
+	f3f5_4 := int64(f3_2) * int64(f5_2)
+	f3f6_2 := int64(f3_2) * int64(f6)
+	f3f7_76 := int64(f3_2) * int64(f7_38)
+	f3f8_38 := int64(f3_2) * int64(f8_19)
+	f3f9_76 := int64(f3_2) * int64(f9_38)
+	f4f4 := int64(f4) * int64(f4)
+	f4f5_2 := int64(f4_2) * int64(f5)
+	f4f6_38 := int64(f4_2) * int64(f6_19)
+	f4f7_38 := int64(f4) * int64(f7_38)
+	f4f8_38 := int64(f4_2) * int64(f8_19)
+	f4f9_38 := int64(f4) * int64(f9_38)
+	f5f5_38 := int64(f5) * int64(f5_38)
+	f5f6_38 := int64(f5_2) * int64(f6_19)
+	f5f7_76 := int64(f5_2) * int64(f7_38)
+	f5f8_38 := int64(f5_2) * int64(f8_19)
+	f5f9_76 := int64(f5_2) * int64(f9_38)
+	f6f6_19 := int64(f6) * int64(f6_19)
+	f6f7_38 := int64(f6) * int64(f7_38)
+	f6f8_38 := int64(f6_2) * int64(f8_19)
+	f6f9_38 := int64(f6) * int64(f9_38)
+	f7f7_38 := int64(f7) * int64(f7_38)
+	f7f8_38 := int64(f7_2) * int64(f8_19)
+	f7f9_76 := int64(f7_2) * int64(f9_38)
+	f8f8_19 := int64(f8) * int64(f8_19)
+	f8f9_38 := int64(f8) * int64(f9_38)
+	f9f9_38 := int64(f9) * int64(f9_38)
+	h0 := f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38
+	h1 := f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38
+	h2 := f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19
+	h3 := f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38
+	h4 := f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38
+	h5 := f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38
+	h6 := f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19
+	h7 := f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38
+	h8 := f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38
+	h9 := f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2
+	var carry [10]int64
+
+	carry[0] = (h0 + (1 << 25)) >> 26
+	h1 += carry[0]
+	h0 -= carry[0] << 26
+	carry[4] = (h4 + (1 << 25)) >> 26
+	h5 += carry[4]
+	h4 -= carry[4] << 26
+
+	carry[1] = (h1 + (1 << 24)) >> 25
+	h2 += carry[1]
+	h1 -= carry[1] << 25
+	carry[5] = (h5 + (1 << 24)) >> 25
+	h6 += carry[5]
+	h5 -= carry[5] << 25
+
+	carry[2] = (h2 + (1 << 25)) >> 26
+	h3 += carry[2]
+	h2 -= carry[2] << 26
+	carry[6] = (h6 + (1 << 25)) >> 26
+	h7 += carry[6]
+	h6 -= carry[6] << 26
+
+	carry[3] = (h3 + (1 << 24)) >> 25
+	h4 += carry[3]
+	h3 -= carry[3] << 25
+	carry[7] = (h7 + (1 << 24)) >> 25
+	h8 += carry[7]
+	h7 -= carry[7] << 25
+
+	carry[4] = (h4 + (1 << 25)) >> 26
+	h5 += carry[4]
+	h4 -= carry[4] << 26
+	carry[8] = (h8 + (1 << 25)) >> 26
+	h9 += carry[8]
+	h8 -= carry[8] << 26
+
+	carry[9] = (h9 + (1 << 24)) >> 25
+	h0 += carry[9] * 19
+	h9 -= carry[9] << 25
+
+	carry[0] = (h0 + (1 << 25)) >> 26
+	h1 += carry[0]
+	h0 -= carry[0] << 26
+
+	h[0] = int32(h0)
+	h[1] = int32(h1)
+	h[2] = int32(h2)
+	h[3] = int32(h3)
+	h[4] = int32(h4)
+	h[5] = int32(h5)
+	h[6] = int32(h6)
+	h[7] = int32(h7)
+	h[8] = int32(h8)
+	h[9] = int32(h9)
+}
+
+// feMul121666 calculates h = f * 121666. Can overlap h with f.
+//
+// Preconditions:
+//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+//
+// Postconditions:
+//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+func feMul121666(h, f *fieldElement) {
+	h0 := int64(f[0]) * 121666
+	h1 := int64(f[1]) * 121666
+	h2 := int64(f[2]) * 121666
+	h3 := int64(f[3]) * 121666
+	h4 := int64(f[4]) * 121666
+	h5 := int64(f[5]) * 121666
+	h6 := int64(f[6]) * 121666
+	h7 := int64(f[7]) * 121666
+	h8 := int64(f[8]) * 121666
+	h9 := int64(f[9]) * 121666
+	var carry [10]int64
+
+	carry[9] = (h9 + (1 << 24)) >> 25
+	h0 += carry[9] * 19
+	h9 -= carry[9] << 25
+	carry[1] = (h1 + (1 << 24)) >> 25
+	h2 += carry[1]
+	h1 -= carry[1] << 25
+	carry[3] = (h3 + (1 << 24)) >> 25
+	h4 += carry[3]
+	h3 -= carry[3] << 25
+	carry[5] = (h5 + (1 << 24)) >> 25
+	h6 += carry[5]
+	h5 -= carry[5] << 25
+	carry[7] = (h7 + (1 << 24)) >> 25
+	h8 += carry[7]
+	h7 -= carry[7] << 25
+
+	carry[0] = (h0 + (1 << 25)) >> 26
+	h1 += carry[0]
+	h0 -= carry[0] << 26
+	carry[2] = (h2 + (1 << 25)) >> 26
+	h3 += carry[2]
+	h2 -= carry[2] << 26
+	carry[4] = (h4 + (1 << 25)) >> 26
+	h5 += carry[4]
+	h4 -= carry[4] << 26
+	carry[6] = (h6 + (1 << 25)) >> 26
+	h7 += carry[6]
+	h6 -= carry[6] << 26
+	carry[8] = (h8 + (1 << 25)) >> 26
+	h9 += carry[8]
+	h8 -= carry[8] << 26
+
+	h[0] = int32(h0)
+	h[1] = int32(h1)
+	h[2] = int32(h2)
+	h[3] = int32(h3)
+	h[4] = int32(h4)
+	h[5] = int32(h5)
+	h[6] = int32(h6)
+	h[7] = int32(h7)
+	h[8] = int32(h8)
+	h[9] = int32(h9)
+}
+
+// feInvert sets out = z^-1.
+func feInvert(out, z *fieldElement) {
+	var t0, t1, t2, t3 fieldElement
+	var i int
+
+	feSquare(&t0, z)
+	for i = 1; i < 1; i++ {
+		feSquare(&t0, &t0)
+	}
+	feSquare(&t1, &t0)
+	for i = 1; i < 2; i++ {
+		feSquare(&t1, &t1)
+	}
+	feMul(&t1, z, &t1)
+	feMul(&t0, &t0, &t1)
+	feSquare(&t2, &t0)
+	for i = 1; i < 1; i++ {
+		feSquare(&t2, &t2)
+	}
+	feMul(&t1, &t1, &t2)
+	feSquare(&t2, &t1)
+	for i = 1; i < 5; i++ {
+		feSquare(&t2, &t2)
+	}
+	feMul(&t1, &t2, &t1)
+	feSquare(&t2, &t1)
+	for i = 1; i < 10; i++ {
+		feSquare(&t2, &t2)
+	}
+	feMul(&t2, &t2, &t1)
+	feSquare(&t3, &t2)
+	for i = 1; i < 20; i++ {
+		feSquare(&t3, &t3)
+	}
+	feMul(&t2, &t3, &t2)
+	feSquare(&t2, &t2)
+	for i = 1; i < 10; i++ {
+		feSquare(&t2, &t2)
+	}
+	feMul(&t1, &t2, &t1)
+	feSquare(&t2, &t1)
+	for i = 1; i < 50; i++ {
+		feSquare(&t2, &t2)
+	}
+	feMul(&t2, &t2, &t1)
+	feSquare(&t3, &t2)
+	for i = 1; i < 100; i++ {
+		feSquare(&t3, &t3)
+	}
+	feMul(&t2, &t3, &t2)
+	feSquare(&t2, &t2)
+	for i = 1; i < 50; i++ {
+		feSquare(&t2, &t2)
+	}
+	feMul(&t1, &t2, &t1)
+	feSquare(&t1, &t1)
+	for i = 1; i < 5; i++ {
+		feSquare(&t1, &t1)
+	}
+	feMul(out, &t1, &t0)
+}
+
+func scalarMultGeneric(out, in, base *[32]byte) {
+	var e [32]byte
+
+	copy(e[:], in[:])
+	e[0] &= 248
+	e[31] &= 127
+	e[31] |= 64
+
+	var x1, x2, z2, x3, z3, tmp0, tmp1 fieldElement
+	feFromBytes(&x1, base)
+	feOne(&x2)
+	feCopy(&x3, &x1)
+	feOne(&z3)
+
+	swap := int32(0)
+	for pos := 254; pos >= 0; pos-- {
+		b := e[pos/8] >> uint(pos&7)
+		b &= 1
+		swap ^= int32(b)
+		feCSwap(&x2, &x3, swap)
+		feCSwap(&z2, &z3, swap)
+		swap = int32(b)
+
+		feSub(&tmp0, &x3, &z3)
+		feSub(&tmp1, &x2, &z2)
+		feAdd(&x2, &x2, &z2)
+		feAdd(&z2, &x3, &z3)
+		feMul(&z3, &tmp0, &x2)
+		feMul(&z2, &z2, &tmp1)
+		feSquare(&tmp0, &tmp1)
+		feSquare(&tmp1, &x2)
+		feAdd(&x3, &z3, &z2)
+		feSub(&z2, &z3, &z2)
+		feMul(&x2, &tmp1, &tmp0)
+		feSub(&tmp1, &tmp1, &tmp0)
+		feSquare(&z2, &z2)
+		feMul121666(&z3, &tmp1)
+		feSquare(&x3, &x3)
+		feAdd(&tmp0, &tmp0, &z3)
+		feMul(&z3, &x1, &z2)
+		feMul(&z2, &tmp1, &tmp0)
+	}
+
+	feCSwap(&x2, &x3, swap)
+	feCSwap(&z2, &z3, swap)
+
+	feInvert(&z2, &z2)
+	feMul(&x2, &x2, &z2)
+	feToBytes(out, &x2)
+}
--- a/vendor/golang.org/x/crypto/curve25519/curve25519_noasm.go
+++ b/vendor/golang.org/x/crypto/curve25519/curve25519_noasm.go
@ -0,0 +1,11 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !amd64 gccgo appengine purego
+
+package curve25519
+
+func scalarMult(out, in, base *[32]byte) {
+	scalarMultGeneric(out, in, base)
+}
--- a/vendor/golang.org/x/crypto/curve25519/doc.go
+++ b/vendor/golang.org/x/crypto/curve25519/doc.go
@ -1,23 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package curve25519 provides an implementation of scalar multiplication on
-// the elliptic curve known as curve25519. See https://cr.yp.to/ecdh.html
-package curve25519 // import "golang.org/x/crypto/curve25519"
-
-// basePoint is the x coordinate of the generator of the curve.
-var basePoint = [32]byte{9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
-
-// ScalarMult sets dst to the product in*base where dst and base are the x
-// coordinates of group points and all values are in little-endian form.
-func ScalarMult(dst, in, base *[32]byte) {
-	scalarMult(dst, in, base)
-}
-
-// ScalarBaseMult sets dst to the product in*base where dst and base are the x
-// coordinates of group points, base is the standard generator and all values
-// are in little-endian form.
-func ScalarBaseMult(dst, in *[32]byte) {
-	ScalarMult(dst, in, &basePoint)
-}
--- a/vendor/golang.org/x/crypto/curve25519/freeze_amd64.s
+++ b/vendor/golang.org/x/crypto/curve25519/freeze_amd64.s
@ -1,73 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This code was translated into a form compatible with 6a from the public
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
-
-// +build amd64,!gccgo,!appengine
-
-#include "const_amd64.h"
-
-// func freeze(inout *[5]uint64)
-TEXT ·freeze(SB),7,$0-8
-	MOVQ inout+0(FP), DI
-
-	MOVQ 0(DI),SI
-	MOVQ 8(DI),DX
-	MOVQ 16(DI),CX
-	MOVQ 24(DI),R8
-	MOVQ 32(DI),R9
-	MOVQ $REDMASK51,AX
-	MOVQ AX,R10
-	SUBQ $18,R10
-	MOVQ $3,R11
-REDUCELOOP:
-	MOVQ SI,R12
-	SHRQ $51,R12
-	ANDQ AX,SI
-	ADDQ R12,DX
-	MOVQ DX,R12
-	SHRQ $51,R12
-	ANDQ AX,DX
-	ADDQ R12,CX
-	MOVQ CX,R12
-	SHRQ $51,R12
-	ANDQ AX,CX
-	ADDQ R12,R8
-	MOVQ R8,R12
-	SHRQ $51,R12
-	ANDQ AX,R8
-	ADDQ R12,R9
-	MOVQ R9,R12
-	SHRQ $51,R12
-	ANDQ AX,R9
-	IMUL3Q $19,R12,R12
-	ADDQ R12,SI
-	SUBQ $1,R11
-	JA REDUCELOOP
-	MOVQ $1,R12
-	CMPQ R10,SI
-	CMOVQLT R11,R12
-	CMPQ AX,DX
-	CMOVQNE R11,R12
-	CMPQ AX,CX
-	CMOVQNE R11,R12
-	CMPQ AX,R8
-	CMOVQNE R11,R12
-	CMPQ AX,R9
-	CMOVQNE R11,R12
-	NEGQ R12
-	ANDQ R12,AX
-	ANDQ R12,R10
-	SUBQ R10,SI
-	SUBQ AX,DX
-	SUBQ AX,CX
-	SUBQ AX,R8
-	SUBQ AX,R9
-	MOVQ SI,0(DI)
-	MOVQ DX,8(DI)
-	MOVQ CX,16(DI)
-	MOVQ R8,24(DI)
-	MOVQ R9,32(DI)
-	RET
--- a/vendor/golang.org/x/crypto/curve25519/mul_amd64.s
+++ b/vendor/golang.org/x/crypto/curve25519/mul_amd64.s
@ -1,169 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This code was translated into a form compatible with 6a from the public
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
-
-// +build amd64,!gccgo,!appengine
-
-#include "const_amd64.h"
-
-// func mul(dest, a, b *[5]uint64)
-TEXT ·mul(SB),0,$16-24
-	MOVQ dest+0(FP), DI
-	MOVQ a+8(FP), SI
-	MOVQ b+16(FP), DX
-
-	MOVQ DX,CX
-	MOVQ 24(SI),DX
-	IMUL3Q $19,DX,AX
-	MOVQ AX,0(SP)
-	MULQ 16(CX)
-	MOVQ AX,R8
-	MOVQ DX,R9
-	MOVQ 32(SI),DX
-	IMUL3Q $19,DX,AX
-	MOVQ AX,8(SP)
-	MULQ 8(CX)
-	ADDQ AX,R8
-	ADCQ DX,R9
-	MOVQ 0(SI),AX
-	MULQ 0(CX)
-	ADDQ AX,R8
-	ADCQ DX,R9
-	MOVQ 0(SI),AX
-	MULQ 8(CX)
-	MOVQ AX,R10
-	MOVQ DX,R11
-	MOVQ 0(SI),AX
-	MULQ 16(CX)
-	MOVQ AX,R12
-	MOVQ DX,R13
-	MOVQ 0(SI),AX
-	MULQ 24(CX)
-	MOVQ AX,R14
-	MOVQ DX,R15
-	MOVQ 0(SI),AX
-	MULQ 32(CX)
-	MOVQ AX,BX
-	MOVQ DX,BP
-	MOVQ 8(SI),AX
-	MULQ 0(CX)
-	ADDQ AX,R10
-	ADCQ DX,R11
-	MOVQ 8(SI),AX
-	MULQ 8(CX)
-	ADDQ AX,R12
-	ADCQ DX,R13
-	MOVQ 8(SI),AX
-	MULQ 16(CX)
-	ADDQ AX,R14
-	ADCQ DX,R15
-	MOVQ 8(SI),AX
-	MULQ 24(CX)
-	ADDQ AX,BX
-	ADCQ DX,BP
-	MOVQ 8(SI),DX
-	IMUL3Q $19,DX,AX
-	MULQ 32(CX)
-	ADDQ AX,R8
-	ADCQ DX,R9
-	MOVQ 16(SI),AX
-	MULQ 0(CX)
-	ADDQ AX,R12
-	ADCQ DX,R13
-	MOVQ 16(SI),AX
-	MULQ 8(CX)
-	ADDQ AX,R14
-	ADCQ DX,R15
-	MOVQ 16(SI),AX
-	MULQ 16(CX)
-	ADDQ AX,BX
-	ADCQ DX,BP
-	MOVQ 16(SI),DX
-	IMUL3Q $19,DX,AX
-	MULQ 24(CX)
-	ADDQ AX,R8
-	ADCQ DX,R9
-	MOVQ 16(SI),DX
-	IMUL3Q $19,DX,AX
-	MULQ 32(CX)
-	ADDQ AX,R10
-	ADCQ DX,R11
-	MOVQ 24(SI),AX
-	MULQ 0(CX)
-	ADDQ AX,R14
-	ADCQ DX,R15
-	MOVQ 24(SI),AX
-	MULQ 8(CX)
-	ADDQ AX,BX
-	ADCQ DX,BP
-	MOVQ 0(SP),AX
-	MULQ 24(CX)
-	ADDQ AX,R10
-	ADCQ DX,R11
-	MOVQ 0(SP),AX
-	MULQ 32(CX)
-	ADDQ AX,R12
-	ADCQ DX,R13
-	MOVQ 32(SI),AX
-	MULQ 0(CX)
-	ADDQ AX,BX
-	ADCQ DX,BP
-	MOVQ 8(SP),AX
-	MULQ 16(CX)
-	ADDQ AX,R10
-	ADCQ DX,R11
-	MOVQ 8(SP),AX
-	MULQ 24(CX)
-	ADDQ AX,R12
-	ADCQ DX,R13
-	MOVQ 8(SP),AX
-	MULQ 32(CX)
-	ADDQ AX,R14
-	ADCQ DX,R15
-	MOVQ $REDMASK51,SI
-	SHLQ $13,R8,R9
-	ANDQ SI,R8
-	SHLQ $13,R10,R11
-	ANDQ SI,R10
-	ADDQ R9,R10
-	SHLQ $13,R12,R13
-	ANDQ SI,R12
-	ADDQ R11,R12
-	SHLQ $13,R14,R15
-	ANDQ SI,R14
-	ADDQ R13,R14
-	SHLQ $13,BX,BP
-	ANDQ SI,BX
-	ADDQ R15,BX
-	IMUL3Q $19,BP,DX
-	ADDQ DX,R8
-	MOVQ R8,DX
-	SHRQ $51,DX
-	ADDQ R10,DX
-	MOVQ DX,CX
-	SHRQ $51,DX
-	ANDQ SI,R8
-	ADDQ R12,DX
-	MOVQ DX,R9
-	SHRQ $51,DX
-	ANDQ SI,CX
-	ADDQ R14,DX
-	MOVQ DX,AX
-	SHRQ $51,DX
-	ANDQ SI,R9
-	ADDQ BX,DX
-	MOVQ DX,R10
-	SHRQ $51,DX
-	ANDQ SI,AX
-	IMUL3Q $19,DX,DX
-	ADDQ DX,R8
-	ANDQ SI,R10
-	MOVQ R8,0(DI)
-	MOVQ CX,8(DI)
-	MOVQ R9,16(DI)
-	MOVQ AX,24(DI)
-	MOVQ R10,32(DI)
-	RET
--- a/vendor/golang.org/x/crypto/curve25519/square_amd64.s
+++ b/vendor/golang.org/x/crypto/curve25519/square_amd64.s
@ -1,132 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This code was translated into a form compatible with 6a from the public
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
-
-// +build amd64,!gccgo,!appengine
-
-#include "const_amd64.h"
-
-// func square(out, in *[5]uint64)
-TEXT ·square(SB),7,$0-16
-	MOVQ out+0(FP), DI
-	MOVQ in+8(FP), SI
-
-	MOVQ 0(SI),AX
-	MULQ 0(SI)
-	MOVQ AX,CX
-	MOVQ DX,R8
-	MOVQ 0(SI),AX
-	SHLQ $1,AX
-	MULQ 8(SI)
-	MOVQ AX,R9
-	MOVQ DX,R10
-	MOVQ 0(SI),AX
-	SHLQ $1,AX
-	MULQ 16(SI)
-	MOVQ AX,R11
-	MOVQ DX,R12
-	MOVQ 0(SI),AX
-	SHLQ $1,AX
-	MULQ 24(SI)
-	MOVQ AX,R13
-	MOVQ DX,R14
-	MOVQ 0(SI),AX
-	SHLQ $1,AX
-	MULQ 32(SI)
-	MOVQ AX,R15
-	MOVQ DX,BX
-	MOVQ 8(SI),AX
-	MULQ 8(SI)
-	ADDQ AX,R11
-	ADCQ DX,R12
-	MOVQ 8(SI),AX
-	SHLQ $1,AX
-	MULQ 16(SI)
-	ADDQ AX,R13
-	ADCQ DX,R14
-	MOVQ 8(SI),AX
-	SHLQ $1,AX
-	MULQ 24(SI)
-	ADDQ AX,R15
-	ADCQ DX,BX
-	MOVQ 8(SI),DX
-	IMUL3Q $38,DX,AX
-	MULQ 32(SI)
-	ADDQ AX,CX
-	ADCQ DX,R8
-	MOVQ 16(SI),AX
-	MULQ 16(SI)
-	ADDQ AX,R15
-	ADCQ DX,BX
-	MOVQ 16(SI),DX
-	IMUL3Q $38,DX,AX
-	MULQ 24(SI)
-	ADDQ AX,CX
-	ADCQ DX,R8
-	MOVQ 16(SI),DX
-	IMUL3Q $38,DX,AX
-	MULQ 32(SI)
-	ADDQ AX,R9
-	ADCQ DX,R10
-	MOVQ 24(SI),DX
-	IMUL3Q $19,DX,AX
-	MULQ 24(SI)
-	ADDQ AX,R9
-	ADCQ DX,R10
-	MOVQ 24(SI),DX
-	IMUL3Q $38,DX,AX
-	MULQ 32(SI)
-	ADDQ AX,R11
-	ADCQ DX,R12
-	MOVQ 32(SI),DX
-	IMUL3Q $19,DX,AX
-	MULQ 32(SI)
-	ADDQ AX,R13
-	ADCQ DX,R14
-	MOVQ $REDMASK51,SI
-	SHLQ $13,CX,R8
-	ANDQ SI,CX
-	SHLQ $13,R9,R10
-	ANDQ SI,R9
-	ADDQ R8,R9
-	SHLQ $13,R11,R12
-	ANDQ SI,R11
-	ADDQ R10,R11
-	SHLQ $13,R13,R14
-	ANDQ SI,R13
-	ADDQ R12,R13
-	SHLQ $13,R15,BX
-	ANDQ SI,R15
-	ADDQ R14,R15
-	IMUL3Q $19,BX,DX
-	ADDQ DX,CX
-	MOVQ CX,DX
-	SHRQ $51,DX
-	ADDQ R9,DX
-	ANDQ SI,CX
-	MOVQ DX,R8
-	SHRQ $51,DX
-	ADDQ R11,DX
-	ANDQ SI,R8
-	MOVQ DX,R9
-	SHRQ $51,DX
-	ADDQ R13,DX
-	ANDQ SI,R9
-	MOVQ DX,AX
-	SHRQ $51,DX
-	ADDQ R15,DX
-	ANDQ SI,AX
-	MOVQ DX,R10
-	SHRQ $51,DX
-	IMUL3Q $19,DX,DX
-	ADDQ DX,CX
-	ANDQ SI,R10
-	MOVQ CX,0(DI)
-	MOVQ R8,8(DI)
-	MOVQ R9,16(DI)
-	MOVQ AX,24(DI)
-	MOVQ R10,32(DI)
-	RET
--- a/vendor/golang.org/x/crypto/poly1305/bits_compat.go
+++ b/vendor/golang.org/x/crypto/poly1305/bits_compat.go
@ -0,0 +1,39 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !go1.13
+
+package poly1305
+
+// Generic fallbacks for the math/bits intrinsics, copied from
+// src/math/bits/bits.go. They were added in Go 1.12, but Add64 and Sum64 had
+// variable time fallbacks until Go 1.13.
+
+func bitsAdd64(x, y, carry uint64) (sum, carryOut uint64) {
+	sum = x + y + carry
+	carryOut = ((x & y) | ((x | y) &^ sum)) >> 63
+	return
+}
+
+func bitsSub64(x, y, borrow uint64) (diff, borrowOut uint64) {
+	diff = x - y - borrow
+	borrowOut = ((^x & y) | (^(x ^ y) & diff)) >> 63
+	return
+}
+
+func bitsMul64(x, y uint64) (hi, lo uint64) {
+	const mask32 = 1<<32 - 1
+	x0 := x & mask32
+	x1 := x >> 32
+	y0 := y & mask32
+	y1 := y >> 32
+	w0 := x0 * y0
+	t := x1*y0 + w0>>32
+	w1 := t & mask32
+	w2 := t >> 32
+	w1 += x0 * y1
+	hi = x1*y1 + w2 + w1>>32
+	lo = x * y
+	return
+}
--- a/vendor/golang.org/x/crypto/poly1305/bits_go1.13.go
+++ b/vendor/golang.org/x/crypto/poly1305/bits_go1.13.go
@ -0,0 +1,21 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build go1.13
+
+package poly1305
+
+import "math/bits"
+
+func bitsAdd64(x, y, carry uint64) (sum, carryOut uint64) {
+	return bits.Add64(x, y, carry)
+}
+
+func bitsSub64(x, y, borrow uint64) (diff, borrowOut uint64) {
+	return bits.Sub64(x, y, borrow)
+}
+
+func bitsMul64(x, y uint64) (hi, lo uint64) {
+	return bits.Mul64(x, y)
+}
--- a/vendor/golang.org/x/crypto/poly1305/poly1305.go
+++ b/vendor/golang.org/x/crypto/poly1305/poly1305.go
@ -22,8 +22,14 @@ import "crypto/subtle"
 // TagSize is the size, in bytes, of a poly1305 authenticator.
 const TagSize = 16

-// Verify returns true if mac is a valid authenticator for m with the given
-// key.
+// Sum generates an authenticator for msg using a one-time key and puts the
+// 16-byte result into out. Authenticating two different messages with the same
+// key allows an attacker to forge messages at will.
+func Sum(out *[16]byte, m []byte, key *[32]byte) {
+	sum(out, m, key)
+}
+
+// Verify returns true if mac is a valid authenticator for m with the given key.
 func Verify(mac *[16]byte, m []byte, key *[32]byte) bool {
 	var tmp [16]byte
 	Sum(&tmp, m, key)
--- a/vendor/golang.org/x/crypto/poly1305/sum_amd64.go
+++ b/vendor/golang.org/x/crypto/poly1305/sum_amd64.go
@ -7,62 +7,52 @@
 package poly1305

 //go:noescape
-func initialize(state *[7]uint64, key *[32]byte)
+func update(state *macState, msg []byte)

-//go:noescape
-func update(state *[7]uint64, msg []byte)
-
-//go:noescape
-func finalize(tag *[TagSize]byte, state *[7]uint64)
-
-// Sum generates an authenticator for m using a one-time key and puts the
-// 16-byte result into out. Authenticating two different messages with the same
-// key allows an attacker to forge messages at will.
-func Sum(out *[16]byte, m []byte, key *[32]byte) {
+func sum(out *[16]byte, m []byte, key *[32]byte) {
 	h := newMAC(key)
 	h.Write(m)
 	h.Sum(out)
 }

 func newMAC(key *[32]byte) (h mac) {
-	initialize(&h.state, key)
+	initialize(key, &h.r, &h.s)
 	return
 }

-type mac struct {
-	state [7]uint64 // := uint64{ h0, h1, h2, r0, r1, pad0, pad1 }
+// mac is a wrapper for macGeneric that redirects calls that would have gone to
+// updateGeneric to update.
+//
+// Its Write and Sum methods are otherwise identical to the macGeneric ones, but
+// using function pointers would carry a major performance cost.
+type mac struct{ macGeneric }

-	buffer [TagSize]byte
-	offset int
-}
-
-func (h *mac) Write(p []byte) (n int, err error) {
-	n = len(p)
+func (h *mac) Write(p []byte) (int, error) {
+	nn := len(p)
 	if h.offset > 0 {
-		remaining := TagSize - h.offset
-		if n < remaining {
-			h.offset += copy(h.buffer[h.offset:], p)
-			return n, nil
+		n := copy(h.buffer[h.offset:], p)
+		if h.offset+n < TagSize {
+			h.offset += n
+			return nn, nil
 		}
-		copy(h.buffer[h.offset:], p[:remaining])
-		p = p[remaining:]
+		p = p[n:]
 		h.offset = 0
-		update(&h.state, h.buffer[:])
+		update(&h.macState, h.buffer[:])
 	}
-	if nn := len(p) - (len(p) % TagSize); nn > 0 {
-		update(&h.state, p[:nn])
-		p = p[nn:]
+	if n := len(p) - (len(p) % TagSize); n > 0 {
+		update(&h.macState, p[:n])
+		p = p[n:]
 	}
 	if len(p) > 0 {
 		h.offset += copy(h.buffer[h.offset:], p)
 	}
-	return n, nil
+	return nn, nil
 }

 func (h *mac) Sum(out *[16]byte) {
-	state := h.state
+	state := h.macState
 	if h.offset > 0 {
 		update(&state, h.buffer[:h.offset])
 	}
-	finalize(out, &state)
+	finalize(out, &state.h, &state.s)
 }
--- a/vendor/golang.org/x/crypto/poly1305/sum_amd64.s
+++ b/vendor/golang.org/x/crypto/poly1305/sum_amd64.s
@ -54,10 +54,6 @@
 	ADCQ  t3, h1;                  \
 	ADCQ  $0, h2

-DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
-DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
-GLOBL ·poly1305Mask<>(SB), RODATA, $16
-
 // func update(state *[7]uint64, msg []byte)
 TEXT ·update(SB), $0-32
 	MOVQ state+0(FP), DI
@ -110,39 +106,3 @@ done:
 	MOVQ R9, 8(DI)
 	MOVQ R10, 16(DI)
 	RET
-
-// func initialize(state *[7]uint64, key *[32]byte)
-TEXT ·initialize(SB), $0-16
-	MOVQ state+0(FP), DI
-	MOVQ key+8(FP), SI
-
-	// state[0...7] is initialized with zero
-	MOVOU 0(SI), X0
-	MOVOU 16(SI), X1
-	MOVOU ·poly1305Mask<>(SB), X2
-	PAND  X2, X0
-	MOVOU X0, 24(DI)
-	MOVOU X1, 40(DI)
-	RET
-
-// func finalize(tag *[TagSize]byte, state *[7]uint64)
-TEXT ·finalize(SB), $0-16
-	MOVQ tag+0(FP), DI
-	MOVQ state+8(FP), SI
-
-	MOVQ    0(SI), AX
-	MOVQ    8(SI), BX
-	MOVQ    16(SI), CX
-	MOVQ    AX, R8
-	MOVQ    BX, R9
-	SUBQ    $0xFFFFFFFFFFFFFFFB, AX
-	SBBQ    $0xFFFFFFFFFFFFFFFF, BX
-	SBBQ    $3, CX
-	CMOVQCS R8, AX
-	CMOVQCS R9, BX
-	ADDQ    40(SI), AX
-	ADCQ    48(SI), BX
-
-	MOVQ AX, 0(DI)
-	MOVQ BX, 8(DI)
-	RET
--- a/vendor/golang.org/x/crypto/poly1305/sum_arm.go
+++ b/vendor/golang.org/x/crypto/poly1305/sum_arm.go
@ -6,14 +6,11 @@

 package poly1305

-// This function is implemented in sum_arm.s
+// poly1305_auth_armv6 is implemented in sum_arm.s
 //go:noescape
 func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]byte)

-// Sum generates an authenticator for m using a one-time key and puts the
-// 16-byte result into out. Authenticating two different messages with the same
-// key allows an attacker to forge messages at will.
-func Sum(out *[16]byte, m []byte, key *[32]byte) {
+func sum(out *[16]byte, m []byte, key *[32]byte) {
 	var mPtr *byte
 	if len(m) > 0 {
 		mPtr = &m[0]
--- a/vendor/golang.org/x/crypto/poly1305/sum_generic.go
+++ b/vendor/golang.org/x/crypto/poly1305/sum_generic.go
@ -2,18 +2,29 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

+// This file provides the generic implementation of Sum and MAC. Other files
+// might provide optimized assembly implementations of some of this code.
+
 package poly1305

 import "encoding/binary"

-const (
-	msgBlock   = uint32(1 << 24)
-	finalBlock = uint32(0)
-)
+// Poly1305 [RFC 7539] is a relatively simple algorithm: the authentication tag
+// for a 64 bytes message is approximately
+//
+//     s + m[0:16] * r⁴ + m[16:32] * r³ + m[32:48] * r² + m[48:64] * r  mod  2¹³⁰ - 5
+//
+// for some secret r and s. It can be computed sequentially like
+//
+//     for len(msg) > 0:
+//         h += read(msg, 16)
+//         h *= r
+//         h %= 2¹³⁰ - 5
+//     return h + s
+//
+// All the complexity is about doing performant constant-time math on numbers
+// larger than any available numeric type.

-// sumGeneric generates an authenticator for msg using a one-time key and
-// puts the 16-byte result into out. This is the generic implementation of
-// Sum and should be called if no assembly implementation is available.
 func sumGeneric(out *[TagSize]byte, msg []byte, key *[32]byte) {
 	h := newMACGeneric(key)
 	h.Write(msg)
@ -21,152 +32,276 @@ func sumGeneric(out *[TagSize]byte, msg []byte, key *[32]byte) {
 }

 func newMACGeneric(key *[32]byte) (h macGeneric) {
-	h.r[0] = binary.LittleEndian.Uint32(key[0:]) & 0x3ffffff
-	h.r[1] = (binary.LittleEndian.Uint32(key[3:]) >> 2) & 0x3ffff03
-	h.r[2] = (binary.LittleEndian.Uint32(key[6:]) >> 4) & 0x3ffc0ff
-	h.r[3] = (binary.LittleEndian.Uint32(key[9:]) >> 6) & 0x3f03fff
-	h.r[4] = (binary.LittleEndian.Uint32(key[12:]) >> 8) & 0x00fffff
-
-	h.s[0] = binary.LittleEndian.Uint32(key[16:])
-	h.s[1] = binary.LittleEndian.Uint32(key[20:])
-	h.s[2] = binary.LittleEndian.Uint32(key[24:])
-	h.s[3] = binary.LittleEndian.Uint32(key[28:])
+	initialize(key, &h.r, &h.s)
 	return
 }

+// macState holds numbers in saturated 64-bit little-endian limbs. That is,
+// the value of [x0, x1, x2] is x[0] + x[1] * 2⁶⁴ + x[2] * 2¹²⁸.
+type macState struct {
+	// h is the main accumulator. It is to be interpreted modulo 2¹³⁰ - 5, but
+	// can grow larger during and after rounds.
+	h [3]uint64
+	// r and s are the private key components.
+	r [2]uint64
+	s [2]uint64
+}
+
 type macGeneric struct {
-	h, r [5]uint32
-	s    [4]uint32
+	macState

 	buffer [TagSize]byte
 	offset int
 }

-func (h *macGeneric) Write(p []byte) (n int, err error) {
-	n = len(p)
+// Write splits the incoming message into TagSize chunks, and passes them to
+// update. It buffers incomplete chunks.
+func (h *macGeneric) Write(p []byte) (int, error) {
+	nn := len(p)
 	if h.offset > 0 {
-		remaining := TagSize - h.offset
-		if n < remaining {
-			h.offset += copy(h.buffer[h.offset:], p)
-			return n, nil
+		n := copy(h.buffer[h.offset:], p)
+		if h.offset+n < TagSize {
+			h.offset += n
+			return nn, nil
 		}
-		copy(h.buffer[h.offset:], p[:remaining])
-		p = p[remaining:]
+		p = p[n:]
 		h.offset = 0
-		updateGeneric(h.buffer[:], msgBlock, &(h.h), &(h.r))
+		updateGeneric(&h.macState, h.buffer[:])
 	}
-	if nn := len(p) - (len(p) % TagSize); nn > 0 {
-		updateGeneric(p, msgBlock, &(h.h), &(h.r))
-		p = p[nn:]
+	if n := len(p) - (len(p) % TagSize); n > 0 {
+		updateGeneric(&h.macState, p[:n])
+		p = p[n:]
 	}
 	if len(p) > 0 {
 		h.offset += copy(h.buffer[h.offset:], p)
 	}
-	return n, nil
+	return nn, nil
 }

-func (h *macGeneric) Sum(out *[16]byte) {
-	H, R := h.h, h.r
+// Sum flushes the last incomplete chunk from the buffer, if any, and generates
+// the MAC output. It does not modify its state, in order to allow for multiple
+// calls to Sum, even if no Write is allowed after Sum.
+func (h *macGeneric) Sum(out *[TagSize]byte) {
+	state := h.macState
 	if h.offset > 0 {
-		var buffer [TagSize]byte
-		copy(buffer[:], h.buffer[:h.offset])
-		buffer[h.offset] = 1 // invariant: h.offset < TagSize
-		updateGeneric(buffer[:], finalBlock, &H, &R)
+		updateGeneric(&state, h.buffer[:h.offset])
 	}
-	finalizeGeneric(out, &H, &(h.s))
+	finalize(out, &state.h, &state.s)
 }

-func updateGeneric(msg []byte, flag uint32, h, r *[5]uint32) {
-	h0, h1, h2, h3, h4 := h[0], h[1], h[2], h[3], h[4]
-	r0, r1, r2, r3, r4 := uint64(r[0]), uint64(r[1]), uint64(r[2]), uint64(r[3]), uint64(r[4])
-	R1, R2, R3, R4 := r1*5, r2*5, r3*5, r4*5
+// [rMask0, rMask1] is the specified Poly1305 clamping mask in little-endian. It
+// clears some bits of the secret coefficient to make it possible to implement
+// multiplication more efficiently.
+const (
+	rMask0 = 0x0FFFFFFC0FFFFFFF
+	rMask1 = 0x0FFFFFFC0FFFFFFC
+)

-	for len(msg) >= TagSize {
-		// h += msg
-		h0 += binary.LittleEndian.Uint32(msg[0:]) & 0x3ffffff
-		h1 += (binary.LittleEndian.Uint32(msg[3:]) >> 2) & 0x3ffffff
-		h2 += (binary.LittleEndian.Uint32(msg[6:]) >> 4) & 0x3ffffff
-		h3 += (binary.LittleEndian.Uint32(msg[9:]) >> 6) & 0x3ffffff
-		h4 += (binary.LittleEndian.Uint32(msg[12:]) >> 8) | flag
+func initialize(key *[32]byte, r, s *[2]uint64) {
+	r[0] = binary.LittleEndian.Uint64(key[0:8]) & rMask0
+	r[1] = binary.LittleEndian.Uint64(key[8:16]) & rMask1
+	s[0] = binary.LittleEndian.Uint64(key[16:24])
+	s[1] = binary.LittleEndian.Uint64(key[24:32])
+}

-		// h *= r
-		d0 := (uint64(h0) * r0) + (uint64(h1) * R4) + (uint64(h2) * R3) + (uint64(h3) * R2) + (uint64(h4) * R1)
-		d1 := (d0 >> 26) + (uint64(h0) * r1) + (uint64(h1) * r0) + (uint64(h2) * R4) + (uint64(h3) * R3) + (uint64(h4) * R2)
-		d2 := (d1 >> 26) + (uint64(h0) * r2) + (uint64(h1) * r1) + (uint64(h2) * r0) + (uint64(h3) * R4) + (uint64(h4) * R3)
-		d3 := (d2 >> 26) + (uint64(h0) * r3) + (uint64(h1) * r2) + (uint64(h2) * r1) + (uint64(h3) * r0) + (uint64(h4) * R4)
-		d4 := (d3 >> 26) + (uint64(h0) * r4) + (uint64(h1) * r3) + (uint64(h2) * r2) + (uint64(h3) * r1) + (uint64(h4) * r0)
+// uint128 holds a 128-bit number as two 64-bit limbs, for use with the
+// bits.Mul64 and bits.Add64 intrinsics.
+type uint128 struct {
+	lo, hi uint64
+}

-		// h %= p
-		h0 = uint32(d0) & 0x3ffffff
-		h1 = uint32(d1) & 0x3ffffff
-		h2 = uint32(d2) & 0x3ffffff
-		h3 = uint32(d3) & 0x3ffffff
-		h4 = uint32(d4) & 0x3ffffff
+func mul64(a, b uint64) uint128 {
+	hi, lo := bitsMul64(a, b)
+	return uint128{lo, hi}
+}

-		h0 += uint32(d4>>26) * 5
-		h1 += h0 >> 26
-		h0 = h0 & 0x3ffffff
+func add128(a, b uint128) uint128 {
+	lo, c := bitsAdd64(a.lo, b.lo, 0)
+	hi, c := bitsAdd64(a.hi, b.hi, c)
+	if c != 0 {
+		panic("poly1305: unexpected overflow")
+	}
+	return uint128{lo, hi}
+}

-		msg = msg[TagSize:]
+func shiftRightBy2(a uint128) uint128 {
+	a.lo = a.lo>>2 | (a.hi&3)<<62
+	a.hi = a.hi >> 2
+	return a
+}
+
+// updateGeneric absorbs msg into the state.h accumulator. For each chunk m of
+// 128 bits of message, it computes
+//
+//     h₊ = (h + m) * r  mod  2¹³⁰ - 5
+//
+// If the msg length is not a multiple of TagSize, it assumes the last
+// incomplete chunk is the final one.
+func updateGeneric(state *macState, msg []byte) {
+	h0, h1, h2 := state.h[0], state.h[1], state.h[2]
+	r0, r1 := state.r[0], state.r[1]
+
+	for len(msg) > 0 {
+		var c uint64
+
+		// For the first step, h + m, we use a chain of bits.Add64 intrinsics.
+		// The resulting value of h might exceed 2¹³⁰ - 5, but will be partially
+		// reduced at the end of the multiplication below.
+		//
+		// The spec requires us to set a bit just above the message size, not to
+		// hide leading zeroes. For full chunks, that's 1 << 128, so we can just
+		// add 1 to the most significant (2¹²⁸) limb, h2.
+		if len(msg) >= TagSize {
+			h0, c = bitsAdd64(h0, binary.LittleEndian.Uint64(msg[0:8]), 0)
+			h1, c = bitsAdd64(h1, binary.LittleEndian.Uint64(msg[8:16]), c)
+			h2 += c + 1
+
+			msg = msg[TagSize:]
+		} else {
+			var buf [TagSize]byte
+			copy(buf[:], msg)
+			buf[len(msg)] = 1
+
+			h0, c = bitsAdd64(h0, binary.LittleEndian.Uint64(buf[0:8]), 0)
+			h1, c = bitsAdd64(h1, binary.LittleEndian.Uint64(buf[8:16]), c)
+			h2 += c
+
+			msg = nil
+		}
+
+		// Multiplication of big number limbs is similar to elementary school
+		// columnar multiplication. Instead of digits, there are 64-bit limbs.
+		//
+		// We are multiplying a 3 limbs number, h, by a 2 limbs number, r.
+		//
+		//                        h2    h1    h0  x
+		//                              r1    r0  =
+		//                       ----------------
+		//                      h2r0  h1r0  h0r0     <-- individual 128-bit products
+		//            +   h2r1  h1r1  h0r1
+		//               ------------------------
+		//                 m3    m2    m1    m0      <-- result in 128-bit overlapping limbs
+		//               ------------------------
+		//         m3.hi m2.hi m1.hi m0.hi           <-- carry propagation
+		//     +         m3.lo m2.lo m1.lo m0.lo
+		//        -------------------------------
+		//           t4    t3    t2    t1    t0      <-- final result in 64-bit limbs
+		//
+		// The main difference from pen-and-paper multiplication is that we do
+		// carry propagation in a separate step, as if we wrote two digit sums
+		// at first (the 128-bit limbs), and then carried the tens all at once.
+
+		h0r0 := mul64(h0, r0)
+		h1r0 := mul64(h1, r0)
+		h2r0 := mul64(h2, r0)
+		h0r1 := mul64(h0, r1)
+		h1r1 := mul64(h1, r1)
+		h2r1 := mul64(h2, r1)
+
+		// Since h2 is known to be at most 7 (5 + 1 + 1), and r0 and r1 have their
+		// top 4 bits cleared by rMask{0,1}, we know that their product is not going
+		// to overflow 64 bits, so we can ignore the high part of the products.
+		//
+		// This also means that the product doesn't have a fifth limb (t4).
+		if h2r0.hi != 0 {
+			panic("poly1305: unexpected overflow")
+		}
+		if h2r1.hi != 0 {
+			panic("poly1305: unexpected overflow")
+		}
+
+		m0 := h0r0
+		m1 := add128(h1r0, h0r1) // These two additions don't overflow thanks again
+		m2 := add128(h2r0, h1r1) // to the 4 masked bits at the top of r0 and r1.
+		m3 := h2r1
+
+		t0 := m0.lo
+		t1, c := bitsAdd64(m1.lo, m0.hi, 0)
+		t2, c := bitsAdd64(m2.lo, m1.hi, c)
+		t3, _ := bitsAdd64(m3.lo, m2.hi, c)
+
+		// Now we have the result as 4 64-bit limbs, and we need to reduce it
+		// modulo 2¹³⁰ - 5. The special shape of this Crandall prime lets us do
+		// a cheap partial reduction according to the reduction identity
+		//
+		//     c * 2¹³⁰ + n  =  c * 5 + n  mod  2¹³⁰ - 5
+		//
+		// because 2¹³⁰ = 5 mod 2¹³⁰ - 5. Partial reduction since the result is
+		// likely to be larger than 2¹³⁰ - 5, but still small enough to fit the
+		// assumptions we make about h in the rest of the code.
+		//
+		// See also https://speakerdeck.com/gtank/engineering-prime-numbers?slide=23
+
+		// We split the final result at the 2¹³⁰ mark into h and cc, the carry.
+		// Note that the carry bits are effectively shifted left by 2, in other
+		// words, cc = c * 4 for the c in the reduction identity.
+		h0, h1, h2 = t0, t1, t2&maskLow2Bits
+		cc := uint128{t2 & maskNotLow2Bits, t3}
+
+		// To add c * 5 to h, we first add cc = c * 4, and then add (cc >> 2) = c.
+
+		h0, c = bitsAdd64(h0, cc.lo, 0)
+		h1, c = bitsAdd64(h1, cc.hi, c)
+		h2 += c
+
+		cc = shiftRightBy2(cc)
+
+		h0, c = bitsAdd64(h0, cc.lo, 0)
+		h1, c = bitsAdd64(h1, cc.hi, c)
+		h2 += c
+
+		// h2 is at most 3 + 1 + 1 = 5, making the whole of h at most
+		//
+		//     5 * 2¹²⁸ + (2¹²⁸ - 1) = 6 * 2¹²⁸ - 1
 	}

-	h[0], h[1], h[2], h[3], h[4] = h0, h1, h2, h3, h4
+	state.h[0], state.h[1], state.h[2] = h0, h1, h2
 }

-func finalizeGeneric(out *[TagSize]byte, h *[5]uint32, s *[4]uint32) {
-	h0, h1, h2, h3, h4 := h[0], h[1], h[2], h[3], h[4]
+const (
+	maskLow2Bits    uint64 = 0x0000000000000003
+	maskNotLow2Bits uint64 = ^maskLow2Bits
+)

-	// h %= p reduction
-	h2 += h1 >> 26
-	h1 &= 0x3ffffff
-	h3 += h2 >> 26
-	h2 &= 0x3ffffff
-	h4 += h3 >> 26
-	h3 &= 0x3ffffff
-	h0 += 5 * (h4 >> 26)
-	h4 &= 0x3ffffff
-	h1 += h0 >> 26
-	h0 &= 0x3ffffff
+// select64 returns x if v == 1 and y if v == 0, in constant time.
+func select64(v, x, y uint64) uint64 { return ^(v-1)&x | (v-1)&y }

-	// h - p
-	t0 := h0 + 5
-	t1 := h1 + (t0 >> 26)
-	t2 := h2 + (t1 >> 26)
-	t3 := h3 + (t2 >> 26)
-	t4 := h4 + (t3 >> 26) - (1 << 26)
-	t0 &= 0x3ffffff
-	t1 &= 0x3ffffff
-	t2 &= 0x3ffffff
-	t3 &= 0x3ffffff
+// [p0, p1, p2] is 2¹³⁰ - 5 in little endian order.
+const (
+	p0 = 0xFFFFFFFFFFFFFFFB
+	p1 = 0xFFFFFFFFFFFFFFFF
+	p2 = 0x0000000000000003
+)

-	// select h if h < p else h - p
-	t_mask := (t4 >> 31) - 1
-	h_mask := ^t_mask
-	h0 = (h0 & h_mask) | (t0 & t_mask)
-	h1 = (h1 & h_mask) | (t1 & t_mask)
-	h2 = (h2 & h_mask) | (t2 & t_mask)
-	h3 = (h3 & h_mask) | (t3 & t_mask)
-	h4 = (h4 & h_mask) | (t4 & t_mask)
+// finalize completes the modular reduction of h and computes
+//
+//     out = h + s  mod  2¹²⁸
+//
+func finalize(out *[TagSize]byte, h *[3]uint64, s *[2]uint64) {
+	h0, h1, h2 := h[0], h[1], h[2]

-	// h %= 2^128
-	h0 |= h1 << 26
-	h1 = ((h1 >> 6) | (h2 << 20))
-	h2 = ((h2 >> 12) | (h3 << 14))
-	h3 = ((h3 >> 18) | (h4 << 8))
+	// After the partial reduction in updateGeneric, h might be more than
+	// 2¹³⁰ - 5, but will be less than 2 * (2¹³⁰ - 5). To complete the reduction
+	// in constant time, we compute t = h - (2¹³⁰ - 5), and select h as the
+	// result if the subtraction underflows, and t otherwise.

-	// s: the s part of the key
-	// tag = (h + s) % (2^128)
-	t := uint64(h0) + uint64(s[0])
-	h0 = uint32(t)
-	t = uint64(h1) + uint64(s[1]) + (t >> 32)
-	h1 = uint32(t)
-	t = uint64(h2) + uint64(s[2]) + (t >> 32)
-	h2 = uint32(t)
-	t = uint64(h3) + uint64(s[3]) + (t >> 32)
-	h3 = uint32(t)
+	hMinusP0, b := bitsSub64(h0, p0, 0)
+	hMinusP1, b := bitsSub64(h1, p1, b)
+	_, b = bitsSub64(h2, p2, b)

-	binary.LittleEndian.PutUint32(out[0:], h0)
-	binary.LittleEndian.PutUint32(out[4:], h1)
-	binary.LittleEndian.PutUint32(out[8:], h2)
-	binary.LittleEndian.PutUint32(out[12:], h3)
+	// h = h if h < p else h - p
+	h0 = select64(b, h0, hMinusP0)
+	h1 = select64(b, h1, hMinusP1)
+
+	// Finally, we compute the last Poly1305 step
+	//
+	//     tag = h + s  mod  2¹²⁸
+	//
+	// by just doing a wide addition with the 128 low bits of h and discarding
+	// the overflow.
+	h0, c := bitsAdd64(h0, s[0], 0)
+	h1, _ = bitsAdd64(h1, s[1], c)
+
+	binary.LittleEndian.PutUint64(out[0:8], h0)
+	binary.LittleEndian.PutUint64(out[8:16], h1)
 }
--- a/vendor/golang.org/x/crypto/poly1305/sum_noasm.go
+++ b/vendor/golang.org/x/crypto/poly1305/sum_noasm.go
@ -6,10 +6,7 @@

 package poly1305

-// Sum generates an authenticator for msg using a one-time key and puts the
-// 16-byte result into out. Authenticating two different messages with the same
-// key allows an attacker to forge messages at will.
-func Sum(out *[TagSize]byte, msg []byte, key *[32]byte) {
+func sum(out *[TagSize]byte, msg []byte, key *[32]byte) {
 	h := newMAC(key)
 	h.Write(msg)
 	h.Sum(out)
--- a/vendor/golang.org/x/crypto/poly1305/sum_ppc64le.go
+++ b/vendor/golang.org/x/crypto/poly1305/sum_ppc64le.go
@ -7,62 +7,52 @@
 package poly1305

 //go:noescape
-func initialize(state *[7]uint64, key *[32]byte)
+func update(state *macState, msg []byte)

-//go:noescape
-func update(state *[7]uint64, msg []byte)
-
-//go:noescape
-func finalize(tag *[TagSize]byte, state *[7]uint64)
-
-// Sum generates an authenticator for m using a one-time key and puts the
-// 16-byte result into out. Authenticating two different messages with the same
-// key allows an attacker to forge messages at will.
-func Sum(out *[16]byte, m []byte, key *[32]byte) {
+func sum(out *[16]byte, m []byte, key *[32]byte) {
 	h := newMAC(key)
 	h.Write(m)
 	h.Sum(out)
 }

 func newMAC(key *[32]byte) (h mac) {
-	initialize(&h.state, key)
+	initialize(key, &h.r, &h.s)
 	return
 }

-type mac struct {
-	state [7]uint64 // := uint64{ h0, h1, h2, r0, r1, pad0, pad1 }
+// mac is a wrapper for macGeneric that redirects calls that would have gone to
+// updateGeneric to update.
+//
+// Its Write and Sum methods are otherwise identical to the macGeneric ones, but
+// using function pointers would carry a major performance cost.
+type mac struct{ macGeneric }

-	buffer [TagSize]byte
-	offset int
-}
-
-func (h *mac) Write(p []byte) (n int, err error) {
-	n = len(p)
+func (h *mac) Write(p []byte) (int, error) {
+	nn := len(p)
 	if h.offset > 0 {
-		remaining := TagSize - h.offset
-		if n < remaining {
-			h.offset += copy(h.buffer[h.offset:], p)
-			return n, nil
+		n := copy(h.buffer[h.offset:], p)
+		if h.offset+n < TagSize {
+			h.offset += n
+			return nn, nil
 		}
-		copy(h.buffer[h.offset:], p[:remaining])
-		p = p[remaining:]
+		p = p[n:]
 		h.offset = 0
-		update(&h.state, h.buffer[:])
+		update(&h.macState, h.buffer[:])
 	}
-	if nn := len(p) - (len(p) % TagSize); nn > 0 {
-		update(&h.state, p[:nn])
-		p = p[nn:]
+	if n := len(p) - (len(p) % TagSize); n > 0 {
+		update(&h.macState, p[:n])
+		p = p[n:]
 	}
 	if len(p) > 0 {
 		h.offset += copy(h.buffer[h.offset:], p)
 	}
-	return n, nil
+	return nn, nil
 }

 func (h *mac) Sum(out *[16]byte) {
-	state := h.state
+	state := h.macState
 	if h.offset > 0 {
 		update(&state, h.buffer[:h.offset])
 	}
-	finalize(out, &state)
+	finalize(out, &state.h, &state.s)
 }
--- a/vendor/golang.org/x/crypto/poly1305/sum_ppc64le.s
+++ b/vendor/golang.org/x/crypto/poly1305/sum_ppc64le.s
@ -58,7 +58,6 @@ DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
 GLOBL ·poly1305Mask<>(SB), RODATA, $16

 // func update(state *[7]uint64, msg []byte)
-
 TEXT ·update(SB), $0-32
 	MOVD state+0(FP), R3
 	MOVD msg_base+8(FP), R4
@ -180,68 +179,3 @@ done:
 	MOVD R9, 8(R3)
 	MOVD R10, 16(R3)
 	RET
-
-// func initialize(state *[7]uint64, key *[32]byte)
-TEXT ·initialize(SB), $0-16
-	MOVD state+0(FP), R3
-	MOVD key+8(FP), R4
-
-	// state[0...7] is initialized with zero
-	// Load key
-	MOVD 0(R4), R5
-	MOVD 8(R4), R6
-	MOVD 16(R4), R7
-	MOVD 24(R4), R8
-
-	// Address of key mask
-	MOVD $·poly1305Mask<>(SB), R9
-
-	// Save original key in state
-	MOVD R7, 40(R3)
-	MOVD R8, 48(R3)
-
-	// Get mask
-	MOVD (R9), R7
-	MOVD 8(R9), R8
-
-	// And with key
-	AND R5, R7, R5
-	AND R6, R8, R6
-
-	// Save masked key in state
-	MOVD R5, 24(R3)
-	MOVD R6, 32(R3)
-	RET
-
-// func finalize(tag *[TagSize]byte, state *[7]uint64)
-TEXT ·finalize(SB), $0-16
-	MOVD tag+0(FP), R3
-	MOVD state+8(FP), R4
-
-	// Get h0, h1, h2 from state
-	MOVD 0(R4), R5
-	MOVD 8(R4), R6
-	MOVD 16(R4), R7
-
-	// Save h0, h1
-	MOVD  R5, R8
-	MOVD  R6, R9
-	MOVD  $3, R20
-	MOVD  $-1, R21
-	SUBC  $-5, R5
-	SUBE  R21, R6
-	SUBE  R20, R7
-	MOVD  $0, R21
-	SUBZE R21
-
-	// Check for carry
-	CMP  $0, R21
-	ISEL $2, R5, R8, R5
-	ISEL $2, R6, R9, R6
-	MOVD 40(R4), R8
-	MOVD 48(R4), R9
-	ADDC R8, R5
-	ADDE R9, R6
-	MOVD R5, 0(R3)
-	MOVD R6, 8(R3)
-	RET
--- a/vendor/golang.org/x/crypto/poly1305/sum_s390x.go
+++ b/vendor/golang.org/x/crypto/poly1305/sum_s390x.go
@ -22,10 +22,7 @@ func poly1305vx(out *[16]byte, m *byte, mlen uint64, key *[32]byte)
 //go:noescape
 func poly1305vmsl(out *[16]byte, m *byte, mlen uint64, key *[32]byte)

-// Sum generates an authenticator for m using a one-time key and puts the
-// 16-byte result into out. Authenticating two different messages with the same
-// key allows an attacker to forge messages at will.
-func Sum(out *[16]byte, m []byte, key *[32]byte) {
+func sum(out *[16]byte, m []byte, key *[32]byte) {
 	if cpu.S390X.HasVX {
 		var mPtr *byte
 		if len(m) > 0 {
--- a/vendor/golang.org/x/net/http2/server.go
+++ b/vendor/golang.org/x/net/http2/server.go
@ -252,7 +252,7 @@ func ConfigureServer(s *http.Server, conf *Server) error {
 			}
 		}
 		if !haveRequired {
-			return fmt.Errorf("http2: TLSConfig.CipherSuites is missing an HTTP/2-required AES_128_GCM_SHA256 cipher.")
+			return fmt.Errorf("http2: TLSConfig.CipherSuites is missing an HTTP/2-required AES_128_GCM_SHA256 cipher (need at least one of TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 or TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256).")
 		}
 	}

--- a/vendor/golang.org/x/net/internal/socket/norace.go
+++ b/vendor/golang.org/x/net/internal/socket/norace.go
@ -0,0 +1,12 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !race
+
+package socket
+
+func (m *Message) raceRead() {
+}
+func (m *Message) raceWrite() {
+}
--- a/vendor/golang.org/x/net/internal/socket/race.go
+++ b/vendor/golang.org/x/net/internal/socket/race.go
@ -0,0 +1,37 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build race
+
+package socket
+
+import (
+	"runtime"
+	"unsafe"
+)
+
+// This package reads and writes the Message buffers using a
+// direct system call, which the race detector can't see.
+// These functions tell the race detector what is going on during the syscall.
+
+func (m *Message) raceRead() {
+	for _, b := range m.Buffers {
+		if len(b) > 0 {
+			runtime.RaceReadRange(unsafe.Pointer(&b[0]), len(b))
+		}
+	}
+	if b := m.OOB; len(b) > 0 {
+		runtime.RaceReadRange(unsafe.Pointer(&b[0]), len(b))
+	}
+}
+func (m *Message) raceWrite() {
+	for _, b := range m.Buffers {
+		if len(b) > 0 {
+			runtime.RaceWriteRange(unsafe.Pointer(&b[0]), len(b))
+		}
+	}
+	if b := m.OOB; len(b) > 0 {
+		runtime.RaceWriteRange(unsafe.Pointer(&b[0]), len(b))
+	}
+}
--- a/vendor/golang.org/x/net/internal/socket/rawconn_mmsg.go
+++ b/vendor/golang.org/x/net/internal/socket/rawconn_mmsg.go
@ -13,6 +13,9 @@ import (
 )

 func (c *Conn) recvMsgs(ms []Message, flags int) (int, error) {
+	for i := range ms {
+		ms[i].raceWrite()
+	}
 	hs := make(mmsghdrs, len(ms))
 	var parseFn func([]byte, string) (net.Addr, error)
 	if c.network != "tcp" {
@ -43,6 +46,9 @@ func (c *Conn) recvMsgs(ms []Message, flags int) (int, error) {
 }

 func (c *Conn) sendMsgs(ms []Message, flags int) (int, error) {
+	for i := range ms {
+		ms[i].raceRead()
+	}
 	hs := make(mmsghdrs, len(ms))
 	var marshalFn func(net.Addr) []byte
 	if c.network != "tcp" {
--- a/vendor/golang.org/x/net/internal/socket/rawconn_msg.go
+++ b/vendor/golang.org/x/net/internal/socket/rawconn_msg.go
@ -12,6 +12,7 @@ import (
 )

 func (c *Conn) recvMsg(m *Message, flags int) error {
+	m.raceWrite()
 	var h msghdr
 	vs := make([]iovec, len(m.Buffers))
 	var sa []byte
@ -48,6 +49,7 @@ func (c *Conn) recvMsg(m *Message, flags int) error {
 }

 func (c *Conn) sendMsg(m *Message, flags int) error {
+	m.raceRead()
 	var h msghdr
 	vs := make([]iovec, len(m.Buffers))
 	var sa []byte
--- a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go
@ -0,0 +1,9 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build linux,!arm,!arm64,!ppc64,!ppc64le,!s390x
+
+package cpu
+
+func doinit() {}
--- a/vendor/golang.org/x/sys/cpu/cpu_mips64x.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_mips64x.go
@ -7,5 +7,3 @@
 package cpu

 const cacheLineSize = 32
-
-func doinit() {}
--- a/vendor/golang.org/x/sys/cpu/cpu_mipsx.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_mipsx.go
@ -7,5 +7,3 @@
 package cpu

 const cacheLineSize = 32
-
-func doinit() {}
--- a/vendor/golang.org/x/sys/cpu/cpu_other_arm64.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_other_arm64.go
@ -7,5 +7,3 @@
 package cpu

 const cacheLineSize = 64
-
-func doinit() {}
--- a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go
@ -0,0 +1,9 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build riscv64
+
+package cpu
+
+const cacheLineSize = 32
--- a/vendor/golang.org/x/sys/cpu/cpu_wasm.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_wasm.go
@ -11,5 +11,3 @@ package cpu
 // rules are good enough.

 const cacheLineSize = 0
-
-func doinit() {}
--- a/vendor/golang.org/x/sys/unix/fdset.go
+++ b/vendor/golang.org/x/sys/unix/fdset.go
@ -0,0 +1,29 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris
+
+package unix
+
+// Set adds fd to the set fds.
+func (fds *FdSet) Set(fd int) {
+	fds.Bits[fd/NFDBITS] |= (1 << (uintptr(fd) % NFDBITS))
+}
+
+// Clear removes fd from the set fds.
+func (fds *FdSet) Clear(fd int) {
+	fds.Bits[fd/NFDBITS] &^= (1 << (uintptr(fd) % NFDBITS))
+}
+
+// IsSet returns whether fd is in the set fds.
+func (fds *FdSet) IsSet(fd int) bool {
+	return fds.Bits[fd/NFDBITS]&(1<<(uintptr(fd)%NFDBITS)) != 0
+}
+
+// Zero clears the set fds.
+func (fds *FdSet) Zero() {
+	for i := range fds.Bits {
+		fds.Bits[i] = 0
+	}
+}
--- a/vendor/golang.org/x/sys/windows/empty.s
+++ b/vendor/golang.org/x/sys/windows/empty.s
@ -0,0 +1,8 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !go1.12
+
+// This file is here to allow bodyless functions with go:linkname for Go 1.11
+// and earlier (see https://golang.org/issue/23311).
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@ -52,7 +52,7 @@ github.com/powerman/check
 # github.com/smartystreets/goconvey v1.6.4
 github.com/smartystreets/goconvey/convey/gotest
 github.com/smartystreets/goconvey/convey/reporting
-# golang.org/x/crypto v0.0.0-20191029031824-8986dd9e96cf
+# golang.org/x/crypto v0.0.0-20191112222119-e1110fd1c708
 golang.org/x/crypto/curve25519
 golang.org/x/crypto/ed25519
 golang.org/x/crypto/ed25519/internal/edwards25519
@ -61,7 +61,7 @@ golang.org/x/crypto/nacl/box
 golang.org/x/crypto/nacl/secretbox
 golang.org/x/crypto/poly1305
 golang.org/x/crypto/salsa20/salsa
-# golang.org/x/net v0.0.0-20191101175033-0deb6923b6d9
+# golang.org/x/net v0.0.0-20191112182307-2180aed22343
 golang.org/x/net/bpf
 golang.org/x/net/http/httpguts
 golang.org/x/net/http2
@ -73,7 +73,7 @@ golang.org/x/net/internal/socks
 golang.org/x/net/ipv4
 golang.org/x/net/ipv6
 golang.org/x/net/proxy
-# golang.org/x/sys v0.0.0-20191104094858-e8c54fb511f6
+# golang.org/x/sys v0.0.0-20191113165036-4c7a9d0fe056
 golang.org/x/sys/cpu
 golang.org/x/sys/unix
 golang.org/x/sys/windows