diff --git a/Gopkg.lock b/Gopkg.lock index c49c2bdf..3ae5c272 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -40,6 +40,15 @@ ] revision = "5312a61534124124185d41f09206b9fef1d88403" +[[projects]] + name = "github.com/cloudflare/p751sidh" + packages = [ + ".", + "p751toolbox" + ] + revision = "e730a9e871a31cefd3bef50915c5721f1c5d5319" + version = "0.1" + [[projects]] name = "github.com/coreos/go-systemd" packages = [ @@ -159,6 +168,6 @@ [solve-meta] analyzer-name = "dep" analyzer-version = 1 - inputs-digest = "8cae66c7ccdf2b62925b458f82a9c9855db9106c0ffe6d022b65fa0e4387a0c9" + inputs-digest = "e2dc5bdef0a183bdd23fca263d0bab8575cd104c404e661bc7cd18e5e0942100" solver-name = "gps-cdcl" solver-version = 1 diff --git a/Gopkg.toml b/Gopkg.toml index f741c047..f84962c0 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -57,3 +57,7 @@ [[constraint]] branch = "master" name = "golang.org/x/crypto" + +[[constraint]] + name = "github.com/cloudflare/p751sidh" + version = "0.1.0" diff --git a/README.md b/README.md index 2e4dcf4f..5e297516 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,10 @@ A flexible DNS proxy, with support for modern encrypted DNS protocols such as [DNSCrypt v2](https://github.com/DNSCrypt/dnscrypt-protocol/blob/master/DNSCRYPT-V2-PROTOCOL.txt) and [DNS-over-HTTP/2](https://tools.ietf.org/html/draft-ietf-doh-dns-over-https-03). +# EXPERIMENTAL POST-QUANTUM BRANCH + +This branch implements an experimental version of the DNSCrypt protocol that adds support for post-quantum key exchange using Supersingular isogeny Diffie–Hellman. 
+ ## [dnscrypt-proxy 2.0.0 final is available for download!](https://github.com/jedisct1/dnscrypt-proxy/releases/latest) ## [Documentation](https://dnscrypt.info/doc) diff --git a/dnscrypt-proxy/common.go b/dnscrypt-proxy/common.go index 8dadfc84..a0289b62 100644 --- a/dnscrypt-proxy/common.go +++ b/dnscrypt-proxy/common.go @@ -15,6 +15,7 @@ const ( UndefinedConstruction CryptoConstruction = iota XSalsa20Poly1305 XChacha20Poly1305 + SIDHXChacha20Poly1305 ) const ( diff --git a/dnscrypt-proxy/crypto.go b/dnscrypt-proxy/crypto.go index 3be68aae..c2c30d85 100644 --- a/dnscrypt-proxy/crypto.go +++ b/dnscrypt-proxy/crypto.go @@ -57,10 +57,14 @@ func (proxy *Proxy) Encrypt(serverInfo *ServerInfo, packet []byte, proto string) err = errors.New("Question too large; cannot be padded") return } - encrypted = append(serverInfo.MagicQuery[:], proxy.proxyPublicKey[:]...) + if serverInfo.CryptoConstruction == SIDHXChacha20Poly1305 { + encrypted = append(serverInfo.MagicQuery[:], proxy.proxySIDHPublicKey[:]...) + } else { + encrypted = append(serverInfo.MagicQuery[:], proxy.proxyPublicKey[:]...) + } encrypted = append(encrypted, nonce[:HalfNonceSize]...) 
padded := pad(packet, paddedLength-QueryOverhead) - if serverInfo.CryptoConstruction == XChacha20Poly1305 { + if serverInfo.CryptoConstruction == XChacha20Poly1305 || serverInfo.CryptoConstruction == SIDHXChacha20Poly1305 { encrypted = xsecretbox.Seal(encrypted, nonce, padded, serverInfo.SharedKey[:]) } else { var xsalsaNonce [24]byte diff --git a/dnscrypt-proxy/dnscrypt_certs.go b/dnscrypt-proxy/dnscrypt_certs.go index 4f0d291d..49f67ef1 100644 --- a/dnscrypt-proxy/dnscrypt_certs.go +++ b/dnscrypt-proxy/dnscrypt_certs.go @@ -7,6 +7,7 @@ import ( "strings" "time" + "github.com/cloudflare/p751sidh" "github.com/jedisct1/dlog" "github.com/jedisct1/xsecretbox" "github.com/miekg/dns" @@ -63,6 +64,8 @@ func FetchCurrentDNSCryptCert(proxy *Proxy, serverName *string, proto string, pk cryptoConstruction = XSalsa20Poly1305 case 0x0002: cryptoConstruction = XChacha20Poly1305 + case 0x0003: + cryptoConstruction = SIDHXChacha20Poly1305 default: dlog.Noticef("[%v] Unsupported crypto construction", providerName) continue @@ -117,22 +120,37 @@ func FetchCurrentDNSCryptCert(proxy *Proxy, serverName *string, proto string, pk dlog.Noticef("[%v] Cryptographic construction %v not supported", providerName, cryptoConstruction) continue } - var serverPk [32]byte - copy(serverPk[:], binCert[72:104]) var sharedKey [32]byte - if cryptoConstruction == XChacha20Poly1305 { - sharedKey, err = xsecretbox.SharedKey(proxy.proxySecretKey, serverPk) - if err != nil { - dlog.Criticalf("[%v] Weak public key", providerName) + if cryptoConstruction == SIDHXChacha20Poly1305 { + if len(binCert) < 656 { + dlog.Warnf("[%v] Certificate too short", providerName) continue } + var serverPk [564]byte + copy(serverPk[:], binCert[72:636]) + if cryptoConstruction == XChacha20Poly1305 { + var serverPkX p751sidh.SIDHPublicKeyBob + serverPkX.FromBytes(serverPk[:]) + sharedKeyX := proxy.proxySIDHSecretKey.SharedSecret(&serverPkX) + copy(sharedKey[:], sharedKeyX[:32]) + } } else { - box.Precompute(&sharedKey, &serverPk, 
&proxy.proxySecretKey) + var serverPk [32]byte + copy(serverPk[:], binCert[72:104]) + if cryptoConstruction == XChacha20Poly1305 { + sharedKey, err = xsecretbox.SharedKey(proxy.proxySecretKey, serverPk) + if err != nil { + dlog.Criticalf("[%v] Weak public key", providerName) + continue + } + } else { + box.Precompute(&sharedKey, &serverPk, &proxy.proxySecretKey) + } + copy(certInfo.ServerPk[:], serverPk[:]) } certInfo.SharedKey = sharedKey highestSerial = serial certInfo.CryptoConstruction = cryptoConstruction - copy(certInfo.ServerPk[:], serverPk[:]) copy(certInfo.MagicQuery[:], binCert[104:112]) if isNew { dlog.Noticef("[%s] OK (crypto v%d) - rtt: %dms", *serverName, cryptoConstruction, rtt.Nanoseconds()/1000000) diff --git a/dnscrypt-proxy/proxy.go b/dnscrypt-proxy/proxy.go index 93d051ce..bb818638 100644 --- a/dnscrypt-proxy/proxy.go +++ b/dnscrypt-proxy/proxy.go @@ -1,14 +1,15 @@ package main import ( + "crypto/rand" "io" "io/ioutil" - "math/rand" "net" "net/http" "sync/atomic" "time" + "github.com/cloudflare/p751sidh" "github.com/jedisct1/dlog" "github.com/pquerna/cachecontrol/cacheobject" "golang.org/x/crypto/curve25519" @@ -17,6 +18,8 @@ import ( type Proxy struct { proxyPublicKey [32]byte proxySecretKey [32]byte + proxySIDHSecretKey *p751sidh.SIDHSecretKeyAlice + proxySIDHPublicKey [564]byte questionSizeEstimator QuestionSizeEstimator serversInfo ServersInfo timeout time.Duration @@ -60,6 +63,9 @@ func (proxy *Proxy) StartProxy() { dlog.Fatal(err) } curve25519.ScalarBaseMult(&proxy.proxyPublicKey, &proxy.proxySecretKey) + proxySIDHPublicKeyX, proxySIDHSecretKey, _ := p751sidh.GenerateAliceKeypair(rand.Reader) + proxy.proxySIDHSecretKey = proxySIDHSecretKey + proxySIDHPublicKeyX.ToBytes(proxy.proxySIDHPublicKey[:]) for _, registeredServer := range proxy.registeredServers { proxy.serversInfo.registerServer(proxy, registeredServer.name, registeredServer.stamp) } diff --git a/dnscrypt-proxy/serversInfo.go b/dnscrypt-proxy/serversInfo.go index 
7ffab692..bca97af0 100644 --- a/dnscrypt-proxy/serversInfo.go +++ b/dnscrypt-proxy/serversInfo.go @@ -16,6 +16,7 @@ import ( "time" "github.com/VividCortex/ewma" + "github.com/cloudflare/p751sidh" "github.com/jedisct1/dlog" "golang.org/x/crypto/ed25519" ) @@ -44,6 +45,7 @@ type ServerInfo struct { Proto StampProtoType MagicQuery [8]byte ServerPk [32]byte + ServerSIDHPk *p751sidh.SIDHPublicKeyBob SharedKey [32]byte CryptoConstruction CryptoConstruction Name string diff --git a/vendor/github.com/cloudflare/p751sidh/LICENSE b/vendor/github.com/cloudflare/p751sidh/LICENSE new file mode 100644 index 00000000..ea60699d --- /dev/null +++ b/vendor/github.com/cloudflare/p751sidh/LICENSE @@ -0,0 +1,57 @@ +Copyright (c) 2017 Cloudflare. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Cloudflare nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
 IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +======================================================================== + +The x64 field arithmetic implementation was derived from the Microsoft Research +SIDH implementation, <https://github.com/Microsoft/PQCrypto-SIDH>, available +under the following license: + +======================================================================== + +MIT License + +Copyright (c) Microsoft Corporation. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE diff --git a/vendor/github.com/cloudflare/p751sidh/README.md b/vendor/github.com/cloudflare/p751sidh/README.md new file mode 100644 index 00000000..3dc4f4be --- /dev/null +++ b/vendor/github.com/cloudflare/p751sidh/README.md @@ -0,0 +1,23 @@ +# `p751sidh` + +The `p751sidh` package provides a Go implementation of (ephemeral) +supersingular isogeny Diffie-Hellman, as described in [Costello-Longa-Naehrig 2016](https://eprint.iacr.org/2016/413). +Internal functions useful for the implementation are published +in the p751toolbox package. + +The implementation is intended for use on the `amd64` architecture only -- no +generic field arithmetic implementation is provided. Portions of the field +arithmetic were ported from the Microsoft Research implementation. + +This package follows their naming convention, writing "Alice" for the party +using 2^e-isogenies and "Bob" for the party using 3^e-isogenies. + +This package does NOT implement SIDH key validation, so it should only be +used for ephemeral DH. Each keypair should be used at most once. + +If you feel that SIDH may be appropriate for you, consult your +cryptographer. + +Special thanks to [Craig Costello](http://www.craigcostello.com.au/), [Diego Aranha](https://sites.google.com/site/dfaranha/), and [Deirdre Connolly](https://twitter.com/durumcrustulum) for advice +and discussion. 
+ diff --git a/vendor/github.com/cloudflare/p751sidh/p751toolbox/consts.go b/vendor/github.com/cloudflare/p751sidh/p751toolbox/consts.go new file mode 100644 index 00000000..476a8fd5 --- /dev/null +++ b/vendor/github.com/cloudflare/p751sidh/p751toolbox/consts.go @@ -0,0 +1,16 @@ +package p751toolbox + +// The x-coordinate of P_A = [3^239](11, oddsqrt(11^3 + 11)) on E_0(F_p) +var Affine_xPA = PrimeFieldElement{A: Fp751Element{0xd56fe52627914862, 0x1fad60dc96b5baea, 0x1e137d0bf07ab91, 0x404d3e9252161964, 0x3c5385e4cd09a337, 0x4476426769e4af73, 0x9790c6db989dfe33, 0xe06e1c04d2aa8b5e, 0x38c08185edea73b9, 0xaa41f678a4396ca6, 0x92b9259b2229e9a0, 0x2f9326818be0}} + +// The y-coordinate of P_A = [3^239](11, oddsqrt(11^3 + 11)) on E_0(F_p) +var Affine_yPA = PrimeFieldElement{A: Fp751Element{0x332bd16fbe3d7739, 0x7e5e20ff2319e3db, 0xea856234aefbd81b, 0xe016df7d6d071283, 0x8ae42796f73cd34f, 0x6364b408a4774575, 0xa71c97f17ce99497, 0xda03cdd9aa0cbe71, 0xe52b4fda195bd56f, 0xdac41f811fce0a46, 0x9333720f0ee84a61, 0x1399f006e578}} + +// The x-coordinate of P_B = [2^372](6, oddsqrt(6^3 + 6)) on E_0(F_p) +var Affine_xPB = PrimeFieldElement{A: Fp751Element{0xf1a8c9ed7b96c4ab, 0x299429da5178486e, 0xef4926f20cd5c2f4, 0x683b2e2858b4716a, 0xdda2fbcc3cac3eeb, 0xec055f9f3a600460, 0xd5a5a17a58c3848b, 0x4652d836f42eaed5, 0x2f2e71ed78b3a3b3, 0xa771c057180add1d, 0xc780a5d2d835f512, 0x114ea3b55ac1}} + +// The y-coordinate of P_B = [2^372](6, oddsqrt(6^3 + 6)) on E_0(F_p) +var Affine_yPB = PrimeFieldElement{A: Fp751Element{0xd1e1471273e3736b, 0xf9301ba94da241fe, 0xe14ab3c17fef0a85, 0xb4ddd26a037e9e62, 0x66142dfb2afeb69, 0xe297cb70649d6c9e, 0x214dfc6e8b1a0912, 0x9f5ba818b01cf859, 0x87d15b4907c12828, 0xa4da70c53a880dbf, 0xac5df62a72c8f253, 0x2e26a42ec617}} + +// The value of (a+2)/4 for the starting curve E_0 with a=0: this is 1/2 +var E0_aPlus2Over4 = PrimeFieldElement{A: Fp751Element{0x124d6, 0x0, 0x0, 0x0, 0x0, 0xb8e0000000000000, 0x9c8a2434c0aa7287, 0xa206996ca9a378a3, 0x6876280d41a41b52, 
0xe903b49f175ce04f, 0xf8511860666d227, 0x4ea07cff6e7f}} diff --git a/vendor/github.com/cloudflare/p751sidh/p751toolbox/curve.go b/vendor/github.com/cloudflare/p751sidh/p751toolbox/curve.go new file mode 100644 index 00000000..6a2dc392 --- /dev/null +++ b/vendor/github.com/cloudflare/p751sidh/p751toolbox/curve.go @@ -0,0 +1,625 @@ +package p751toolbox + +// A point on the projective line P^1(F_{p^2}). +// +// This is used to work projectively with the curve coefficients. +type ProjectiveCurveParameters struct { + A ExtensionFieldElement + C ExtensionFieldElement +} + +func (params *ProjectiveCurveParameters) FromAffine(a *ExtensionFieldElement) { + params.A = *a + params.C = oneExtensionField +} + +type CachedCurveParameters struct { + Aplus2C ExtensionFieldElement + C4 ExtensionFieldElement +} + +// = 256 +var const256 = ExtensionFieldElement{ + A: Fp751Element{0x249ad67, 0x0, 0x0, 0x0, 0x0, 0x730000000000000, 0x738154969973da8b, 0x856657c146718c7f, 0x461860e4e363a697, 0xf9fd6510bba838cd, 0x4e1a3c3f06993c0c, 0x55abef5b75c7}, + B: Fp751Element{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, +} + +// Recover the curve parameters from three points on the curve. 
+func RecoverCurveParameters(affine_xP, affine_xQ, affine_xQmP *ExtensionFieldElement) ProjectiveCurveParameters {
+	var curveParams ProjectiveCurveParameters
+	var t0, t1 ExtensionFieldElement // scratch values; both are overwritten in place below
+	t0.One() // = 1
+	t1.Mul(affine_xP, affine_xQ) // = x_P * x_Q
+	t0.Sub(&t0, &t1) // = 1 - x_P * x_Q
+	t1.Mul(affine_xP, affine_xQmP) // = x_P * x_{Q-P}
+	t0.Sub(&t0, &t1) // = 1 - x_P * x_Q - x_P * x_{Q-P}
+	t1.Mul(affine_xQ, affine_xQmP) // = x_Q * x_{Q-P}
+	t0.Sub(&t0, &t1) // = 1 - x_P * x_Q - x_P * x_{Q-P} - x_Q * x_{Q-P}
+	curveParams.A.Square(&t0) // = (1 - x_P * x_Q - x_P * x_{Q-P} - x_Q * x_{Q-P})^2
+	t1.Mul(&t1, affine_xP) // = x_P * x_Q * x_{Q-P} (t1 still holds x_Q * x_{Q-P} from above)
+	t1.Add(&t1, &t1) // = 2 * x_P * x_Q * x_{Q-P}
+	curveParams.C.Add(&t1, &t1) // = 4 * x_P * x_Q * x_{Q-P}
+	t0.Add(affine_xP, affine_xQ) // = x_P + x_Q
+	t0.Add(&t0, affine_xQmP) // = x_P + x_Q + x_{Q-P}
+	t1.Mul(&curveParams.C, &t0) // = 4 * x_P * x_Q * x_{Q-P} * (x_P + x_Q + x_{Q-P})
+	curveParams.A.Sub(&curveParams.A, &t1) // = (1 - x_P * x_Q - x_P * x_{Q-P} - x_Q * x_{Q-P})^2 - 4 * x_P * x_Q * x_{Q-P} * (x_P + x_Q + x_{Q-P})
+
+	return curveParams // the pair (A : C) as computed above; no division is performed in this function
+}
+
+// JInvariant computes the j-invariant (not the J-invariant) of the given curve: j = 256(A^2 - 3C^2)^3 / (C^4(A^2 - 4C^2)).
+func (curveParams *ProjectiveCurveParameters) JInvariant() ExtensionFieldElement {
+	var v0, v1, v2, v3 ExtensionFieldElement
+	A := &curveParams.A
+	C := &curveParams.C
+	v0.Square(C) // C^2
+	v1.Square(A) // A^2
+	v2.Add(&v0, &v0) // 2C^2
+	v3.Add(&v2, &v0) // 3C^2
+	v2.Add(&v2, &v2) // 4C^2
+	v2.Sub(&v1, &v2) // A^2 - 4C^2
+	v1.Sub(&v1, &v3) // A^2 - 3C^2
+	v3.Square(&v1) // (A^2 - 3C^2)^2
+	v3.Mul(&v3, &v1) // (A^2 - 3C^2)^3
+	v0.Square(&v0) // C^4
+	v3.Mul(&v3, &const256) // 256(A^2 - 3C^2)^3
+	v2.Mul(&v2, &v0) // C^4(A^2 - 4C^2)
+	v2.Inv(&v2) // 1/(C^4(A^2 - 4C^2)), the only inversion in this function
+	v0.Mul(&v3, &v2) // 256(A^2 - 3C^2)^3 / (C^4(A^2 - 4C^2))
+	return v0
+}
+
+// cachedParams computes the cached parameters A + 2C and 4C.
+func (curve *ProjectiveCurveParameters) cachedParams() CachedCurveParameters {
+	var cached CachedCurveParameters
+	cached.Aplus2C.Add(&curve.C, &curve.C) // = 2*C (Aplus2C is used as scratch until the last step)
+	cached.C4.Add(&cached.Aplus2C, &cached.Aplus2C) // = 4*C
+	cached.Aplus2C.Add(&cached.Aplus2C, &curve.A) // = 2*C + A
+	return cached
+}
+
+// A point on the projective line P^1(F_{p^2}).
+//
+// This represents a point on the (Kummer line) of a Montgomery curve. The
+// curve is specified by a ProjectiveCurveParameters struct.
+type ProjectivePoint struct {
+	X ExtensionFieldElement
+	Z ExtensionFieldElement
+}
+
+// A point on the projective line P^1(F_p).
+//
+// This represents a point on the (Kummer line) of the prime-field subgroup of
+// the base curve E_0(F_p), defined by E_0 : y^2 = x^3 + x.
+type ProjectivePrimeFieldPoint struct {
+	X PrimeFieldElement
+	Z PrimeFieldElement
+}
+
+func (point *ProjectivePoint) FromAffinePrimeField(x *PrimeFieldElement) {
+	point.X.A = x.A // the A component of X is the prime-field coordinate
+	point.X.B = zeroExtensionField.B // the B component of X is copied from the zero element
+	point.Z = oneExtensionField // Z = 1, so the point is (x : 1)
+}
+
+func (point *ProjectivePoint) FromAffine(x *ExtensionFieldElement) {
+	point.X = *x // copies the value; x itself is not retained
+	point.Z = oneExtensionField // Z = 1, so the point is (x : 1)
+}
+
+func (point *ProjectivePrimeFieldPoint) FromAffine(x *PrimeFieldElement) {
+	point.X = *x // copies the value; x itself is not retained
+	point.Z = onePrimeField // Z = 1, so the point is (x : 1)
+}
+
+func (point *ProjectivePoint) ToAffine() *ExtensionFieldElement {
+	affine_x := new(ExtensionFieldElement) // freshly allocated result; point is not modified
+	affine_x.Inv(&point.Z).Mul(affine_x, &point.X) // = (1/Z) * X
+	return affine_x
+}
+
+func (point *ProjectivePrimeFieldPoint) ToAffine() *PrimeFieldElement {
+	affine_x := new(PrimeFieldElement) // freshly allocated result; point is not modified
+	affine_x.Inv(&point.Z).Mul(affine_x, &point.X) // = (1/Z) * X
+	return affine_x
+}
+
+func (lhs *ProjectivePoint) VartimeEq(rhs *ProjectivePoint) bool {
+	var t0, t1 ExtensionFieldElement
+	t0.Mul(&lhs.X, &rhs.Z) // = X_l * Z_r
+	t1.Mul(&lhs.Z, &rhs.X) // = Z_l * X_r
+	return t0.VartimeEq(&t1) // cross-multiplied comparison, no inversion; presumably variable-time, per the name
+}
+
+func (lhs *ProjectivePrimeFieldPoint) VartimeEq(rhs *ProjectivePrimeFieldPoint) bool {
+	var t0, t1 PrimeFieldElement
+	t0.Mul(&lhs.X, &rhs.Z) // = X_l * Z_r
+	t1.Mul(&lhs.Z, &rhs.X) // = Z_l * X_r
+	return t0.VartimeEq(&t1) // cross-multiplied comparison, no inversion; presumably variable-time, per the name
+}
+
+func ProjectivePointConditionalSwap(xP, xQ *ProjectivePoint, choice uint8) {
+	ExtensionFieldConditionalSwap(&xP.X, &xQ.X, choice) // same choice for both coordinates; presumably swaps when choice is 1 (confirm in the field code)
+	ExtensionFieldConditionalSwap(&xP.Z, &xQ.Z, choice)
+}
+
+func ProjectivePrimeFieldPointConditionalSwap(xP, xQ *ProjectivePrimeFieldPoint, choice uint8) {
+	PrimeFieldConditionalSwap(&xP.X, &xQ.X, choice) // same choice for both coordinates; presumably swaps when choice is 1 (confirm in the field code)
+	PrimeFieldConditionalSwap(&xP.Z, &xQ.Z, choice)
+}
+
+// Given xP = x(P), xQ = x(Q), and xPmQ = x(P-Q), compute xR = x(P+Q).
+//
+// Returns xR to allow chaining. Safe to overlap xP, xQ, xR.
+func (xR *ProjectivePoint) Add(xP, xQ, xPmQ *ProjectivePoint) *ProjectivePoint {
+	// Algorithm 1 of Costello-Smith.
+	var v0, v1, v2, v3, v4 ExtensionFieldElement
+	v0.Add(&xP.X, &xP.Z) // X_P + Z_P
+	v1.Sub(&xQ.X, &xQ.Z).Mul(&v1, &v0) // (X_Q - Z_Q)(X_P + Z_P)
+	v0.Sub(&xP.X, &xP.Z) // X_P - Z_P
+	v2.Add(&xQ.X, &xQ.Z).Mul(&v2, &v0) // (X_Q + Z_Q)(X_P - Z_P)
+	v3.Add(&v1, &v2).Square(&v3) // 4(X_Q X_P - Z_Q Z_P)^2
+	v4.Sub(&v1, &v2).Square(&v4) // 4(X_Q Z_P - Z_Q X_P)^2
+	v0.Mul(&xPmQ.Z, &v3) // 4Z_{P-Q}(X_Q X_P - Z_Q Z_P)^2 = X_R, held in v0 for now
+	xR.Z.Mul(&xPmQ.X, &v4) // 4X_{P-Q}(X_Q Z_P - Z_Q X_P)^2 = Z_R
+	xR.X = v0 // X is stored last, after every read of the inputs, so xR may alias an input
+	return xR
+}
+
+// Given xP = x(P), xQ = x(Q), and xPmQ = x(P-Q), compute xR = x(P+Q).
+//
+// Returns xR to allow chaining. Safe to overlap xP, xQ, xR.
+func (xR *ProjectivePrimeFieldPoint) Add(xP, xQ, xPmQ *ProjectivePrimeFieldPoint) *ProjectivePrimeFieldPoint {
+	// Algorithm 1 of Costello-Smith.
+	var v0, v1, v2, v3, v4 PrimeFieldElement
+	v0.Add(&xP.X, &xP.Z) // X_P + Z_P
+	v1.Sub(&xQ.X, &xQ.Z).Mul(&v1, &v0) // (X_Q - Z_Q)(X_P + Z_P)
+	v0.Sub(&xP.X, &xP.Z) // X_P - Z_P
+	v2.Add(&xQ.X, &xQ.Z).Mul(&v2, &v0) // (X_Q + Z_Q)(X_P - Z_P)
+	v3.Add(&v1, &v2).Square(&v3) // 4(X_Q X_P - Z_Q Z_P)^2
+	v4.Sub(&v1, &v2).Square(&v4) // 4(X_Q Z_P - Z_Q X_P)^2
+	v0.Mul(&xPmQ.Z, &v3) // 4Z_{P-Q}(X_Q X_P - Z_Q Z_P)^2 = X_R, held in v0 for now
+	xR.Z.Mul(&xPmQ.X, &v4) // 4X_{P-Q}(X_Q Z_P - Z_Q X_P)^2 = Z_R
+	xR.X = v0 // X is stored last, after every read of the inputs, so xR may alias an input
+	return xR
+}
+
+// Given xP = x(P) and cached curve parameters Aplus2C = A + 2*C, C4 = 4*C, compute xQ = x([2]P).
+//
+// Returns xQ to allow chaining. Safe to overlap xP, xQ.
+func (xQ *ProjectivePoint) Double(xP *ProjectivePoint, curve *CachedCurveParameters) *ProjectivePoint {
+	// Algorithm 2 of Costello-Smith, amended to work with projective curve coefficients.
+	var v1, v2, v3, xz4 ExtensionFieldElement
+	v1.Add(&xP.X, &xP.Z).Square(&v1) // (X+Z)^2
+	v2.Sub(&xP.X, &xP.Z).Square(&v2) // (X-Z)^2
+	xz4.Sub(&v1, &v2) // 4XZ = (X+Z)^2 - (X-Z)^2
+	v2.Mul(&v2, &curve.C4) // 4C(X-Z)^2
+	xQ.X.Mul(&v1, &v2) // 4C(X+Z)^2(X-Z)^2 (xP is not read after this point)
+	v3.Mul(&xz4, &curve.Aplus2C) // 4XZ(A + 2C)
+	v3.Add(&v3, &v2) // 4XZ(A + 2C) + 4C(X-Z)^2
+	xQ.Z.Mul(&v3, &xz4) // (4XZ(A + 2C) + 4C(X-Z)^2)4XZ
+	// Now (xQ.x : xQ.z)
+	// = (4C(X+Z)^2(X-Z)^2 : (4XZ(A + 2C) + 4C(X-Z)^2)4XZ )
+	// = ((X+Z)^2(X-Z)^2 : (4XZ((A + 2C)/4C) + (X-Z)^2)4XZ )
+	// = ((X+Z)^2(X-Z)^2 : (4XZ((a + 2)/4) + (X-Z)^2)4XZ )
+	return xQ
+}
+
+// Given xP = x(P) and cached curve parameter aPlus2Over4 = (a+2)/4, compute xQ = x([2]P).
+//
+// Note that we don't use projective curve coefficients here because we only
+// ever use a fixed curve (in our case, the base curve E_0).
+//
+// Returns xQ to allow chaining. Safe to overlap xP, xQ.
+func (xQ *ProjectivePrimeFieldPoint) Double(xP *ProjectivePrimeFieldPoint, aPlus2Over4 *PrimeFieldElement) *ProjectivePrimeFieldPoint {
+	// Algorithm 2 of Costello-Smith
+	var v1, v2, v3, xz4 PrimeFieldElement
+	v1.Add(&xP.X, &xP.Z).Square(&v1) // (X+Z)^2
+	v2.Sub(&xP.X, &xP.Z).Square(&v2) // (X-Z)^2
+	xz4.Sub(&v1, &v2) // 4XZ = (X+Z)^2 - (X-Z)^2
+	xQ.X.Mul(&v1, &v2) // (X+Z)^2(X-Z)^2 (xP is not read after this point)
+	v3.Mul(&xz4, aPlus2Over4) // 4XZ((a+2)/4)
+	v3.Add(&v3, &v2) // 4XZ((a+2)/4) + (X-Z)^2
+	xQ.Z.Mul(&v3, &xz4) // (4XZ((a+2)/4) + (X-Z)^2)4XZ
+	// Now (xQ.x : xQ.z)
+	// = ((X+Z)^2(X-Z)^2 : (4XZ((a + 2)/4) + (X-Z)^2)4XZ )
+	return xQ
+}
+
+// Given the curve parameters, xP = x(P), and k >= 0, compute xQ = x([2^k]P).
+//
+// Returns xQ to allow chaining. Safe to overlap xP, xQ.
+func (xQ *ProjectivePoint) Pow2k(curve *ProjectiveCurveParameters, xP *ProjectivePoint, k uint32) *ProjectivePoint {
+	cachedParams := curve.cachedParams() // computed once, reused for all k doublings
+	*xQ = *xP // for k == 0 the result is just a copy of xP
+	for i := uint32(0); i < k; i++ {
+		xQ.Double(xQ, &cachedParams)
+	}
+
+	return xQ
+}
+
+// Given xP = x(P) and cached curve parameters Aplus2C = A + 2*C, C4 = 4*C, compute xQ = x([3]P).
+//
+// Returns xQ to allow chaining. Safe to overlap xP, xQ.
+func (xQ *ProjectivePoint) Triple(xP *ProjectivePoint, curve *CachedCurveParameters) *ProjectivePoint {
+	// Uses the efficient Montgomery tripling formulas from Costello-Longa-Naehrig.
+	var v0, v1, v2, v3, v4, v5 ExtensionFieldElement
+	// Compute (X_2 : Z_2) = x([2]P)
+	v2.Sub(&xP.X, &xP.Z) // X - Z
+	v3.Add(&xP.X, &xP.Z) // X + Z
+	v0.Square(&v2) // (X-Z)^2
+	v1.Square(&v3) // (X+Z)^2
+	v4.Mul(&v0, &curve.C4) // 4C(X-Z)^2
+	v5.Mul(&v4, &v1) // 4C(X-Z)^2(X+Z)^2 = X_2
+	v1.Sub(&v1, &v0) // (X+Z)^2 - (X-Z)^2 = 4XZ
+	v0.Mul(&v1, &curve.Aplus2C) // 4XZ(A+2C)
+	v4.Add(&v4, &v0).Mul(&v4, &v1) // (4C(X-Z)^2 + 4XZ(A+2C))4XZ = Z_2
+	// Compute (X_3 : Z_3) = x(P + [2]P)
+	v0.Add(&v5, &v4).Mul(&v0, &v2) // (X_2 + Z_2)(X-Z)
+	v1.Sub(&v5, &v4).Mul(&v1, &v3) // (X_2 - Z_2)(X+Z)
+	v4.Sub(&v0, &v1).Square(&v4) // 4(XZ_2 - ZX_2)^2
+	v5.Add(&v0, &v1).Square(&v5) // 4(XX_2 - ZZ_2)^2
+	v2.Mul(&xP.Z, &v5) // 4Z(XX_2 - ZZ_2)^2 = X_3, held in v2 for now
+	xQ.Z.Mul(&xP.X, &v4) // 4X(XZ_2 - ZX_2)^2 = Z_3
+	xQ.X = v2 // X is stored last, after xP.X has been read, so xQ may alias xP
+	return xQ
+}
+
+// Given the curve parameters, xP = x(P), and k >= 0, compute xQ = x([3^k]P).
+//
+// Returns xQ to allow chaining. Safe to overlap xP, xQ.
+func (xQ *ProjectivePoint) Pow3k(curve *ProjectiveCurveParameters, xP *ProjectivePoint, k uint32) *ProjectivePoint {
+	cachedParams := curve.cachedParams() // computed once, reused for all k triplings
+	*xQ = *xP // for k == 0 the result is just a copy of xP
+	for i := uint32(0); i < k; i++ {
+		xQ.Triple(xQ, &cachedParams)
+	}
+
+	return xQ
+}
+
+// Given x(P) and a scalar m in little-endian bytes, compute x([m]P) using the
+// Montgomery ladder. This is described in Algorithm 8 of Costello-Smith.
+//
+// This function's execution time is dependent only on the byte-length of the
+// input scalar. All scalars of the same input length execute in uniform time.
+// The scalar can be padded with zero bytes to ensure a uniform length.
+//
+// Safe to overlap the source with the destination.
+func (xQ *ProjectivePoint) ScalarMult(curve *ProjectiveCurveParameters, xP *ProjectivePoint, scalar []uint8) *ProjectivePoint {
+	cachedParams := curve.cachedParams() // computed once, reused by every doubling
+	var x0, x1, tmp ProjectivePoint
+
+	x0.X.One() // x0 = (1 : 0)
+	x0.Z.Zero()
+	x1 = *xP // x1 = x(P)
+
+	// Iterate over the bits of the scalar, top to bottom
+	prevBit := uint8(0)
+	for i := len(scalar) - 1; i >= 0; i-- { // last byte first: the scalar is little-endian
+		scalarByte := scalar[i]
+		for j := 7; j >= 0; j-- {
+			bit := (scalarByte >> uint(j)) & 0x1
+			ProjectivePointConditionalSwap(&x0, &x1, (bit ^ prevBit)) // choice is 1 only when this bit differs from the previous one
+			tmp.Double(&x0, &cachedParams) // tmp = xDBL(x0); kept aside because x0 is still needed below
+			x1.Add(&x0, &x1, xP) // x1 = xADD(x0, x1) with xP passed as the difference argument
+			x0 = tmp
+			prevBit = bit
+		}
+	}
+	// now prevBit is the lowest bit of the scalar
+	ProjectivePointConditionalSwap(&x0, &x1, prevBit)
+	*xQ = x0 // the destination is written only here, after the last read of xP
+	return xQ
+}
+
+// Given x(P) and a scalar m in little-endian bytes, compute x([m]P), x([m+1]P) using the
+// Montgomery ladder. This is described in Algorithm 8 of Costello-Smith.
+//
+// The extra value x([m+1]P) is returned to allow y-coordinate recovery;
+// otherwise, it can be ignored.
+//
+// This function's execution time is dependent only on the byte-length of the
+// input scalar. All scalars of the same input length execute in uniform time.
+// The scalar can be padded with zero bytes to ensure a uniform length.
+func ScalarMultPrimeField(aPlus2Over4 *PrimeFieldElement, xP *ProjectivePrimeFieldPoint, scalar []uint8) (ProjectivePrimeFieldPoint, ProjectivePrimeFieldPoint) {
+	var x0, x1, tmp ProjectivePrimeFieldPoint
+
+	x0.X.One() // x0 = (1 : 0)
+	x0.Z.Zero()
+	x1 = *xP // x1 = x(P)
+
+	// Iterate over the bits of the scalar, top to bottom
+	prevBit := uint8(0)
+	for i := len(scalar) - 1; i >= 0; i-- { // last byte first: the scalar is little-endian
+		scalarByte := scalar[i]
+		for j := 7; j >= 0; j-- {
+			bit := (scalarByte >> uint(j)) & 0x1
+			ProjectivePrimeFieldPointConditionalSwap(&x0, &x1, (bit ^ prevBit)) // choice is 1 only when this bit differs from the previous one
+			tmp.Double(&x0, aPlus2Over4) // tmp = xDBL(x0); kept aside because x0 is still needed below
+			x1.Add(&x0, &x1, xP) // x1 = xADD(x0, x1) with xP passed as the difference argument
+			x0 = tmp
+			prevBit = bit
+		}
+	}
+	// now prevBit is the lowest bit of the scalar
+	ProjectivePrimeFieldPointConditionalSwap(&x0, &x1, prevBit)
+	return x0, x1 // both ladder values are returned by value; xP is not modified
+}
+
+// Given P = (x_P, y_P) in affine coordinates, as well as projective points
+// x(Q), x(R) = x(P+Q), all in the prime-field subgroup of the starting curve
+// E_0(F_p), use the Okeya-Sakurai coordinate recovery strategy to recover Q =
+// (X_Q : Y_Q : Z_Q).
+//
+// This is Algorithm 5 of Costello-Smith, with the constants a = 0, b = 1 hardcoded.
+func OkeyaSakuraiCoordinateRecovery(affine_xP, affine_yP *PrimeFieldElement, xQ, xR *ProjectivePrimeFieldPoint) (X_Q, Y_Q, Z_Q PrimeFieldElement) {
+	var v1, v2, v3, v4 PrimeFieldElement
+	v1.Mul(affine_xP, &xQ.Z) // = x_P*Z_Q
+	v2.Add(&xQ.X, &v1) // = X_Q + x_P*Z_Q
+	v3.Sub(&xQ.X, &v1).Square(&v3) // = (X_Q - x_P*Z_Q)^2
+	v3.Mul(&v3, &xR.X) // = X_R*(X_Q - x_P*Z_Q)^2
+	// Skip setting v1 = 2a*Z_Q (step 6) since we hardcode a = 0
+	// Skip adding v1 to v2 (step 7) since v1 is zero
+	v4.Mul(affine_xP, &xQ.X) // = x_P*X_Q
+	v4.Add(&v4, &xQ.Z) // = x_P*X_Q + Z_Q
+	v2.Mul(&v2, &v4) // = (x_P*X_Q + Z_Q)*(X_Q + x_P*Z_Q)
+	// Skip multiplication by v1 (step 11) since v1 is zero
+	// Skip subtracting v1 from v2 (step 12) since v1 is zero
+	v2.Mul(&v2, &xR.Z) // = (x_P*X_Q + Z_Q)*(X_Q + x_P*Z_Q)*Z_R
+	Y_Q.Sub(&v2, &v3) // = (x_P*X_Q + Z_Q)*(X_Q + x_P*Z_Q)*Z_R - X_R*(X_Q - x_P*Z_Q)^2
+	v1.Add(affine_yP, affine_yP) // = 2b*y_P (b = 1, so this is just y_P + y_P; v1 is reused from here on)
+	v1.Mul(&v1, &xQ.Z).Mul(&v1, &xR.Z) // = 2b*y_P*Z_Q*Z_R
+	X_Q.Mul(&v1, &xQ.X) // = 2b*y_P*Z_Q*Z_R*X_Q
+	Z_Q.Mul(&v1, &xQ.Z) // = 2b*y_P*Z_Q^2*Z_R
+
+	return // naked return of the named results X_Q, Y_Q, Z_Q set above
+}
+
+// Given x(P), x(Q), x(P-Q), as well as a scalar m in little-endian bytes,
+// compute x(P + [m]Q) using the "three-point ladder" of de Feo, Jao, and Plut.
+//
+// Safe to overlap the source with the destination.
+//
+// This function's execution time is dependent only on the byte-length of the
+// input scalar. All scalars of the same input length execute in uniform time.
+// The scalar can be padded with zero bytes to ensure a uniform length.
+//
+// The algorithm, as described in de Feo-Jao-Plut, is as follows:
+//
+// (x0, x1, x2) <--- (x(O), x(Q), x(P))
+//
+// for i = |m| down to 0, indexing the bits of m:
+//     Invariant: (x0, x1, x2) == (x( [t]Q ), x( [t+1]Q ), x( P + [t]Q ))
+//          where t = m//2^i is the high bits of m, starting at i
+//     if m_i == 0:
+//         (x0, x1, x2) <--- (xDBL(x0), xADD(x1, x0, x(Q)), xADD(x2, x0, x(P)))
+//         Invariant: (x0, x1, x2) == (x( [2t]Q ), x( [2t+1]Q ), x( P + [2t]Q ))
+//                                 == (x( [t']Q ), x( [t'+1]Q ), x( P + [t']Q ))
+//              where t' = m//2^{i-1} is the high bits of m, starting at i-1
+//     if m_i == 1:
+//         (x0, x1, x2) <--- (xADD(x1, x0, x(Q)), xDBL(x1), xADD(x2, x1, x(P-Q)))
+//         Invariant: (x0, x1, x2) == (x( [2t+1]Q ), x( [2t+2]Q ), x( P + [2t+1]Q ))
+//                                 == (x( [t']Q ), x( [t'+1]Q ), x( P + [t']Q ))
+//              where t' = m//2^{i-1} is the high bits of m, starting at i-1
+//     return x2
+//
+// Notice that the roles of (x0,x1) and (x(P), x(P-Q)) swap depending on the
+// current bit of the scalar. Instead of swapping which operations we do, we
+// can swap variable names, producing the following uniform algorithm:
+//
+// (x0, x1, x2) <--- (x(O), x(Q), x(P))
+// (y0, y1) <--- (x(P), x(P-Q))
+//
+// for i = |m| down to 0, indexing the bits of m:
+//     (x0, x1) <--- SWAP( m_{i+1} xor m_i, (x0,x1) )
+//     (y0, y1) <--- SWAP( m_{i+1} xor m_i, (y0,y1) )
+//     (x0, x1, x2) <--- ( xDBL(x0), xADD(x1,x0,x(Q)), xADD(x2, x0, y0) )
+//
+// return x2
+//
+func (xR *ProjectivePoint) ThreePointLadder(curve *ProjectiveCurveParameters, xP, xQ, xPmQ *ProjectivePoint, scalar []uint8) *ProjectivePoint {
+	cachedParams := curve.cachedParams() // computed once, reused by every doubling
+	var x0, x1, x2, y0, y1, tmp ProjectivePoint
+
+	// (x0, x1, x2) <--- (x(O), x(Q), x(P))
+	x0.X.One()
+	x0.Z.Zero()
+	x1 = *xQ
+	x2 = *xP
+	// (y0, y1) <--- (x(P), x(P-Q))
+	y0 = *xP
+	y1 = *xPmQ
+
+	// Iterate over the bits of the scalar, top to bottom
+	prevBit := uint8(0)
+	for i := len(scalar) - 1; i >= 0; i-- { // last byte first: the scalar is little-endian
+		scalarByte := scalar[i]
+		for j := 7; j >= 0; j-- {
+			bit := (scalarByte >> uint(j)) & 0x1
+			ProjectivePointConditionalSwap(&x0, &x1, (bit ^ prevBit)) // choice is 1 only when this bit differs from the previous one
+			ProjectivePointConditionalSwap(&y0, &y1, (bit ^ prevBit)) // same choice, so (y0, y1) stays in step with (x0, x1)
+			x2.Add(&x2, &x0, &y0) // = xADD(x2, x0, y0)
+			tmp.Double(&x0, &cachedParams) // kept in tmp because x0 is still read by the next line
+			x1.Add(&x1, &x0, xQ) // = xADD(x1, x0, x(Q))
+			x0 = tmp // = xDBL(x0)
+			prevBit = bit
+		}
+	}
+
+	*xR = x2 // x2 is never swapped, so no final conditional swap is needed; xR is written only here
+	return xR
+}
+
+// Given the affine x-coordinate affine_xP of P, compute the x-coordinate
+// x(\tau(P)-P) of \tau(P)-P.
+func DistortAndDifference(affine_xP *PrimeFieldElement) ProjectivePoint {
+	var xR ProjectivePoint // only X.B and Z.A are assigned below; the other components keep their Go zero value
+	var t0, t1 PrimeFieldElement
+	t0.Square(affine_xP) // = x_P^2
+	t1.One().Add(&t1, &t0) // = x_P^2 + 1
+	xR.X.B = t1.A // = 0 + (x_P^2 + 1)*i
+	t0.Add(affine_xP, affine_xP) // = 2*x_P
+	xR.Z.A = t0.A // = 2*x_P + 0*i
+
+	return xR
+}
+
+// Given an affine point P = (x_P, y_P) in the prime-field subgroup of the
+// starting curve E_0(F_p), together with a secret scalar m, compute x(P+[m]Q),
+// where Q = \tau(P) is the image of P under the distortion map described
+// below.
+//
+// The computation uses basically the same strategy as the
+// Costello-Longa-Naehrig implementation:
+//
+// 1. Use the standard Montgomery ladder to compute x([m]Q), x([m+1]Q)
+//
+// 2. Use Okeya-Sakurai coordinate recovery to recover [m]Q from Q, x([m]Q),
+// x([m+1]Q)
+//
+// 3. Use P and [m]Q to compute x(P + [m]Q)
+//
+// The distortion map \tau is defined as
+//
+// \tau : E_0(F_{p^2}) ---> E_0(F_{p^2})
+//
+// \tau : (x,y) |---> (-x, iy).
+//
+// The image of the distortion map is the _trace-zero_ subgroup of E_0(F_{p^2})
+// defined by Tr(P) = P + \pi_p(P) = id, where \pi_p((x,y)) = (x^p, y^p) is the
+// p-power Frobenius map. To see this, take P = (x,y) \in E_0(F_{p^2}). Then
+// Tr(P) = id if and only if \pi_p(P) = -P, so that
+//
+// -P = (x, -y) = (x^p, y^p) = \pi_p(P);
+//
+// we have x^p = x if and only if x \in F_p, while y^p = -y if and only if y =
+// i*y' for y' \in F_p.
+// +// Thus (excepting the identity) every point in the trace-zero subgroup is of +// the form \tau((x,y)) = (-x,i*y) for (x,y) \in E_0(F_p). +// +// Since the Montgomery ladder only uses the x-coordinate, and the x-coordinate +// is always in the prime subfield, we can compute x([m]Q), x([m+1]Q) entirely +// in the prime subfield. +// +// The affine form of the relation for Okeya-Sakurai coordinate recovery is +// given on p. 13 of Costello-Smith: +// +// y_Q = ((x_P*x_Q + 1)*(x_P + x_Q + 2*a) - 2*a - x_R*(x_P - x_Q)^2)/(2*b*y_P), +// +// where R = Q + P and a,b are the Montgomery parameters. In our setting +// (a,b)=(0,1) and our points are P=Q, Q=[m]Q, P+Q=[m+1]Q, so this becomes +// +// y_{mQ} = ((x_Q*x_{mQ} + 1)*(x_Q + x_{mQ}) - x_{m1Q}*(x_Q - x_{mQ})^2)/(2*y_Q) +// +// y_{mQ} = ((1 - x_P*x_{mQ})*(x_{mQ} - x_P) - x_{m1Q}*(x_P + x_{mQ})^2)/(2*y_P*i) +// +// y_{mQ} = i*((1 - x_P*x_{mQ})*(x_{mQ} - x_P) - x_{m1Q}*(x_P + x_{mQ})^2)/(-2*y_P) +// +// since (x_Q, y_Q) = (-x_P, y_P*i). In projective coordinates this is +// +// Y_{mQ}' = ((Z_{mQ} - x_P*X_{mQ})*(X_{mQ} - x_P*Z_{mQ})*Z_{m1Q} +// - X_{m1Q}*(X_{mQ} + x_P*Z_{mQ})^2) +// +// with denominator +// +// Z_{mQ}' = (-2*y_P*Z_{mQ}*Z_{m1Q})*Z_{mQ}. +// +// Setting +// +// X_{mQ}' = (-2*y_P*Z_{mQ}*Z_{m1Q})*X_{mQ} +// +// gives [m]Q = (X_{mQ}' : i*Y_{mQ}' : Z_{mQ}') with X,Y,Z all in F_p. (Here +// the ' just denotes that we've added extra terms to the denominators during +// the computation of Y) +// +// To compute the x-coordinate x(P+[m]Q) from P and [m]Q, we use the affine +// addition formulas of section 2.2 of Costello-Smith. We're only interested +// in the x-coordinate, giving +// +// X_R = Z_{mQ}*(i*Y_{mQ} - y_P*Z_{mQ})^2 - (x_P*Z_{mQ} + X_{mQ})*(X_{mQ} - x_P*Z_{mQ})^2 +// +// Z_R = Z_{mQ}*(X_{mQ} - x_P*Z_{mQ})^2. 
//
// Notice that although X_R \in F_{p^2}, we can split the computation into
// coordinates X_R = X_{R,a} + X_{R,b}*i as
//
// (i*Y_{mQ} - y_P*Z_{mQ})^2 = (y_P*Z_{mQ})^2 - Y_{mQ}^2 - 2*y_P*Z_{mQ}*Y_{mQ}*i,
//
// giving
//
// X_{R,a} = Z_{mQ}*((y_P*Z_{mQ})^2 - Y_{mQ}^2)
//           - (x_P*Z_{mQ} + X_{mQ})*(X_{mQ} - x_P*Z_{mQ})^2
//
// X_{R,b} = -2*y_P*Y_{mQ}*Z_{mQ}^2
//
// Z_R = Z_{mQ}*(X_{mQ} - x_P*Z_{mQ})^2.
//
// These formulas could probably be combined with the formulas for y-recovery
// and computed more efficiently, but efficiency isn't the biggest concern
// here, since the bulk of the cost is already in the ladder.
//
// affine_xP and affine_yP are the affine coordinates (x_P, y_P) of P; scalar
// holds the bytes of m and is handed unchanged to ScalarMultPrimeField. The
// returned point is (X_{R,a} + X_{R,b}*i : Z_R); its Z.B component is never
// assigned and keeps its zero value.
func SecretPoint(affine_xP, affine_yP *PrimeFieldElement, scalar []uint8) ProjectivePoint {
	// x(Q) = x(\tau(P)) = -x_P, kept entirely in the prime field.
	var xQ ProjectivePrimeFieldPoint
	xQ.FromAffine(affine_xP)
	xQ.X.Neg(&xQ.X)

	// Compute x([m]Q) = (X_{mQ} : Z_{mQ}), x([m+1]Q) = (X_{m1Q} : Z_{m1Q})
	var xmQ, xm1Q = ScalarMultPrimeField(&E0_aPlus2Over4, &xQ, scalar)

	// Now perform coordinate recovery:
	// [m]Q = (X_{mQ} : Y_{mQ}*i : Z_{mQ})
	var XmQ, YmQ, ZmQ PrimeFieldElement
	var t0, t1 PrimeFieldElement

	// Y_{mQ} = (Z_{mQ} - x_P*X_{mQ})*(X_{mQ} - x_P*Z_{mQ})*Z_{m1Q}
	//         - X_{m1Q}*(X_{mQ} + x_P*Z_{mQ})^2
	t0.Mul(affine_xP, &xmQ.X)       // = x_P*X_{mQ}
	YmQ.Sub(&xmQ.Z, &t0)            // = Z_{mQ} - x_P*X_{mQ}
	t1.Mul(affine_xP, &xmQ.Z)       // = x_P*Z_{mQ}
	t0.Sub(&xmQ.X, &t1)             // = X_{mQ} - x_P*Z_{mQ}
	YmQ.Mul(&YmQ, &t0)              // = (Z_{mQ} - x_P*X_{mQ})*(X_{mQ} - x_P*Z_{mQ})
	YmQ.Mul(&YmQ, &xm1Q.Z)          // = (Z_{mQ} - x_P*X_{mQ})*(X_{mQ} - x_P*Z_{mQ})*Z_{m1Q}
	t1.Add(&t1, &xmQ.X).Square(&t1) // = (X_{mQ} + x_P*Z_{mQ})^2
	t1.Mul(&t1, &xm1Q.X)            // = X_{m1Q}*(X_{mQ} + x_P*Z_{mQ})^2
	YmQ.Sub(&YmQ, &t1)              // = Y_{mQ}

	// Z_{mQ} = -2*(Z_{mQ}^2 * Z_{m1Q} * y_P)
	//
	// After these lines t0 holds the common factor -2*y_P*Z_{mQ}*Z_{m1Q},
	// which is reused for both the new Z_{mQ} and the new X_{mQ}.
	t0.Mul(&xmQ.Z, &xm1Q.Z).Mul(&t0, affine_yP) // = Z_{mQ} * Z_{m1Q} * y_P
	t0.Neg(&t0)                                 // = -1*(Z_{mQ} * Z_{m1Q} * y_P)
	t0.Add(&t0, &t0)                            // = -2*(Z_{mQ} * Z_{m1Q} * y_P)
	ZmQ.Mul(&xmQ.Z, &t0)                        // = -2*(Z_{mQ}^2 * Z_{m1Q} * y_P)

	// We added terms to the denominator Z_{mQ}, so multiply them to X_{mQ}
	// X_{mQ} = -2*X_{mQ}*Z_{mQ}*Z_{m1Q}*y_P
	XmQ.Mul(&xmQ.X, &t0)

	// Now compute x(P + [m]Q) = (X_Ra + i*X_Rb : Z_R)
	// From here on XmQ, YmQ, ZmQ are the rescaled coordinates computed above.
	var XRa, XRb, ZR PrimeFieldElement

	XRb.Square(&ZmQ).Mul(&XRb, &YmQ) // = Y_{mQ} * Z_{mQ}^2
	XRb.Mul(&XRb, affine_yP)         // = Y_{mQ} * y_P * Z_{mQ}^2
	XRb.Add(&XRb, &XRb)              // = 2 * Y_{mQ} * y_P * Z_{mQ}^2
	XRb.Neg(&XRb)                    // = -2 * Y_{mQ} * y_P * Z_{mQ}^2

	t0.Mul(affine_yP, &ZmQ).Square(&t0) // = (y_P * Z_{mQ})^2
	t1.Square(&YmQ)                     // = Y_{mQ}^2
	XRa.Sub(&t0, &t1)                   // = (y_P * Z_{mQ})^2 - Y_{mQ}^2
	XRa.Mul(&XRa, &ZmQ)                 // = Z_{mQ}*((y_P * Z_{mQ})^2 - Y_{mQ}^2)
	t0.Mul(affine_xP, &ZmQ)             // = x_P * Z_{mQ}
	t1.Add(&XmQ, &t0)                   // = X_{mQ} + x_P*Z_{mQ}
	t0.Sub(&XmQ, &t0)                   // = X_{mQ} - x_P*Z_{mQ}
	t0.Square(&t0)                      // = (X_{mQ} - x_P*Z_{mQ})^2
	t1.Mul(&t1, &t0)                    // = (X_{mQ} + x_P*Z_{mQ})*(X_{mQ} - x_P*Z_{mQ})^2
	XRa.Sub(&XRa, &t1)                  // = Z_{mQ}*((y_P*Z_{mQ})^2 - Y_{mQ}^2) - (X_{mQ} + x_P*Z_{mQ})*(X_{mQ} - x_P*Z_{mQ})^2

	// t0 still holds (X_{mQ} - x_P*Z_{mQ})^2 from the computation of X_Ra.
	ZR.Mul(&ZmQ, &t0) // = Z_{mQ}*(X_{mQ} - x_P*Z_{mQ})^2

	// Assemble the F_{p^2} point from the prime-field pieces: X_Ra is the
	// real part of X, X_Rb its imaginary part, Z_R the real part of Z.
	var xR ProjectivePoint
	xR.X.A = XRa.A
	xR.X.B = XRb.A
	xR.Z.A = ZR.A

	return xR
}
diff --git a/vendor/github.com/cloudflare/p751sidh/p751toolbox/curve_test.go b/vendor/github.com/cloudflare/p751sidh/p751toolbox/curve_test.go
new file mode 100644
index 00000000..49db3a1b
--- /dev/null
+++ b/vendor/github.com/cloudflare/p751sidh/p751toolbox/curve_test.go
@@ -0,0 +1,318 @@
package p751toolbox

import (
	"math/rand"
	"reflect"
	"testing"
	"testing/quick"
)

// Sage script for generating test vectors:
// sage: p = 2^372 * 3^239 - 1; Fp = GF(p)
// sage: R.
= Fp[] +// sage: Fp2 = Fp.extension(x^2 + 1, 'i') +// sage: i = Fp2.gen() +// sage: A = 4385300808024233870220415655826946795549183378139271271040522089756750951667981765872679172832050962894122367066234419550072004266298327417513857609747116903999863022476533671840646615759860564818837299058134292387429068536219*i + 1408083354499944307008104531475821995920666351413327060806684084512082259107262519686546161682384352696826343970108773343853651664489352092568012759783386151707999371397181344707721407830640876552312524779901115054295865393760 +// sage: C = 933177602672972392833143808100058748100491911694554386487433154761658932801917030685312352302083870852688835968069519091048283111836766101703759957146191882367397129269726925521881467635358356591977198680477382414690421049768*i + 9088894745865170214288643088620446862479558967886622582768682946704447519087179261631044546285104919696820250567182021319063155067584445633834024992188567423889559216759336548208016316396859149888322907914724065641454773776307 +// sage: E = EllipticCurve(Fp2, [0,A/C,0,1,0]) +// sage: X, Y, Z = (8172151271761071554796221948801462094972242987811852753144865524899433583596839357223411088919388342364651632180452081960511516040935428737829624206426287774255114241789158000915683252363913079335550843837650671094705509470594*i + 9326574858039944121604015439381720195556183422719505497448541073272720545047742235526963773359004021838961919129020087515274115525812121436661025030481584576474033630899768377131534320053412545346268645085054880212827284581557, 2381174772709336084066332457520782192315178511983342038392622832616744048226360647551642232950959910067260611740876401494529727990031260499974773548012283808741733925525689114517493995359390158666069816204787133942283380884077*i + 
5378956232034228335189697969144556552783858755832284194802470922976054645696324118966333158267442767138528227968841257817537239745277092206433048875637709652271370008564179304718555812947398374153513738054572355903547642836171, 1) +// sage: P = E((X,Y,Z)) +// sage: X2, Y2, Z2 = 2*P +// sage: X3, Y3, Z3 = 3*P +// sage: m = 96550223052359874398280314003345143371473380422728857598463622014420884224892 + +// A = 4385300808024233870220415655826946795549183378139271271040522089756750951667981765872679172832050962894122367066234419550072004266298327417513857609747116903999863022476533671840646615759860564818837299058134292387429068536219*i + 1408083354499944307008104531475821995920666351413327060806684084512082259107262519686546161682384352696826343970108773343853651664489352092568012759783386151707999371397181344707721407830640876552312524779901115054295865393760 +var curve_A = ExtensionFieldElement{A: Fp751Element{0x8319eb18ca2c435e, 0x3a93beae72cd0267, 0x5e465e1f72fd5a84, 0x8617fa4150aa7272, 0x887da24799d62a13, 0xb079b31b3c7667fe, 0xc4661b150fa14f2e, 0xd4d2b2967bc6efd6, 0x854215a8b7239003, 0x61c5302ccba656c2, 0xf93194a27d6f97a2, 0x1ed9532bca75}, B: Fp751Element{0xb6f541040e8c7db6, 0x99403e7365342e15, 0x457e9cee7c29cced, 0x8ece72dc073b1d67, 0x6e73cef17ad28d28, 0x7aed836ca317472, 0x89e1de9454263b54, 0x745329277aa0071b, 0xf623dfc73bc86b9b, 0xb8e3c1d8a9245882, 0x6ad0b3d317770bec, 0x5b406e8d502b}} + +// C = 933177602672972392833143808100058748100491911694554386487433154761658932801917030685312352302083870852688835968069519091048283111836766101703759957146191882367397129269726925521881467635358356591977198680477382414690421049768*i + 9088894745865170214288643088620446862479558967886622582768682946704447519087179261631044546285104919696820250567182021319063155067584445633834024992188567423889559216759336548208016316396859149888322907914724065641454773776307 +var curve_C = ExtensionFieldElement{A: Fp751Element{0x4fb2358bbf723107, 0x3a791521ac79e240, 0x283e24ef7c4c922f, 
0xc89baa1205e33cc, 0x3031be81cff6fee1, 0xaf7a494a2f6a95c4, 0x248d251eaac83a1d, 0xc122fca1e2550c88, 0xbc0451b11b6cfd3d, 0x9c0a114ab046222c, 0x43b957b32f21f6ea, 0x5b9c87fa61de}, B: Fp751Element{0xacf142afaac15ec6, 0xfd1322a504a071d5, 0x56bb205e10f6c5c6, 0xe204d2849a97b9bd, 0x40b0122202fe7f2e, 0xecf72c6fafacf2cb, 0x45dfc681f869f60a, 0x11814c9aff4af66c, 0x9278b0c4eea54fe7, 0x9a633d5baf7f2e2e, 0x69a329e6f1a05112, 0x1d874ace23e4}} + +var curve = ProjectiveCurveParameters{A: curve_A, C: curve_C} + +// x(P) = 8172151271761071554796221948801462094972242987811852753144865524899433583596839357223411088919388342364651632180452081960511516040935428737829624206426287774255114241789158000915683252363913079335550843837650671094705509470594*i + 9326574858039944121604015439381720195556183422719505497448541073272720545047742235526963773359004021838961919129020087515274115525812121436661025030481584576474033630899768377131534320053412545346268645085054880212827284581557 +var affine_xP = ExtensionFieldElement{A: Fp751Element{0xe8d05f30aac47247, 0x576ec00c55441de7, 0xbf1a8ec5fe558518, 0xd77cb17f77515881, 0x8e9852837ee73ec4, 0x8159634ad4f44a6b, 0x2e4eb5533a798c5, 0x9be8c4354d5bc849, 0xf47dc61806496b84, 0x25d0e130295120e0, 0xdbef54095f8139e3, 0x5a724f20862c}, B: Fp751Element{0x3ca30d7623602e30, 0xfb281eddf45f07b7, 0xd2bf62d5901a45bc, 0xc67c9baf86306dd2, 0x4e2bd93093f538ca, 0xcfd92075c25b9cbe, 0xceafe9a3095bcbab, 0x7d928ad380c85414, 0x37c5f38b2afdc095, 0x75325899a7b779f4, 0xf130568249f20fdd, 0x178f264767d1}} + +// x([2]P) = 1476586462090705633631615225226507185986710728845281579274759750260315746890216330325246185232948298241128541272709769576682305216876843626191069809810990267291824247158062860010264352034514805065784938198193493333201179504845*i + 3623708673253635214546781153561465284135688791018117615357700171724097420944592557655719832228709144190233454198555848137097153934561706150196041331832421059972652530564323645509890008896574678228045006354394485640545367112224 +var affine_xP2 
= ExtensionFieldElement{A: Fp751Element{0x2a77afa8576ce979, 0xab1360e69b0aeba0, 0xd79e3e3cbffad660, 0x5fd0175aa10f106b, 0x1800ebafce9fbdbc, 0x228fc9142bdd6166, 0x867cf907314e34c3, 0xa58d18c94c13c31c, 0x699a5bc78b11499f, 0xa29fc29a01f7ccf1, 0x6c69c0c5347eebce, 0x38ecee0cc57}, B: Fp751Element{0x43607fd5f4837da0, 0x560bad4ce27f8f4a, 0x2164927f8495b4dd, 0x621103fdb831a997, 0xad740c4eea7db2db, 0x2cde0442205096cd, 0x2af51a70ede8324e, 0x41a4e680b9f3466, 0x5481f74660b8f476, 0xfcb2f3e656ff4d18, 0x42e3ce0837171acc, 0x44238c30530c}} + +// x([3]P) = 9351941061182433396254169746041546943662317734130813745868897924918150043217746763025923323891372857734564353401396667570940585840576256269386471444236630417779544535291208627646172485976486155620044292287052393847140181703665*i + 9010417309438761934687053906541862978676948345305618417255296028956221117900864204687119686555681136336037659036201780543527957809743092793196559099050594959988453765829339642265399496041485088089691808244290286521100323250273 +var affine_xP3 = ExtensionFieldElement{A: Fp751Element{0x2096e3f23feca947, 0xf36f635aa4ad8634, 0xdae3b1c6983c5e9a, 0xe08df6c262cb74b4, 0xd2ca4edc37452d3d, 0xfb5f3fe42f500c79, 0x73740aa3abc2b21f, 0xd535fd869f914cca, 0x4a558466823fb67f, 0x3e50a7a0e3bfc715, 0xf43c6da9183a132f, 0x61aca1e1b8b9}, B: Fp751Element{0x1e54ec26ea5077bd, 0x61380572d8769f9a, 0xc615170684f59818, 0x6309c3b93e84ef6e, 0x33c74b1318c3fcd0, 0xfe8d7956835afb14, 0x2d5a7b55423c1ecc, 0x869db67edfafea68, 0x1292632394f0a628, 0x10bba48225bfd141, 0x6466c28b408daba, 0x63cacfdb7c43}} + +// m = 96550223052359874398280314003345143371473380422728857598463622014420884224892 +var mScalarBytes = [...]uint8{124, 123, 149, 250, 180, 117, 108, 72, 140, 23, 85, 180, 73, 245, 30, 163, 11, 49, 240, 164, 166, 129, 173, 148, 81, 17, 231, 245, 91, 125, 117, 213} + +// x([a]P) = 
7893578558852400052689739833699289348717964559651707250677393044951777272628231794999463214496545377542328262828965953246725804301238040891993859185944339366910592967840967752138115122568615081881937109746463885908097382992642*i + 8293895847098220389503562888233557012043261770526854885191188476280014204211818299871679993460086974249554528517413590157845430186202704783785316202196966198176323445986064452630594623103149383929503089342736311904030571524837 +var affine_xaP = ExtensionFieldElement{A: Fp751Element{0x2112f3c7d7f938bb, 0x704a677f0a4df08f, 0x825370e31fb4ef00, 0xddbf79b7469f902, 0x27640c899ea739fd, 0xfb7b8b19f244108e, 0x546a6679dd3baebc, 0xe9f0ecf398d5265f, 0x223d2b350e75e461, 0x84b322a0b6aff016, 0xfabe426f539f8b39, 0x4507a0604f50}, B: Fp751Element{0xac77737e5618a5fe, 0xf91c0e08c436ca52, 0xd124037bc323533c, 0xc9a772bf52c58b63, 0x3b30c8f38ef6af4d, 0xb9eed160e134f36e, 0x24e3836393b25017, 0xc828be1b11baf1d9, 0x7b7dab585df50e93, 0x1ca3852c618bd8e0, 0x4efa73bcb359fa00, 0x50b6a923c2d4}} + +var one = ExtensionFieldElement{A: Fp751Element{0x249ad, 0x0, 0x0, 0x0, 0x0, 0x8310000000000000, 0x5527b1e4375c6c66, 0x697797bf3f4f24d0, 0xc89db7b2ac5c4e2e, 0x4ca4b439d2076956, 0x10f7926c7512c7e9, 0x2d5b24bce5e2}, B: Fp751Element{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}} + +func TestOne(t *testing.T) { + var tmp ExtensionFieldElement + tmp.Mul(&one, &affine_xP) + if !tmp.VartimeEq(&affine_xP) { + t.Error("Not equal 1") + } +} + +func (P ProjectivePoint) Generate(rand *rand.Rand, size int) reflect.Value { + f := ExtensionFieldElement{} + x, _ := f.Generate(rand, size).Interface().(ExtensionFieldElement) + z, _ := f.Generate(rand, size).Interface().(ExtensionFieldElement) + return reflect.ValueOf(ProjectivePoint{ + X: x, + Z: z, + }) +} + +func (curve ProjectiveCurveParameters) Generate(rand *rand.Rand, size int) reflect.Value { + f := ExtensionFieldElement{} + A, _ := f.Generate(rand, size).Interface().(ExtensionFieldElement) + C, _ := f.Generate(rand, 
size).Interface().(ExtensionFieldElement) + return reflect.ValueOf(ProjectiveCurveParameters{ + A: A, + C: C, + }) +} + +func Test_jInvariant(t *testing.T) { + var curve = ProjectiveCurveParameters{A: curve_A, C: curve_C} + j := curve.JInvariant() + // Computed using Sage + // j = 3674553797500778604587777859668542828244523188705960771798425843588160903687122861541242595678107095655647237100722594066610650373491179241544334443939077738732728884873568393760629500307797547379838602108296735640313894560419*i + 3127495302417548295242630557836520229396092255080675419212556702820583041296798857582303163183558315662015469648040494128968509467224910895884358424271180055990446576645240058960358037224785786494172548090318531038910933793845 + known_j := ExtensionFieldElement{ + A: Fp751Element{0xc7a8921c1fb23993, 0xa20aea321327620b, 0xf1caa17ed9676fa8, 0x61b780e6b1a04037, 0x47784af4c24acc7a, 0x83926e2e300b9adf, 0xcd891d56fae5b66, 0x49b66985beb733bc, 0xd4bcd2a473d518f, 0xe242239991abe224, 0xa8af5b20f98672f8, 0x139e4d4e4d98}, + B: Fp751Element{0xb5b52a21f81f359, 0x715e3a865db6d920, 0x9bac2f9d8911978b, 0xef14acd8ac4c1e3d, 0xe81aacd90cfb09c8, 0xaf898288de4a09d9, 0xb85a7fb88c5c4601, 0x2c37c3f1dd303387, 0x7ad3277fe332367c, 0xd4cbee7f25a8e6f8, 0x36eacbe979eaeffa, 0x59eb5a13ac33}, + } + + if !j.VartimeEq(&known_j) { + t.Error("Computed incorrect j-invariant: found\n", j, "\nexpected\n", known_j) + } +} + +func TestProjectivePointVartimeEq(t *testing.T) { + xP := ProjectivePoint{X: affine_xP, Z: one} + xQ := xP + // Scale xQ, which results in the same projective point + xQ.X.Mul(&xQ.X, &curve_A) + xQ.Z.Mul(&xQ.Z, &curve_A) + if !xQ.VartimeEq(&xP) { + t.Error("Expected the scaled point to be equal to the original") + } +} + +func TestPointDoubleVersusSage(t *testing.T) { + var curve = ProjectiveCurveParameters{A: curve_A, C: curve_C} + var xP, xQ ProjectivePoint + xP = ProjectivePoint{X: affine_xP, Z: one} + affine_xQ := xQ.Pow2k(&curve, &xP, 1).ToAffine() + + if 
!affine_xQ.VartimeEq(&affine_xP2) { + t.Error("\nExpected\n", affine_xP2, "\nfound\n", affine_xQ) + } +} + +func TestPointTripleVersusSage(t *testing.T) { + var curve = ProjectiveCurveParameters{A: curve_A, C: curve_C} + var xP, xQ ProjectivePoint + xP = ProjectivePoint{X: affine_xP, Z: one} + affine_xQ := xQ.Pow3k(&curve, &xP, 1).ToAffine() + + if !affine_xQ.VartimeEq(&affine_xP3) { + t.Error("\nExpected\n", affine_xP3, "\nfound\n", affine_xQ) + } +} + +func TestPointPow2kVersusScalarMult(t *testing.T) { + var xP, xQ, xR ProjectivePoint + xP = ProjectivePoint{X: affine_xP, Z: one} + affine_xQ := xQ.Pow2k(&curve, &xP, 5).ToAffine() // = x([32]P) + affine_xR := xR.ScalarMult(&curve, &xP, []byte{32}).ToAffine() // = x([32]P) + + if !affine_xQ.VartimeEq(affine_xR) { + t.Error("\nExpected\n", affine_xQ, "\nfound\n", affine_xR) + } +} + +func TestScalarMultVersusSage(t *testing.T) { + xP := ProjectivePoint{X: affine_xP, Z: one} + affine_xQ := xP.ScalarMult(&curve, &xP, mScalarBytes[:]).ToAffine() // = x([m]P) + + if !affine_xaP.VartimeEq(affine_xQ) { + t.Error("\nExpected\n", affine_xaP, "\nfound\n", affine_xQ) + } +} + +func TestRecoverCurveParameters(t *testing.T) { + // Created using old public key generation code that output the a value: + var a = ExtensionFieldElement{A: Fp751Element{0x9331d9c5aaf59ea4, 0xb32b702be4046931, 0xcebb333912ed4d34, 0x5628ce37cd29c7a2, 0xbeac5ed48b7f58e, 0x1fb9d3e281d65b07, 0x9c0cfacc1e195662, 0xae4bce0f6b70f7d9, 0x59e4e63d43fe71a0, 0xef7ce57560cc8615, 0xe44a8fb7901e74e8, 0x69d13c8366d1}, B: Fp751Element{0xf6da1070279ab966, 0xa78fb0ce7268c762, 0x19b40f044a57abfa, 0x7ac8ee6160c0c233, 0x93d4993442947072, 0x757d2b3fa4e44860, 0x73a920f8c4d5257, 0x2031f1b054734037, 0xdefaa1d2406555cd, 0x26f9c70e1496be3d, 0x5b3f335a0a4d0976, 0x13628b2e9c59}} + var affine_xP = ExtensionFieldElement{A: Fp751Element{0xea6b2d1e2aebb250, 0x35d0b205dc4f6386, 0xb198e93cb1830b8d, 0x3b5b456b496ddcc6, 0x5be3f0d41132c260, 0xce5f188807516a00, 0x54f3e7469ea8866d, 
0x33809ef47f36286, 0x6fa45f83eabe1edb, 0x1b3391ae5d19fd86, 0x1e66daf48584af3f, 0xb430c14aaa87}, B: Fp751Element{0x97b41ebc61dcb2ad, 0x80ead31cb932f641, 0x40a940099948b642, 0x2a22fd16cdc7fe84, 0xaabf35b17579667f, 0x76c1d0139feb4032, 0x71467e1e7b1949be, 0x678ca8dadd0d6d81, 0x14445daea9064c66, 0x92d161eab4fa4691, 0x8dfbb01b6b238d36, 0x2e3718434e4e}} + var affine_xQ = ExtensionFieldElement{A: Fp751Element{0xb055cf0ca1943439, 0xa9ff5de2fa6c69ed, 0x4f2761f934e5730a, 0x61a1dcaa1f94aa4b, 0xce3c8fadfd058543, 0xeac432aaa6701b8e, 0x8491d523093aea8b, 0xba273f9bd92b9b7f, 0xd8f59fd34439bb5a, 0xdc0350261c1fe600, 0x99375ab1eb151311, 0x14d175bbdbc5}, B: Fp751Element{0xffb0ef8c2111a107, 0x55ceca3825991829, 0xdbf8a1ccc075d34b, 0xb8e9187bd85d8494, 0x670aa2d5c34a03b0, 0xef9fe2ed2b064953, 0xc911f5311d645aee, 0xf4411f409e410507, 0x934a0a852d03e1a8, 0xe6274e67ae1ad544, 0x9f4bc563c69a87bc, 0x6f316019681e}} + var affine_xQmP = ExtensionFieldElement{A: Fp751Element{0x6ffb44306a153779, 0xc0ffef21f2f918f3, 0x196c46d35d77f778, 0x4a73f80452edcfe6, 0x9b00836bce61c67f, 0x387879418d84219e, 0x20700cf9fc1ec5d1, 0x1dfe2356ec64155e, 0xf8b9e33038256b1c, 0xd2aaf2e14bada0f0, 0xb33b226e79a4e313, 0x6be576fad4e5}, B: Fp751Element{0x7db5dbc88e00de34, 0x75cc8cb9f8b6e11e, 0x8c8001c04ebc52ac, 0x67ef6c981a0b5a94, 0xc3654fbe73230738, 0xc6a46ee82983ceca, 0xed1aa61a27ef49f0, 0x17fe5a13b0858fe0, 0x9ae0ca945a4c6b3c, 0x234104a218ad8878, 0xa619627166104394, 0x556a01ff2e7e}} + + var curveParams = RecoverCurveParameters(&affine_xP, &affine_xQ, &affine_xQmP) + + var tmp ExtensionFieldElement + tmp.Inv(&curveParams.C).Mul(&tmp, &curveParams.A) + + if !tmp.VartimeEq(&a) { + t.Error("\nExpected\n", a, "\nfound\n", tmp) + } +} + +var threePointLadderInputs = [3]ProjectivePoint{ + // x(P) + ProjectivePoint{ + X: ExtensionFieldElement{A: Fp751Element{0xe8d05f30aac47247, 0x576ec00c55441de7, 0xbf1a8ec5fe558518, 0xd77cb17f77515881, 0x8e9852837ee73ec4, 0x8159634ad4f44a6b, 0x2e4eb5533a798c5, 0x9be8c4354d5bc849, 0xf47dc61806496b84, 
0x25d0e130295120e0, 0xdbef54095f8139e3, 0x5a724f20862c}, B: Fp751Element{0x3ca30d7623602e30, 0xfb281eddf45f07b7, 0xd2bf62d5901a45bc, 0xc67c9baf86306dd2, 0x4e2bd93093f538ca, 0xcfd92075c25b9cbe, 0xceafe9a3095bcbab, 0x7d928ad380c85414, 0x37c5f38b2afdc095, 0x75325899a7b779f4, 0xf130568249f20fdd, 0x178f264767d1}}, + Z: oneExtensionField, + }, + // x(Q) + ProjectivePoint{ + X: ExtensionFieldElement{A: Fp751Element{0x2b71a2a93ad1e10e, 0xf0b9842a92cfb333, 0xae17373615a27f5c, 0x3039239f428330c4, 0xa0c4b735ed7dcf98, 0x6e359771ddf6af6a, 0xe986e4cac4584651, 0x8233a2b622d5518, 0xbfd67bf5f06b818b, 0xdffe38d0f5b966a6, 0xa86b36a3272ee00a, 0x193e2ea4f68f}, B: Fp751Element{0x5a0f396459d9d998, 0x479f42250b1b7dda, 0x4016b57e2a15bf75, 0xc59f915203fa3749, 0xd5f90257399cf8da, 0x1fb2dadfd86dcef4, 0x600f20e6429021dc, 0x17e347d380c57581, 0xc1b0d5fa8fe3e440, 0xbcf035330ac20e8, 0x50c2eb5f6a4f03e6, 0x86b7c4571}}, + Z: oneExtensionField, + }, + // x(P-Q) + ProjectivePoint{ + X: ExtensionFieldElement{A: Fp751Element{0x4aafa9f378f7b5ff, 0x1172a683aa8eee0, 0xea518d8cbec2c1de, 0xe191bcbb63674557, 0x97bc19637b259011, 0xdbeae5c9f4a2e454, 0x78f64d1b72a42f95, 0xe71cb4ea7e181e54, 0xe4169d4c48543994, 0x6198c2286a98730f, 0xd21d675bbab1afa5, 0x2e7269fce391}, B: Fp751Element{0x23355783ce1d0450, 0x683164cf4ce3d93f, 0xae6d1c4d25970fd8, 0x7807007fb80b48cf, 0xa005a62ec2bbb8a2, 0x6b5649bd016004cb, 0xbb1a13fa1330176b, 0xbf38e51087660461, 0xe577fddc5dd7b930, 0x5f38116f56947cd3, 0x3124f30b98c36fde, 0x4ca9b6e6db37}}, + Z: oneExtensionField, + }, +} + +func TestThreePointLadderVersusSage(t *testing.T) { + var xR ProjectivePoint + xR.ThreePointLadder(&curve, &threePointLadderInputs[0], &threePointLadderInputs[1], &threePointLadderInputs[2], mScalarBytes[:]) + + affine_xR := xR.ToAffine() + + sageAffine_xR := ExtensionFieldElement{A: Fp751Element{0x729465ba800d4fd5, 0x9398015b59e514a1, 0x1a59dd6be76c748e, 0x1a7db94eb28dd55c, 0x444686e680b1b8ec, 0xcc3d4ace2a2454ff, 0x51d3dab4ec95a419, 0xc3b0f33594acac6a, 
0x9598a74e7fd44f8a, 0x4fbf8c638f1c2e37, 0x844e347033052f51, 0x6cd6de3eafcf}, B: Fp751Element{0x85da145412d73430, 0xd83c0e3b66eb3232, 0xd08ff2d453ec1369, 0xa64aaacfdb395b13, 0xe9cba211a20e806e, 0xa4f80b175d937cfc, 0x556ce5c64b1f7937, 0xb59b39ea2b3fdf7a, 0xc2526b869a4196b3, 0x8dad90bca9371750, 0xdfb4a30c9d9147a2, 0x346d2130629b}} + + if !affine_xR.VartimeEq(&sageAffine_xR) { + t.Error("\nExpected\n", sageAffine_xR, "\nfound\n", affine_xR) + } +} + +func TestPointTripleVersusAddDouble(t *testing.T) { + tripleEqualsAddDouble := func(curve ProjectiveCurveParameters, P ProjectivePoint) bool { + + cachedParams := curve.cachedParams() + var P2, P3, P2plusP ProjectivePoint + P2.Double(&P, &cachedParams) // = x([2]P) + P3.Triple(&P, &cachedParams) // = x([3]P) + P2plusP.Add(&P2, &P, &P) // = x([2]P + P) + + return P3.VartimeEq(&P2plusP) + } + + if err := quick.Check(tripleEqualsAddDouble, quickCheckConfig); err != nil { + t.Error(err) + } +} + +func TestScalarMultPrimeFieldAndCoordinateRecoveryVersusSageGeneratedTorsionPoints(t *testing.T) { + // x((11,...)) = 11 + var x11 = ProjectivePrimeFieldPoint{X: PrimeFieldElement{A: Fp751Element{0x192a73, 0x0, 0x0, 0x0, 0x0, 0xe6f0000000000000, 0x19024ab93916c5c3, 0x1dcd18cf68876318, 0x7d8c830e0c47ba23, 0x3588ea6a9388299a, 0x8259082aa8e3256c, 0x33533f160446}}, Z: onePrimeField} + // y((11,...)) = oddsqrt(11^3 + 11) + var y11 = PrimeFieldElement{A: Fp751Element{0xd38a264df57f3c8a, 0x9c0450d25042dcdf, 0xaf1ab7be7bbed0b6, 0xa307981c42b29630, 0x845a7e79e0fa2ecb, 0x7ef77ef732108f55, 0x97b5836751081f0d, 0x59e3d115f5275ff4, 0x9a02736282284916, 0xec39f71196540e99, 0xf8b521b28dcc965a, 0x6af0b9d7f54c}} + + // x((6,...)) = 6 + var x6 = ProjectivePrimeFieldPoint{X: PrimeFieldElement{A: Fp751Element{0xdba10, 0x0, 0x0, 0x0, 0x0, 0x3500000000000000, 0x3714fe4eb8399915, 0xc3a2584753eb43f4, 0xa3151d605c520428, 0xc116cf5232c7c978, 0x49a84d4b8efaf6aa, 0x305731e97514}}, Z: onePrimeField} + // y((6,...)) = oddsqrt(6^3 + 6) + var y6 = PrimeFieldElement{A: 
Fp751Element{0xe4786c67ba55ff3c, 0x6ffa02bcc2a148e0, 0xe1c5d019df326e2a, 0x232148910f712e87, 0x6ade324bee99c196, 0x4372f82c6bb821f3, 0x91a374a15d391ec4, 0x6e98998b110b7c75, 0x2e093f44d4eeb574, 0x33cdd14668840958, 0xb017cea89e353067, 0x6f907085d4b7}} + + // Little-endian bytes of 3^239 + var three239Bytes = [...]byte{235, 142, 138, 135, 159, 84, 104, 201, 62, 110, 199, 124, 63, 161, 177, 89, 169, 109, 135, 190, 110, 125, 134, 233, 132, 128, 116, 37, 203, 69, 80, 43, 86, 104, 198, 173, 123, 249, 9, 41, 225, 192, 113, 31, 84, 93, 254, 6} + // Little-endian bytes of 2^372 + var two372Bytes = [...]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16} + + // E_0 : y^2 = x^3 + x has a = 0, so (a+2)/4 = 1/2 + var aPlus2Over4 = PrimeFieldElement{A: Fp751Element{0x124d6, 0x0, 0x0, 0x0, 0x0, 0xb8e0000000000000, 0x9c8a2434c0aa7287, 0xa206996ca9a378a3, 0x6876280d41a41b52, 0xe903b49f175ce04f, 0xf8511860666d227, 0x4ea07cff6e7f}} + + // Compute x(P_A) = x([3^239](11,...)) and x([3^239 + 1](11,...)) + var xPA, xPAplus11 = ScalarMultPrimeField(&aPlus2Over4, &x11, three239Bytes[:]) + + // Compute x(P_B) = x([2^372](6,...)) and x([2^372 + 1](6,...)) + var xPB, xPBplus6 = ScalarMultPrimeField(&aPlus2Over4, &x6, two372Bytes[:]) + + // Check that the computed x-coordinates are correct: + + var testAffine_xPA = xPA.ToAffine() + if !testAffine_xPA.VartimeEq(&Affine_xPA) { + t.Error("Recomputed x(P_A) incorrectly: found\n", Affine_xPA, "\nexpected\n", testAffine_xPA) + } + + var testAffine_xPB = xPB.ToAffine() + if !testAffine_xPB.VartimeEq(&Affine_xPB) { + t.Error("Recomputed x(P_A) incorrectly: found\n", Affine_xPB, "\nexpected\n", testAffine_xPB) + } + + // Recover y-coordinates and check that those are correct: + var invZ_A, invZ_B PrimeFieldElement + + var X_A, Y_A, Z_A = OkeyaSakuraiCoordinateRecovery(&x11.X, &y11, &xPA, &xPAplus11) + invZ_A.Inv(&Z_A) + Y_A.Mul(&Y_A, &invZ_A) // = Y_A / Z_A + 
X_A.Mul(&X_A, &invZ_A) // = X_A / Z_A + if !Affine_yPA.VartimeEq(&Y_A) { + t.Error("Recovered y(P_A) incorrectly: found\n", Y_A, "\nexpected\n", Affine_yPA) + } + if !Affine_xPA.VartimeEq(&X_A) { + t.Error("Recovered x(P_A) incorrectly: found\n", X_A, "\nexpected\n", Affine_xPA) + } + + var X_B, Y_B, Z_B = OkeyaSakuraiCoordinateRecovery(&x6.X, &y6, &xPB, &xPBplus6) + invZ_B.Inv(&Z_B) + Y_B.Mul(&Y_B, &invZ_B) // = Y_B / Z_B + X_B.Mul(&X_B, &invZ_B) // = X_B / Z_B + if !Affine_yPB.VartimeEq(&Y_B) { + t.Error("Recovered y(P_B) incorrectly: found\n", Y_B, "\nexpected\n", Affine_yPB) + } + if !Affine_xPB.VartimeEq(&X_B) { + t.Error("Recovered x(P_B) incorrectly: found\n", X_B, "\nexpected\n", Affine_xPB) + } +} + +func BenchmarkPointAddition(b *testing.B) { + var xP = ProjectivePoint{X: curve_A, Z: curve_C} + var xP2, xP3 ProjectivePoint + cachedParams := curve.cachedParams() + xP2.Double(&xP, &cachedParams) + + for n := 0; n < b.N; n++ { + xP3.Add(&xP2, &xP, &xP) + } +} + +func BenchmarkPointDouble(b *testing.B) { + var xP = ProjectivePoint{X: curve_A, Z: curve_C} + cachedParams := curve.cachedParams() + + for n := 0; n < b.N; n++ { + xP.Double(&xP, &cachedParams) + } +} + +func BenchmarkPointTriple(b *testing.B) { + var xP = ProjectivePoint{X: curve_A, Z: curve_C} + cachedParams := curve.cachedParams() + + for n := 0; n < b.N; n++ { + xP.Triple(&xP, &cachedParams) + } +} + +func BenchmarkScalarMult379BitScalar(b *testing.B) { + var xR ProjectivePoint + var mScalarBytes = [...]uint8{84, 222, 146, 63, 85, 18, 173, 162, 167, 38, 10, 8, 143, 176, 93, 228, 247, 128, 50, 128, 205, 42, 15, 137, 119, 67, 43, 3, 61, 91, 237, 24, 235, 12, 53, 96, 186, 164, 232, 223, 197, 224, 64, 109, 137, 63, 246, 4} + + for n := 0; n < b.N; n++ { + xR.ScalarMult(&curve, &threePointLadderInputs[0], mScalarBytes[:]) + } +} + +func BenchmarkThreePointLadder379BitScalar(b *testing.B) { + var xR ProjectivePoint + var mScalarBytes = [...]uint8{84, 222, 146, 63, 85, 18, 173, 162, 167, 38, 10, 8, 
143, 176, 93, 228, 247, 128, 50, 128, 205, 42, 15, 137, 119, 67, 43, 3, 61, 91, 237, 24, 235, 12, 53, 96, 186, 164, 232, 223, 197, 224, 64, 109, 137, 63, 246, 4} + + for n := 0; n < b.N; n++ { + xR.ThreePointLadder(&curve, &threePointLadderInputs[0], &threePointLadderInputs[1], &threePointLadderInputs[2], mScalarBytes[:]) + } +} diff --git a/vendor/github.com/cloudflare/p751sidh/p751toolbox/field.go b/vendor/github.com/cloudflare/p751sidh/p751toolbox/field.go new file mode 100644 index 00000000..cbace381 --- /dev/null +++ b/vendor/github.com/cloudflare/p751sidh/p751toolbox/field.go @@ -0,0 +1,598 @@ +package p751toolbox + +//------------------------------------------------------------------------------ +// Extension Field +//------------------------------------------------------------------------------ + +// Represents an element of the extension field F_{p^2}. +type ExtensionFieldElement struct { + // This field element is in Montgomery form, so that the value `A` is + // represented by `aR mod p`. + A Fp751Element + // This field element is in Montgomery form, so that the value `B` is + // represented by `bR mod p`. + B Fp751Element +} + +var zeroExtensionField = ExtensionFieldElement{ + A: Fp751Element{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + B: Fp751Element{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, +} + +var oneExtensionField = ExtensionFieldElement{ + A: Fp751Element{0x249ad, 0x0, 0x0, 0x0, 0x0, 0x8310000000000000, 0x5527b1e4375c6c66, 0x697797bf3f4f24d0, 0xc89db7b2ac5c4e2e, 0x4ca4b439d2076956, 0x10f7926c7512c7e9, 0x2d5b24bce5e2}, + B: Fp751Element{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, +} + +// Set dest = 0. +// +// Returns dest to allow chaining operations. +func (dest *ExtensionFieldElement) Zero() *ExtensionFieldElement { + *dest = zeroExtensionField + return dest +} + +// Set dest = 1. +// +// Returns dest to allow chaining operations. 
func (dest *ExtensionFieldElement) One() *ExtensionFieldElement {
	*dest = oneExtensionField
	return dest
}

// Set dest = lhs * rhs.
//
// Allowed to overlap lhs or rhs with dest.
//
// Uses three base-field multiplications (fp751Mul) rather than four, via the
// Karatsuba identity spelled out in the body.
//
// Returns dest to allow chaining operations.
func (dest *ExtensionFieldElement) Mul(lhs, rhs *ExtensionFieldElement) *ExtensionFieldElement {
	// Let (a,b,c,d) = (lhs.a,lhs.b,rhs.a,rhs.b).
	a := &lhs.A
	b := &lhs.B
	c := &rhs.A
	d := &rhs.B

	// We want to compute
	//
	// (a + bi)*(c + di) = (a*c - b*d) + (a*d + b*c)i
	//
	// Use Karatsuba's trick: note that
	//
	// (b - a)*(c - d) = (b*c + a*d) - a*c - b*d
	//
	// so (a*d + b*c) = (b-a)*(c-d) + a*c + b*d.

	var ac, bd fp751X2
	fp751Mul(&ac, a, c) // = a*c*R*R
	fp751Mul(&bd, b, d) // = b*d*R*R

	var b_minus_a, c_minus_d Fp751Element
	fp751SubReduced(&b_minus_a, b, a) // = (b-a)*R
	fp751SubReduced(&c_minus_d, c, d) // = (c-d)*R

	// This is the last point at which lhs or rhs is read; everything below
	// works on locals, which is what makes it safe for dest to alias lhs or
	// rhs.
	// NOTE(review): the *Lazy helpers presumably add/subtract the
	// double-width values without a modular reduction, leaving that to
	// fp751MontgomeryReduce; confirm against their definitions.
	var ad_plus_bc fp751X2
	fp751Mul(&ad_plus_bc, &b_minus_a, &c_minus_d) // = (b-a)*(c-d)*R*R
	fp751X2AddLazy(&ad_plus_bc, &ad_plus_bc, &ac) // = ((b-a)*(c-d) + a*c)*R*R
	fp751X2AddLazy(&ad_plus_bc, &ad_plus_bc, &bd) // = ((b-a)*(c-d) + a*c + b*d)*R*R

	fp751MontgomeryReduce(&dest.B, &ad_plus_bc) // = (a*d + b*c)*R mod p

	var ac_minus_bd fp751X2
	fp751X2SubLazy(&ac_minus_bd, &ac, &bd)       // = (a*c - b*d)*R*R
	fp751MontgomeryReduce(&dest.A, &ac_minus_bd) // = (a*c - b*d)*R mod p

	return dest
}

// Set dest = -x
//
// Allowed to overlap dest with x.
//
// Returns dest to allow chaining operations.
func (dest *ExtensionFieldElement) Neg(x *ExtensionFieldElement) *ExtensionFieldElement {
	// Computed as 0 - x.
	dest.Sub(&zeroExtensionField, x)
	return dest
}

// Set dest = 1/x
//
// Allowed to overlap dest with x.
//
// Returns dest to allow chaining operations.
+func (dest *ExtensionFieldElement) Inv(x *ExtensionFieldElement) *ExtensionFieldElement { + a := &x.A + b := &x.B + + // We want to compute + // + // 1 1 (a - bi) (a - bi) + // -------- = -------- -------- = ----------- + // (a + bi) (a + bi) (a - bi) (a^2 + b^2) + // + // Letting c = 1/(a^2 + b^2), this is + // + // 1/(a+bi) = a*c - b*ci. + + var asq_plus_bsq PrimeFieldElement + var asq, bsq fp751X2 + fp751Mul(&asq, a, a) // = a*a*R*R + fp751Mul(&bsq, b, b) // = b*b*R*R + fp751X2AddLazy(&asq, &asq, &bsq) // = (a^2 + b^2)*R*R + fp751MontgomeryReduce(&asq_plus_bsq.A, &asq) // = (a^2 + b^2)*R mod p + // Now asq_plus_bsq = a^2 + b^2 + + var asq_plus_bsq_inv PrimeFieldElement + asq_plus_bsq_inv.Inv(&asq_plus_bsq) + c := &asq_plus_bsq_inv.A + + var ac fp751X2 + fp751Mul(&ac, a, c) + fp751MontgomeryReduce(&dest.A, &ac) + + var minus_b Fp751Element + fp751SubReduced(&minus_b, &minus_b, b) + var minus_bc fp751X2 + fp751Mul(&minus_bc, &minus_b, c) + fp751MontgomeryReduce(&dest.B, &minus_bc) + + return dest +} + +// Set (y1, y2, y3) = (1/x1, 1/x2, 1/x3). +// +// All xi, yi must be distinct. +func ExtensionFieldBatch3Inv(x1, x2, x3, y1, y2, y3 *ExtensionFieldElement) { + var x1x2, t ExtensionFieldElement + x1x2.Mul(x1, x2) // x1*x2 + t.Mul(&x1x2, x3).Inv(&t) // 1/(x1*x2*x3) + y1.Mul(&t, x2).Mul(y1, x3) // 1/x1 + y2.Mul(&t, x1).Mul(y2, x3) // 1/x2 + y3.Mul(&t, &x1x2) // 1/x3 +} + +// Set dest = x * x +// +// Allowed to overlap dest with x. +// +// Returns dest to allow chaining operations. +func (dest *ExtensionFieldElement) Square(x *ExtensionFieldElement) *ExtensionFieldElement { + a := &x.A + b := &x.B + + // We want to compute + // + // (a + bi)*(a + bi) = (a^2 - b^2) + 2abi. 
+ + var a2, a_plus_b, a_minus_b Fp751Element + fp751AddReduced(&a2, a, a) // = a*R + a*R = 2*a*R + fp751AddReduced(&a_plus_b, a, b) // = a*R + b*R = (a+b)*R + fp751SubReduced(&a_minus_b, a, b) // = a*R - b*R = (a-b)*R + + var asq_minus_bsq, ab2 fp751X2 + fp751Mul(&asq_minus_bsq, &a_plus_b, &a_minus_b) // = (a+b)*(a-b)*R*R = (a^2 - b^2)*R*R + fp751Mul(&ab2, &a2, b) // = 2*a*b*R*R + + fp751MontgomeryReduce(&dest.A, &asq_minus_bsq) // = (a^2 - b^2)*R mod p + fp751MontgomeryReduce(&dest.B, &ab2) // = 2*a*b*R mod p + + return dest +} + +// Set dest = lhs + rhs. +// +// Allowed to overlap lhs or rhs with dest. +// +// Returns dest to allow chaining operations. +func (dest *ExtensionFieldElement) Add(lhs, rhs *ExtensionFieldElement) *ExtensionFieldElement { + fp751AddReduced(&dest.A, &lhs.A, &rhs.A) + fp751AddReduced(&dest.B, &lhs.B, &rhs.B) + + return dest +} + +// Set dest = lhs - rhs. +// +// Allowed to overlap lhs or rhs with dest. +// +// Returns dest to allow chaining operations. +func (dest *ExtensionFieldElement) Sub(lhs, rhs *ExtensionFieldElement) *ExtensionFieldElement { + fp751SubReduced(&dest.A, &lhs.A, &rhs.A) + fp751SubReduced(&dest.B, &lhs.B, &rhs.B) + + return dest +} + +// If choice = 1u8, set (x,y) = (y,x). If choice = 0u8, set (x,y) = (x,y). +// +// Returns dest to allow chaining operations. +func ExtensionFieldConditionalSwap(x, y *ExtensionFieldElement, choice uint8) { + fp751ConditionalSwap(&x.A, &y.A, choice) + fp751ConditionalSwap(&x.B, &y.B, choice) +} + +// Set dest = if choice == 0 { x } else { y }, in constant time. +// +// Can overlap z with x or y or both. +// +// Returns dest to allow chaining operations. +func (dest *ExtensionFieldElement) ConditionalAssign(x, y *ExtensionFieldElement, choice uint8) *ExtensionFieldElement { + fp751ConditionalAssign(&dest.A, &x.A, &y.A, choice) + fp751ConditionalAssign(&dest.B, &x.B, &y.B, choice) + + return dest +} + +// Returns true if lhs = rhs. Takes variable time. 
+func (lhs *ExtensionFieldElement) VartimeEq(rhs *ExtensionFieldElement) bool { + return lhs.A.vartimeEq(rhs.A) && lhs.B.vartimeEq(rhs.B) +} + +// Convert the input to wire format. +// +// The output byte slice must be at least 188 bytes long. +func (x *ExtensionFieldElement) ToBytes(output []byte) { + if len(output) < 188 { + panic("output byte slice too short, need 188 bytes") + } + x.A.toBytesFromMontgomeryForm(output[0:94]) + x.B.toBytesFromMontgomeryForm(output[94:188]) +} + +// Read 188 bytes into the given ExtensionFieldElement. +// +// It is an error to call this function if the input byte slice is less than 188 bytes long. +func (x *ExtensionFieldElement) FromBytes(input []byte) { + if len(input) < 188 { + panic("input byte slice too short, need 188 bytes") + } + x.A.montgomeryFormFromBytes(input[:94]) + x.B.montgomeryFormFromBytes(input[94:188]) +} + +//------------------------------------------------------------------------------ +// Prime Field +//------------------------------------------------------------------------------ + +// Represents an element of the prime field F_p. +type PrimeFieldElement struct { + // This field element is in Montgomery form, so that the value `A` is + // represented by `aR mod p`. + A Fp751Element +} + +var zeroPrimeField = PrimeFieldElement{ + A: Fp751Element{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, +} + +var onePrimeField = PrimeFieldElement{ + A: Fp751Element{0x249ad, 0x0, 0x0, 0x0, 0x0, 0x8310000000000000, 0x5527b1e4375c6c66, 0x697797bf3f4f24d0, 0xc89db7b2ac5c4e2e, 0x4ca4b439d2076956, 0x10f7926c7512c7e9, 0x2d5b24bce5e2}, +} + +// Set dest = 0. +// +// Returns dest to allow chaining operations. +func (dest *PrimeFieldElement) Zero() *PrimeFieldElement { + *dest = zeroPrimeField + return dest +} + +// Set dest = 1. +// +// Returns dest to allow chaining operations. +func (dest *PrimeFieldElement) One() *PrimeFieldElement { + *dest = onePrimeField + return dest +} + +// Set dest to x. 
+// +// Returns dest to allow chaining operations. +func (dest *PrimeFieldElement) SetUint64(x uint64) *PrimeFieldElement { + var xRR fp751X2 + dest.A = Fp751Element{} // = 0 + dest.A[0] = x // = x + fp751Mul(&xRR, &dest.A, &montgomeryRsq) // = x*R*R + fp751MontgomeryReduce(&dest.A, &xRR) // = x*R mod p + + return dest +} + +// Set dest = lhs * rhs. +// +// Allowed to overlap lhs or rhs with dest. +// +// Returns dest to allow chaining operations. +func (dest *PrimeFieldElement) Mul(lhs, rhs *PrimeFieldElement) *PrimeFieldElement { + a := &lhs.A // = a*R + b := &rhs.A // = b*R + + var ab fp751X2 + fp751Mul(&ab, a, b) // = a*b*R*R + fp751MontgomeryReduce(&dest.A, &ab) // = a*b*R mod p + + return dest +} + +// Set dest = x^(2^k), for k >= 1, by repeated squarings. +// +// Allowed to overlap x with dest. +// +// Returns dest to allow chaining operations. +func (dest *PrimeFieldElement) Pow2k(x *PrimeFieldElement, k uint8) *PrimeFieldElement { + dest.Square(x) + for i := uint8(1); i < k; i++ { + dest.Square(dest) + } + + return dest +} + +// Set dest = x^2 +// +// Allowed to overlap x with dest. +// +// Returns dest to allow chaining operations. +func (dest *PrimeFieldElement) Square(x *PrimeFieldElement) *PrimeFieldElement { + a := &x.A // = a*R + b := &x.A // = b*R + + var ab fp751X2 + fp751Mul(&ab, a, b) // = a*b*R*R + fp751MontgomeryReduce(&dest.A, &ab) // = a*b*R mod p + + return dest +} + +// Set dest = -x +// +// Allowed to overlap x with dest. +// +// Returns dest to allow chaining operations. +func (dest *PrimeFieldElement) Neg(x *PrimeFieldElement) *PrimeFieldElement { + dest.Sub(&zeroPrimeField, x) + return dest +} + +// Set dest = lhs + rhs. +// +// Allowed to overlap lhs or rhs with dest. +// +// Returns dest to allow chaining operations. +func (dest *PrimeFieldElement) Add(lhs, rhs *PrimeFieldElement) *PrimeFieldElement { + fp751AddReduced(&dest.A, &lhs.A, &rhs.A) + + return dest +} + +// Set dest = lhs - rhs. 
//
// Allowed to overlap lhs or rhs with dest.
//
// Returns dest to allow chaining operations.
func (dest *PrimeFieldElement) Sub(lhs, rhs *PrimeFieldElement) *PrimeFieldElement {
	fp751SubReduced(&dest.A, &lhs.A, &rhs.A)

	return dest
}

// VartimeEq returns true if lhs = rhs. Takes variable time.
func (lhs *PrimeFieldElement) VartimeEq(rhs *PrimeFieldElement) bool {
	return lhs.A.vartimeEq(rhs.A)
}

// PrimeFieldConditionalSwap sets (x,y) = (y,x) if choice = 1 and leaves
// (x,y) unchanged if choice = 0.
//
// The swap is applied in place; nothing is returned.
func PrimeFieldConditionalSwap(x, y *PrimeFieldElement, choice uint8) {
	fp751ConditionalSwap(&x.A, &y.A, choice)
}

// ConditionalAssign sets dest = if choice == 0 { x } else { y }, in constant
// time.
//
// Can overlap dest with x or y or both.
//
// Returns dest to allow chaining operations.
func (dest *PrimeFieldElement) ConditionalAssign(x, y *PrimeFieldElement, choice uint8) *PrimeFieldElement {
	fp751ConditionalAssign(&dest.A, &x.A, &y.A, choice)

	return dest
}

// Sqrt sets dest = sqrt(x), if x is a square. If x is nonsquare dest is
// undefined.
//
// Allowed to overlap x with dest.
//
// Returns dest to allow chaining operations.
func (dest *PrimeFieldElement) Sqrt(x *PrimeFieldElement) *PrimeFieldElement {
	tmp_x := *x // Copy x in case dest == x
	// Since x is assumed to be square, x = y^2
	dest.P34(x)            // dest = (y^2)^((p-3)/4) = y^((p-3)/2)
	dest.Mul(dest, &tmp_x) // dest = y^2 * y^((p-3)/2) = y^((p+1)/2)
	// Now dest^2 = y^(p+1) = y^2 = x, so dest = sqrt(x)

	return dest
}

// Inv sets dest = 1/x, computed as x^(p-2).
//
// Allowed to overlap x with dest.
//
// Returns dest to allow chaining operations.
func (dest *PrimeFieldElement) Inv(x *PrimeFieldElement) *PrimeFieldElement {
	tmp_x := *x            // Copy x in case dest == x
	dest.Square(x)         // dest = x^2
	dest.P34(dest)         // dest = (x^2)^((p-3)/4) = x^((p-3)/2)
	dest.Square(dest)      // dest = x^(p-3)
	dest.Mul(dest, &tmp_x) // dest = x^(p-2)

	return dest
}

// P34 sets dest = x^((p-3)/4). If x is square, this is 1/sqrt(x).
//
// Allowed to overlap x with dest: the lookup table is built from x before
// dest is first written.
//
// Returns dest to allow chaining operations.
func (dest *PrimeFieldElement) P34(x *PrimeFieldElement) *PrimeFieldElement {
	// Sliding-window strategy computed with Sage, awk, sed, and tr.
	//
	// This performs sum(powStrategy) = 744 squarings and len(mulStrategy)
	// = 137 multiplications, in addition to 1 squaring and 15
	// multiplications to build a lookup table.
	//
	// In total this is 745 squarings, 152 multiplications. Since Square for
	// the prime field is itself a general multiplication, this is 897
	// multiplications in total.
	//
	// Step i squares the accumulator powStrategy[i] times and then
	// multiplies by x^mulStrategy[i]; every mulStrategy entry is odd.
	powStrategy := [137]uint8{5, 7, 6, 2, 10, 4, 6, 9, 8, 5, 9, 4, 7, 5, 5, 4, 8, 3, 9, 5, 5, 4, 10, 4, 6, 6, 6, 5, 8, 9, 3, 4, 9, 4, 5, 6, 6, 2, 9, 4, 5, 5, 5, 7, 7, 9, 4, 6, 4, 8, 5, 8, 6, 6, 2, 9, 7, 4, 8, 8, 8, 4, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2}
	mulStrategy := [137]uint8{31, 23, 21, 1, 31, 7, 7, 7, 9, 9, 19, 15, 23, 23, 11, 7, 25, 5, 21, 17, 11, 5, 17, 7, 11, 9, 23, 9, 1, 19, 5, 3, 25, 15, 11, 29, 31, 1, 29, 11, 13, 9, 11, 27, 13, 19, 15, 31, 3, 29, 23, 31, 25, 11, 1, 21, 19, 15, 15, 21, 29, 13, 23, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 3}
	initialMul := uint8(27)

	// Build a lookup table of odd powers of x.
	lookup := [16]PrimeFieldElement{}
	xx := &PrimeFieldElement{}
	xx.Square(x) // Set xx = x^2
	lookup[0] = *x
	for i := 1; i < 16; i++ {
		lookup[i].Mul(&lookup[i-1], xx)
	}
	// Now lookup = {x, x^3, x^5, ... }
	// so that lookup[i] = x^{2*i + 1}
	// so that lookup[k/2] = x^k, for odd k

	// Start from x^initialMul, then run the 137 square-and-multiply steps.
	*dest = lookup[initialMul/2]
	for i := uint8(0); i < 137; i++ {
		dest.Pow2k(dest, powStrategy[i])
		dest.Mul(dest, &lookup[mulStrategy[i]/2])
	}

	return dest
}

//------------------------------------------------------------------------------
// Internals
//------------------------------------------------------------------------------

// fp751NumWords is the number of 64-bit words in an Fp751Element.
const fp751NumWords = 12

// (2^768) mod p.
// This can't be a constant because Go doesn't allow array constants, so try
// not to modify it.
var montgomeryR = Fp751Element{149933, 0, 0, 0, 0, 9444048418595930112, 6136068611055053926, 7599709743867700432, 14455912356952952366, 5522737203492907350, 1222606818372667369, 49869481633250}

// (2^768)^2 mod p
// This can't be a constant because Go doesn't allow array constants, so try
// not to modify it.
var montgomeryRsq = Fp751Element{2535603850726686808, 15780896088201250090, 6788776303855402382, 17585428585582356230, 5274503137951975249, 2266259624764636289, 11695651972693921304, 13072885652150159301, 4908312795585420432, 6229583484603254826, 488927695601805643, 72213483953973}

// Fp751Element is the internal representation of an element of the base
// field F_p.
//
// This type is distinct from PrimeFieldElement in that no particular meaning
// is assigned to the representation -- it could represent an element in
// Montgomery form, or not. Tracking the meaning of the field element is left
// to higher types.
type Fp751Element [fp751NumWords]uint64

// fp751X2 represents an intermediate product of two elements of the base
// field F_p. It is twice as wide as an Fp751Element.
type fp751X2 [2 * fp751NumWords]uint64

// If choice = 0, leave x,y unchanged. If choice = 1, set x,y = y,x.
// This function executes in constant time.
//go:noescape
func fp751ConditionalSwap(x, y *Fp751Element, choice uint8)

// If choice = 0, set z = x. If choice = 1, set z = y.
// This function executes in constant time.
+// +// Can overlap z with x or y or both. +//go:noescape +func fp751ConditionalAssign(z, x, y *Fp751Element, choice uint8) + +// Compute z = x + y (mod p). +//go:noescape +func fp751AddReduced(z, x, y *Fp751Element) + +// Compute z = x - y (mod p). +//go:noescape +func fp751SubReduced(z, x, y *Fp751Element) + +// Compute z = x + y, without reducing mod p. +//go:noescape +func fp751AddLazy(z, x, y *Fp751Element) + +// Compute z = x + y, without reducing mod p. +//go:noescape +func fp751X2AddLazy(z, x, y *fp751X2) + +// Compute z = x - y, without reducing mod p. +//go:noescape +func fp751X2SubLazy(z, x, y *fp751X2) + +// Compute z = x * y. +//go:noescape +func fp751Mul(z *fp751X2, x, y *Fp751Element) + +// Perform Montgomery reduction: set z = x R^{-1} (mod p). +// Destroys the input value. +//go:noescape +func fp751MontgomeryReduce(z *Fp751Element, x *fp751X2) + +// Reduce a field element in [0, 2*p) to one in [0,p). +//go:noescape +func fp751StrongReduce(x *Fp751Element) + +func (x Fp751Element) vartimeEq(y Fp751Element) bool { + fp751StrongReduce(&x) + fp751StrongReduce(&y) + eq := true + for i := 0; i < fp751NumWords; i++ { + eq = (x[i] == y[i]) && eq + } + + return eq +} + +// Read an Fp751Element from little-endian bytes and convert to Montgomery form. +// +// The input byte slice must be at least 94 bytes long. +func (x *Fp751Element) montgomeryFormFromBytes(input []byte) { + if len(input) < 94 { + panic("input byte slice too short") + } + + var a Fp751Element + for i := 0; i < 94; i++ { + // set i = j*8 + k + j := i / 8 + k := uint64(i % 8) + a[j] |= uint64(input[i]) << (8 * k) + } + + var aRR fp751X2 + fp751Mul(&aRR, &a, &montgomeryRsq) // = a*R*R + fp751MontgomeryReduce(x, &aRR) // = a*R mod p +} + +// Given an Fp751Element in Montgomery form, convert to little-endian bytes. +// +// The output byte slice must be at least 94 bytes long. 
+func (x *Fp751Element) toBytesFromMontgomeryForm(output []byte) { + if len(output) < 94 { + panic("output byte slice too short") + } + + var a Fp751Element + var aR fp751X2 + copy(aR[:], x[:]) // = a*R + fp751MontgomeryReduce(&a, &aR) // = a mod p in [0, 2p) + fp751StrongReduce(&a) // = a mod p in [0, p) + + // 8*12 = 96, but we drop the last two bytes since p is 751 < 752=94*8 bits. + for i := 0; i < 94; i++ { + // set i = j*8 + k + j := i / 8 + k := uint64(i % 8) + // Need parens because Go's operator precedence would interpret + // a[j] >> 8*k as (a[j] >> 8) * k + output[i] = byte(a[j] >> (8 * k)) + } +} diff --git a/vendor/github.com/cloudflare/p751sidh/p751toolbox/field_amd64.s b/vendor/github.com/cloudflare/p751sidh/p751toolbox/field_amd64.s new file mode 100644 index 00000000..59940253 --- /dev/null +++ b/vendor/github.com/cloudflare/p751sidh/p751toolbox/field_amd64.s @@ -0,0 +1,2377 @@ +#include "textflag.h" + +// p751 + 1 +#define P751P1_5 $0xEEB0000000000000 +#define P751P1_6 $0xE3EC968549F878A8 +#define P751P1_7 $0xDA959B1A13F7CC76 +#define P751P1_8 $0x084E9867D6EBE876 +#define P751P1_9 $0x8562B5045CB25748 +#define P751P1_10 $0x0E12909F97BADC66 +#define P751P1_11 $0x00006FE5D541F71C + +#define P751_0 $0xFFFFFFFFFFFFFFFF +#define P751_5 $0xEEAFFFFFFFFFFFFF +#define P751_6 $0xE3EC968549F878A8 +#define P751_7 $0xDA959B1A13F7CC76 +#define P751_8 $0x084E9867D6EBE876 +#define P751_9 $0x8562B5045CB25748 +#define P751_10 $0x0E12909F97BADC66 +#define P751_11 $0x00006FE5D541F71C + +#define P751X2_0 $0xFFFFFFFFFFFFFFFE +#define P751X2_1 $0xFFFFFFFFFFFFFFFF +#define P751X2_5 $0xDD5FFFFFFFFFFFFF +#define P751X2_6 $0xC7D92D0A93F0F151 +#define P751X2_7 $0xB52B363427EF98ED +#define P751X2_8 $0x109D30CFADD7D0ED +#define P751X2_9 $0x0AC56A08B964AE90 +#define P751X2_10 $0x1C25213F2F75B8CD +#define P751X2_11 $0x0000DFCBAA83EE38 + +// The MSR code uses these registers for parameter passing. Keep using +// them to avoid significant code changes. 
This means that when the Go +// assembler does something strange, we can diff the machine code +// against a different assembler to find out what Go did. + +#define REG_P1 DI +#define REG_P2 SI +#define REG_P3 DX + +// We can't write MOVQ $0, AX because Go's assembler incorrectly +// optimizes this to XOR AX, AX, which clobbers the carry flags. +// +// This bug was defined to be "correct" behaviour (cf. +// https://github.com/golang/go/issues/12405 ) by declaring that the MOV +// pseudo-instruction clobbers flags, although this fact is mentioned +// nowhere in the documentation for the Go assembler. +// +// Defining MOVQ to clobber flags has the effect that it is never safe +// to interleave MOVQ with ADCQ and SBBQ instructions. Since this is +// required to write a carry chain longer than registers' working set, +// all of the below code therefore relies on the unspecified and +// undocumented behaviour that MOV won't clobber flags, except in the +// case of the above-mentioned bug. +// +// However, there's also no specification of which instructions +// correspond to machine instructions, and which are +// pseudo-instructions (i.e., no specification of what the assembler +// actually does), so this doesn't seem much worse than usual. 
+// +// Avoid the bug by dropping the bytes for `mov eax, 0` in directly: + +#define ZERO_AX_WITHOUT_CLOBBERING_FLAGS BYTE $0xB8; BYTE $0; BYTE $0; BYTE $0; BYTE $0; + +TEXT ·fp751StrongReduce(SB), NOSPLIT, $0-8 + MOVQ x+0(FP), REG_P1 + + // Zero AX for later use: + XORQ AX, AX + + // Load p into registers: + MOVQ P751_0, R8 + // P751_{1,2,3,4} = P751_0, so reuse R8 + MOVQ P751_5, R9 + MOVQ P751_6, R10 + MOVQ P751_7, R11 + MOVQ P751_8, R12 + MOVQ P751_9, R13 + MOVQ P751_10, R14 + MOVQ P751_11, R15 + + // Set x <- x - p + SUBQ R8, (REG_P1) + SBBQ R8, (8)(REG_P1) + SBBQ R8, (16)(REG_P1) + SBBQ R8, (24)(REG_P1) + SBBQ R8, (32)(REG_P1) + SBBQ R9, (40)(REG_P1) + SBBQ R10, (48)(REG_P1) + SBBQ R11, (56)(REG_P1) + SBBQ R12, (64)(REG_P1) + SBBQ R13, (72)(REG_P1) + SBBQ R14, (80)(REG_P1) + SBBQ R15, (88)(REG_P1) + + // Save carry flag indicating x-p < 0 as a mask in AX + SBBQ $0, AX + + // Conditionally add p to x if x-p < 0 + ANDQ AX, R8 + ANDQ AX, R9 + ANDQ AX, R10 + ANDQ AX, R11 + ANDQ AX, R12 + ANDQ AX, R13 + ANDQ AX, R14 + ANDQ AX, R15 + + ADDQ R8, (REG_P1) + ADCQ R8, (8)(REG_P1) + ADCQ R8, (16)(REG_P1) + ADCQ R8, (24)(REG_P1) + ADCQ R8, (32)(REG_P1) + ADCQ R9, (40)(REG_P1) + ADCQ R10, (48)(REG_P1) + ADCQ R11, (56)(REG_P1) + ADCQ R12, (64)(REG_P1) + ADCQ R13, (72)(REG_P1) + ADCQ R14, (80)(REG_P1) + ADCQ R15, (88)(REG_P1) + + RET + +TEXT ·fp751ConditionalSwap(SB), NOSPLIT, $0-17 + + MOVQ x+0(FP), REG_P1 + MOVQ y+8(FP), REG_P2 + MOVB choice+16(FP), AL // AL = 0 or 1 + MOVBLZX AL, AX // AX = 0 or 1 + NEGQ AX // RAX = 0x00..00 or 0xff..ff + + MOVQ (0*8)(REG_P1), BX // BX = x[0] + MOVQ (0*8)(REG_P2), CX // CX = y[0] + MOVQ CX, DX // DX = y[0] + XORQ BX, DX // DX = y[0] ^ x[0] + ANDQ AX, DX // DX = (y[0] ^ x[0]) & mask + XORQ DX, BX // BX = (y[0] ^ x[0]) & mask) ^ x[0] = x[0] or y[0] + XORQ DX, CX // CX = (y[0] ^ x[0]) & mask) ^ y[0] = y[0] or x[0] + MOVQ BX, (0*8)(REG_P1) + MOVQ CX, (0*8)(REG_P2) + + MOVQ (1*8)(REG_P1), BX + MOVQ (1*8)(REG_P2), CX + MOVQ CX, DX + XORQ BX, DX 
+ ANDQ AX, DX + XORQ DX, BX + XORQ DX, CX + MOVQ BX, (1*8)(REG_P1) + MOVQ CX, (1*8)(REG_P2) + + MOVQ (2*8)(REG_P1), BX + MOVQ (2*8)(REG_P2), CX + MOVQ CX, DX + XORQ BX, DX + ANDQ AX, DX + XORQ DX, BX + XORQ DX, CX + MOVQ BX, (2*8)(REG_P1) + MOVQ CX, (2*8)(REG_P2) + + MOVQ (3*8)(REG_P1), BX + MOVQ (3*8)(REG_P2), CX + MOVQ CX, DX + XORQ BX, DX + ANDQ AX, DX + XORQ DX, BX + XORQ DX, CX + MOVQ BX, (3*8)(REG_P1) + MOVQ CX, (3*8)(REG_P2) + + MOVQ (4*8)(REG_P1), BX + MOVQ (4*8)(REG_P2), CX + MOVQ CX, DX + XORQ BX, DX + ANDQ AX, DX + XORQ DX, BX + XORQ DX, CX + MOVQ BX, (4*8)(REG_P1) + MOVQ CX, (4*8)(REG_P2) + + MOVQ (5*8)(REG_P1), BX + MOVQ (5*8)(REG_P2), CX + MOVQ CX, DX + XORQ BX, DX + ANDQ AX, DX + XORQ DX, BX + XORQ DX, CX + MOVQ BX, (5*8)(REG_P1) + MOVQ CX, (5*8)(REG_P2) + + MOVQ (6*8)(REG_P1), BX + MOVQ (6*8)(REG_P2), CX + MOVQ CX, DX + XORQ BX, DX + ANDQ AX, DX + XORQ DX, BX + XORQ DX, CX + MOVQ BX, (6*8)(REG_P1) + MOVQ CX, (6*8)(REG_P2) + + MOVQ (7*8)(REG_P1), BX + MOVQ (7*8)(REG_P2), CX + MOVQ CX, DX + XORQ BX, DX + ANDQ AX, DX + XORQ DX, BX + XORQ DX, CX + MOVQ BX, (7*8)(REG_P1) + MOVQ CX, (7*8)(REG_P2) + + MOVQ (8*8)(REG_P1), BX + MOVQ (8*8)(REG_P2), CX + MOVQ CX, DX + XORQ BX, DX + ANDQ AX, DX + XORQ DX, BX + XORQ DX, CX + MOVQ BX, (8*8)(REG_P1) + MOVQ CX, (8*8)(REG_P2) + + MOVQ (9*8)(REG_P1), BX + MOVQ (9*8)(REG_P2), CX + MOVQ CX, DX + XORQ BX, DX + ANDQ AX, DX + XORQ DX, BX + XORQ DX, CX + MOVQ BX, (9*8)(REG_P1) + MOVQ CX, (9*8)(REG_P2) + + MOVQ (10*8)(REG_P1), BX + MOVQ (10*8)(REG_P2), CX + MOVQ CX, DX + XORQ BX, DX + ANDQ AX, DX + XORQ DX, BX + XORQ DX, CX + MOVQ BX, (10*8)(REG_P1) + MOVQ CX, (10*8)(REG_P2) + + MOVQ (11*8)(REG_P1), BX + MOVQ (11*8)(REG_P2), CX + MOVQ CX, DX + XORQ BX, DX + ANDQ AX, DX + XORQ DX, BX + XORQ DX, CX + MOVQ BX, (11*8)(REG_P1) + MOVQ CX, (11*8)(REG_P2) + + RET + +TEXT ·fp751ConditionalAssign(SB), NOSPLIT, $0-25 + + MOVQ z+0(FP), REG_P3 + MOVQ x+8(FP), REG_P1 + MOVQ y+16(FP), REG_P2 + MOVB choice+24(FP), AL // AL = 0 or 1 + 
MOVBLZX AL, AX // AX = 0 or 1 + NEGQ AX // RAX = 0x00..00 or 0xff..ff + + MOVQ (0*8)(REG_P1), BX // BX = x[0] + MOVQ (0*8)(REG_P2), CX // CX = y[0] + XORQ BX, CX // CX = y[0] ^ x[0] + ANDQ AX, CX // CX = (y[0] ^ x[0]) & mask + XORQ BX, CX // CX = (y[0] ^ x[0]) & mask) ^ x[0] + MOVQ CX, (0*8)(REG_P3) // = x[0] or y[0] + + MOVQ (1*8)(REG_P1), BX + MOVQ (1*8)(REG_P2), CX + XORQ BX, CX + ANDQ AX, CX + XORQ BX, CX + MOVQ CX, (1*8)(REG_P3) + + MOVQ (2*8)(REG_P1), BX + MOVQ (2*8)(REG_P2), CX + XORQ BX, CX + ANDQ AX, CX + XORQ BX, CX + MOVQ CX, (2*8)(REG_P3) + + MOVQ (3*8)(REG_P1), BX + MOVQ (3*8)(REG_P2), CX + XORQ BX, CX + ANDQ AX, CX + XORQ BX, CX + MOVQ CX, (3*8)(REG_P3) + + MOVQ (4*8)(REG_P1), BX + MOVQ (4*8)(REG_P2), CX + XORQ BX, CX + ANDQ AX, CX + XORQ BX, CX + MOVQ CX, (4*8)(REG_P3) + + MOVQ (5*8)(REG_P1), BX + MOVQ (5*8)(REG_P2), CX + XORQ BX, CX + ANDQ AX, CX + XORQ BX, CX + MOVQ CX, (5*8)(REG_P3) + + MOVQ (6*8)(REG_P1), BX + MOVQ (6*8)(REG_P2), CX + XORQ BX, CX + ANDQ AX, CX + XORQ BX, CX + MOVQ CX, (6*8)(REG_P3) + + MOVQ (7*8)(REG_P1), BX + MOVQ (7*8)(REG_P2), CX + XORQ BX, CX + ANDQ AX, CX + XORQ BX, CX + MOVQ CX, (7*8)(REG_P3) + + MOVQ (8*8)(REG_P1), BX + MOVQ (8*8)(REG_P2), CX + XORQ BX, CX + ANDQ AX, CX + XORQ BX, CX + MOVQ CX, (8*8)(REG_P3) + + MOVQ (9*8)(REG_P1), BX + MOVQ (9*8)(REG_P2), CX + XORQ BX, CX + ANDQ AX, CX + XORQ BX, CX + MOVQ CX, (9*8)(REG_P3) + + MOVQ (10*8)(REG_P1), BX + MOVQ (10*8)(REG_P2), CX + XORQ BX, CX + ANDQ AX, CX + XORQ BX, CX + MOVQ CX, (10*8)(REG_P3) + + MOVQ (11*8)(REG_P1), BX + MOVQ (11*8)(REG_P2), CX + XORQ BX, CX + ANDQ AX, CX + XORQ BX, CX + MOVQ CX, (11*8)(REG_P3) + + RET + +TEXT ·fp751AddReduced(SB), NOSPLIT, $0-24 + + MOVQ z+0(FP), REG_P3 + MOVQ x+8(FP), REG_P1 + MOVQ y+16(FP), REG_P2 + + MOVQ (REG_P1), R8 + MOVQ (8)(REG_P1), R9 + MOVQ (16)(REG_P1), R10 + MOVQ (24)(REG_P1), R11 + MOVQ (32)(REG_P1), R12 + MOVQ (40)(REG_P1), R13 + MOVQ (48)(REG_P1), R14 + MOVQ (56)(REG_P1), R15 + MOVQ (64)(REG_P1), CX + ADDQ (REG_P2), R8 + 
ADCQ (8)(REG_P2), R9 + ADCQ (16)(REG_P2), R10 + ADCQ (24)(REG_P2), R11 + ADCQ (32)(REG_P2), R12 + ADCQ (40)(REG_P2), R13 + ADCQ (48)(REG_P2), R14 + ADCQ (56)(REG_P2), R15 + ADCQ (64)(REG_P2), CX + MOVQ (72)(REG_P1), AX + ADCQ (72)(REG_P2), AX + MOVQ AX, (72)(REG_P3) + MOVQ (80)(REG_P1), AX + ADCQ (80)(REG_P2), AX + MOVQ AX, (80)(REG_P3) + MOVQ (88)(REG_P1), AX + ADCQ (88)(REG_P2), AX + MOVQ AX, (88)(REG_P3) + + MOVQ P751X2_0, AX + SUBQ AX, R8 + MOVQ P751X2_1, AX + SBBQ AX, R9 + SBBQ AX, R10 + SBBQ AX, R11 + SBBQ AX, R12 + MOVQ P751X2_5, AX + SBBQ AX, R13 + MOVQ P751X2_6, AX + SBBQ AX, R14 + MOVQ P751X2_7, AX + SBBQ AX, R15 + MOVQ P751X2_8, AX + SBBQ AX, CX + MOVQ R8, (REG_P3) + MOVQ R9, (8)(REG_P3) + MOVQ R10, (16)(REG_P3) + MOVQ R11, (24)(REG_P3) + MOVQ R12, (32)(REG_P3) + MOVQ R13, (40)(REG_P3) + MOVQ R14, (48)(REG_P3) + MOVQ R15, (56)(REG_P3) + MOVQ CX, (64)(REG_P3) + MOVQ (72)(REG_P3), R8 + MOVQ (80)(REG_P3), R9 + MOVQ (88)(REG_P3), R10 + MOVQ P751X2_9, AX + SBBQ AX, R8 + MOVQ P751X2_10, AX + SBBQ AX, R9 + MOVQ P751X2_11, AX + SBBQ AX, R10 + MOVQ R8, (72)(REG_P3) + MOVQ R9, (80)(REG_P3) + MOVQ R10, (88)(REG_P3) + ZERO_AX_WITHOUT_CLOBBERING_FLAGS + SBBQ $0, AX + + MOVQ P751X2_0, SI + ANDQ AX, SI + MOVQ P751X2_1, R8 + ANDQ AX, R8 + MOVQ P751X2_5, R9 + ANDQ AX, R9 + MOVQ P751X2_6, R10 + ANDQ AX, R10 + MOVQ P751X2_7, R11 + ANDQ AX, R11 + MOVQ P751X2_8, R12 + ANDQ AX, R12 + MOVQ P751X2_9, R13 + ANDQ AX, R13 + MOVQ P751X2_10, R14 + ANDQ AX, R14 + MOVQ P751X2_11, R15 + ANDQ AX, R15 + + MOVQ (REG_P3), AX + ADDQ SI, AX + MOVQ AX, (REG_P3) + MOVQ (8)(REG_P3), AX + ADCQ R8, AX + MOVQ AX, (8)(REG_P3) + MOVQ (16)(REG_P3), AX + ADCQ R8, AX + MOVQ AX, (16)(REG_P3) + MOVQ (24)(REG_P3), AX + ADCQ R8, AX + MOVQ AX, (24)(REG_P3) + MOVQ (32)(REG_P3), AX + ADCQ R8, AX + MOVQ AX, (32)(REG_P3) + MOVQ (40)(REG_P3), AX + ADCQ R9, AX + MOVQ AX, (40)(REG_P3) + MOVQ (48)(REG_P3), AX + ADCQ R10, AX + MOVQ AX, (48)(REG_P3) + MOVQ (56)(REG_P3), AX + ADCQ R11, AX + MOVQ AX, (56)(REG_P3) + 
MOVQ (64)(REG_P3), AX + ADCQ R12, AX + MOVQ AX, (64)(REG_P3) + MOVQ (72)(REG_P3), AX + ADCQ R13, AX + MOVQ AX, (72)(REG_P3) + MOVQ (80)(REG_P3), AX + ADCQ R14, AX + MOVQ AX, (80)(REG_P3) + MOVQ (88)(REG_P3), AX + ADCQ R15, AX + MOVQ AX, (88)(REG_P3) + + RET + +TEXT ·fp751SubReduced(SB), NOSPLIT, $0-24 + + MOVQ z+0(FP), REG_P3 + MOVQ x+8(FP), REG_P1 + MOVQ y+16(FP), REG_P2 + + MOVQ (REG_P1), R8 + MOVQ (8)(REG_P1), R9 + MOVQ (16)(REG_P1), R10 + MOVQ (24)(REG_P1), R11 + MOVQ (32)(REG_P1), R12 + MOVQ (40)(REG_P1), R13 + MOVQ (48)(REG_P1), R14 + MOVQ (56)(REG_P1), R15 + MOVQ (64)(REG_P1), CX + SUBQ (REG_P2), R8 + SBBQ (8)(REG_P2), R9 + SBBQ (16)(REG_P2), R10 + SBBQ (24)(REG_P2), R11 + SBBQ (32)(REG_P2), R12 + SBBQ (40)(REG_P2), R13 + SBBQ (48)(REG_P2), R14 + SBBQ (56)(REG_P2), R15 + SBBQ (64)(REG_P2), CX + MOVQ R8, (REG_P3) + MOVQ R9, (8)(REG_P3) + MOVQ R10, (16)(REG_P3) + MOVQ R11, (24)(REG_P3) + MOVQ R12, (32)(REG_P3) + MOVQ R13, (40)(REG_P3) + MOVQ R14, (48)(REG_P3) + MOVQ R15, (56)(REG_P3) + MOVQ CX, (64)(REG_P3) + MOVQ (72)(REG_P1), AX + SBBQ (72)(REG_P2), AX + MOVQ AX, (72)(REG_P3) + MOVQ (80)(REG_P1), AX + SBBQ (80)(REG_P2), AX + MOVQ AX, (80)(REG_P3) + MOVQ (88)(REG_P1), AX + SBBQ (88)(REG_P2), AX + MOVQ AX, (88)(REG_P3) + ZERO_AX_WITHOUT_CLOBBERING_FLAGS + SBBQ $0, AX + + MOVQ P751X2_0, SI + ANDQ AX, SI + MOVQ P751X2_1, R8 + ANDQ AX, R8 + MOVQ P751X2_5, R9 + ANDQ AX, R9 + MOVQ P751X2_6, R10 + ANDQ AX, R10 + MOVQ P751X2_7, R11 + ANDQ AX, R11 + MOVQ P751X2_8, R12 + ANDQ AX, R12 + MOVQ P751X2_9, R13 + ANDQ AX, R13 + MOVQ P751X2_10, R14 + ANDQ AX, R14 + MOVQ P751X2_11, R15 + ANDQ AX, R15 + + MOVQ (REG_P3), AX + ADDQ SI, AX + MOVQ AX, (REG_P3) + MOVQ (8)(REG_P3), AX + ADCQ R8, AX + MOVQ AX, (8)(REG_P3) + MOVQ (16)(REG_P3), AX + ADCQ R8, AX + MOVQ AX, (16)(REG_P3) + MOVQ (24)(REG_P3), AX + ADCQ R8, AX + MOVQ AX, (24)(REG_P3) + MOVQ (32)(REG_P3), AX + ADCQ R8, AX + MOVQ AX, (32)(REG_P3) + MOVQ (40)(REG_P3), AX + ADCQ R9, AX + MOVQ AX, (40)(REG_P3) + MOVQ (48)(REG_P3), 
AX + ADCQ R10, AX + MOVQ AX, (48)(REG_P3) + MOVQ (56)(REG_P3), AX + ADCQ R11, AX + MOVQ AX, (56)(REG_P3) + MOVQ (64)(REG_P3), AX + ADCQ R12, AX + MOVQ AX, (64)(REG_P3) + MOVQ (72)(REG_P3), AX + ADCQ R13, AX + MOVQ AX, (72)(REG_P3) + MOVQ (80)(REG_P3), AX + ADCQ R14, AX + MOVQ AX, (80)(REG_P3) + MOVQ (88)(REG_P3), AX + ADCQ R15, AX + MOVQ AX, (88)(REG_P3) + + RET + +TEXT ·fp751Mul(SB), $96-24 + + // Here we store the destination in CX instead of in REG_P3 because the + // multiplication instructions use DX as an implicit destination + // operand: MULQ $REG sets DX:AX <-- AX * $REG. + + MOVQ z+0(FP), CX + MOVQ x+8(FP), REG_P1 + MOVQ y+16(FP), REG_P2 + + XORQ AX, AX + MOVQ (48)(REG_P1), R8 + MOVQ (56)(REG_P1), R9 + MOVQ (64)(REG_P1), R10 + MOVQ (72)(REG_P1), R11 + MOVQ (80)(REG_P1), R12 + MOVQ (88)(REG_P1), R13 + ADDQ (REG_P1), R8 + ADCQ (8)(REG_P1), R9 + ADCQ (16)(REG_P1), R10 + ADCQ (24)(REG_P1), R11 + ADCQ (32)(REG_P1), R12 + ADCQ (40)(REG_P1), R13 + MOVQ R8, (CX) + MOVQ R9, (8)(CX) + MOVQ R10, (16)(CX) + MOVQ R11, (24)(CX) + MOVQ R12, (32)(CX) + MOVQ R13, (40)(CX) + SBBQ $0, AX + + XORQ DX, DX + MOVQ (48)(REG_P2), R8 + MOVQ (56)(REG_P2), R9 + MOVQ (64)(REG_P2), R10 + MOVQ (72)(REG_P2), R11 + MOVQ (80)(REG_P2), R12 + MOVQ (88)(REG_P2), R13 + ADDQ (REG_P2), R8 + ADCQ (8)(REG_P2), R9 + ADCQ (16)(REG_P2), R10 + ADCQ (24)(REG_P2), R11 + ADCQ (32)(REG_P2), R12 + ADCQ (40)(REG_P2), R13 + MOVQ R8, (48)(CX) + MOVQ R9, (56)(CX) + MOVQ R10, (64)(CX) + MOVQ R11, (72)(CX) + MOVQ R12, (80)(CX) + MOVQ R13, (88)(CX) + SBBQ $0, DX + MOVQ AX, (80)(SP) + MOVQ DX, (88)(SP) + + // (SP[0-8],R10,R8,R9) <- (AH+AL)*(BH+BL) + + MOVQ (CX), R11 + MOVQ R8, AX + MULQ R11 + MOVQ AX, (SP) // c0 + MOVQ DX, R14 + + XORQ R15, R15 + MOVQ R9, AX + MULQ R11 + XORQ R9, R9 + ADDQ AX, R14 + ADCQ DX, R9 + + MOVQ (8)(CX), R12 + MOVQ R8, AX + MULQ R12 + ADDQ AX, R14 + MOVQ R14, (8)(SP) // c1 + ADCQ DX, R9 + ADCQ $0, R15 + + XORQ R8, R8 + MOVQ R10, AX + MULQ R11 + ADDQ AX, R9 + MOVQ (48)(CX), R13 + ADCQ DX, 
R15 + ADCQ $0, R8 + + MOVQ (16)(CX), AX + MULQ R13 + ADDQ AX, R9 + ADCQ DX, R15 + MOVQ (56)(CX), AX + ADCQ $0, R8 + + MULQ R12 + ADDQ AX, R9 + MOVQ R9, (16)(SP) // c2 + ADCQ DX, R15 + ADCQ $0, R8 + + XORQ R9, R9 + MOVQ (72)(CX), AX + MULQ R11 + ADDQ AX, R15 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (24)(CX), AX + MULQ R13 + ADDQ AX, R15 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ R10, AX + MULQ R12 + ADDQ AX, R15 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (16)(CX), R14 + MOVQ (56)(CX), AX + MULQ R14 + ADDQ AX, R15 + MOVQ R15, (24)(SP) // c3 + ADCQ DX, R8 + ADCQ $0, R9 + + XORQ R10, R10 + MOVQ (80)(CX), AX + MULQ R11 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (64)(CX), AX + MULQ R14 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (48)(CX), R15 + MOVQ (32)(CX), AX + MULQ R15 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (72)(CX), AX + MULQ R12 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (24)(CX), R13 + MOVQ (56)(CX), AX + MULQ R13 + ADDQ AX, R8 + MOVQ R8, (32)(SP) // c4 + ADCQ DX, R9 + ADCQ $0, R10 + + XORQ R8, R8 + MOVQ (88)(CX), AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (64)(CX), AX + MULQ R13 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (72)(CX), AX + MULQ R14 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (40)(CX), AX + MULQ R15 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (80)(CX), AX + MULQ R12 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (32)(CX), R15 + MOVQ (56)(CX), AX + MULQ R15 + ADDQ AX, R9 + MOVQ R9, (40)(SP) // c5 + ADCQ DX, R10 + ADCQ $0, R8 + + XORQ R9, R9 + MOVQ (64)(CX), AX + MULQ R15 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (88)(CX), AX + MULQ R12 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (80)(CX), AX + MULQ R14 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (40)(CX), R11 + MOVQ (56)(CX), AX + MULQ R11 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (72)(CX), AX + MULQ R13 + ADDQ AX, R10 + MOVQ R10, (48)(SP) // c6 + ADCQ DX, R8 + ADCQ $0, R9 + + XORQ R10, R10 + 
MOVQ (88)(CX), AX + MULQ R14 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (64)(CX), AX + MULQ R11 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (80)(CX), AX + MULQ R13 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (72)(CX), AX + MULQ R15 + ADDQ AX, R8 + MOVQ R8, (56)(SP) // c7 + ADCQ DX, R9 + ADCQ $0, R10 + + XORQ R8, R8 + MOVQ (72)(CX), AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (80)(CX), AX + MULQ R15 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (88)(CX), AX + MULQ R13 + ADDQ AX, R9 + MOVQ R9, (64)(SP) // c8 + ADCQ DX, R10 + ADCQ $0, R8 + + XORQ R9, R9 + MOVQ (88)(CX), AX + MULQ R15 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (80)(CX), AX + MULQ R11 + ADDQ AX, R10 // c9 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (88)(CX), AX + MULQ R11 + ADDQ AX, R8 // c10 + ADCQ DX, R9 // c11 + + MOVQ (88)(SP), AX + MOVQ (CX), DX + ANDQ AX, R12 + ANDQ AX, R14 + ANDQ AX, DX + ANDQ AX, R13 + ANDQ AX, R15 + ANDQ AX, R11 + MOVQ (48)(SP), AX + ADDQ AX, DX + MOVQ (56)(SP), AX + ADCQ AX, R12 + MOVQ (64)(SP), AX + ADCQ AX, R14 + ADCQ R10, R13 + ADCQ R8, R15 + ADCQ R9, R11 + MOVQ (80)(SP), AX + MOVQ DX, (48)(SP) + MOVQ R12, (56)(SP) + MOVQ R14, (64)(SP) + MOVQ R13, (72)(SP) + MOVQ R15, (80)(SP) + MOVQ R11, (88)(SP) + + MOVQ (48)(CX), R8 + MOVQ (56)(CX), R9 + MOVQ (64)(CX), R10 + MOVQ (72)(CX), R11 + MOVQ (80)(CX), R12 + MOVQ (88)(CX), R13 + ANDQ AX, R8 + ANDQ AX, R9 + ANDQ AX, R10 + ANDQ AX, R11 + ANDQ AX, R12 + ANDQ AX, R13 + MOVQ (48)(SP), AX + ADDQ AX, R8 + MOVQ (56)(SP), AX + ADCQ AX, R9 + MOVQ (64)(SP), AX + ADCQ AX, R10 + MOVQ (72)(SP), AX + ADCQ AX, R11 + MOVQ (80)(SP), AX + ADCQ AX, R12 + MOVQ (88)(SP), AX + ADCQ AX, R13 + MOVQ R8, (48)(SP) + MOVQ R9, (56)(SP) + MOVQ R11, (72)(SP) + + // CX[0-11] <- AL*BL + MOVQ (REG_P1), R11 + MOVQ (REG_P2), AX + MULQ R11 + XORQ R9, R9 + MOVQ AX, (CX) // c0 + MOVQ R10, (64)(SP) + MOVQ DX, R8 + + MOVQ (8)(REG_P2), AX + MULQ R11 + XORQ R10, R10 + ADDQ AX, R8 + MOVQ R12, (80)(SP) + ADCQ DX, R9 + + 
MOVQ (8)(REG_P1), R12 + MOVQ (REG_P2), AX + MULQ R12 + ADDQ AX, R8 + MOVQ R8, (8)(CX) // c1 + ADCQ DX, R9 + MOVQ R13, (88)(SP) + ADCQ $0, R10 + + XORQ R8, R8 + MOVQ (16)(REG_P2), AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (REG_P2), R13 + MOVQ (16)(REG_P1), AX + MULQ R13 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (8)(REG_P2), AX + MULQ R12 + ADDQ AX, R9 + MOVQ R9, (16)(CX) // c2 + ADCQ DX, R10 + ADCQ $0, R8 + + XORQ R9, R9 + MOVQ (24)(REG_P2), AX + MULQ R11 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (24)(REG_P1), AX + MULQ R13 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (16)(REG_P2), AX + MULQ R12 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (16)(REG_P1), R14 + MOVQ (8)(REG_P2), AX + MULQ R14 + ADDQ AX, R10 + MOVQ R10, (24)(CX) // c3 + ADCQ DX, R8 + ADCQ $0, R9 + + XORQ R10, R10 + MOVQ (32)(REG_P2), AX + MULQ R11 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (16)(REG_P2), AX + MULQ R14 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (32)(REG_P1), AX + MULQ R13 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (24)(REG_P2), AX + MULQ R12 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (24)(REG_P1), R13 + MOVQ (8)(REG_P2), AX + MULQ R13 + ADDQ AX, R8 + MOVQ R8, (32)(CX) // c4 + ADCQ DX, R9 + ADCQ $0, R10 + + XORQ R8, R8 + MOVQ (40)(REG_P2), AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (16)(REG_P2), AX + MULQ R13 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (24)(REG_P2), AX + MULQ R14 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (40)(REG_P1), R11 + MOVQ (REG_P2), AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (32)(REG_P2), AX + MULQ R12 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (32)(REG_P1), R15 + MOVQ (8)(REG_P2), AX + MULQ R15 + ADDQ AX, R9 + MOVQ R9, (40)(CX) //c5 + ADCQ DX, R10 + ADCQ $0, R8 + + XORQ R9, R9 + MOVQ (16)(REG_P2), AX + MULQ R15 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (40)(REG_P2), AX + MULQ R12 + ADDQ AX, 
R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (32)(REG_P2), AX + MULQ R14 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (8)(REG_P2), AX + MULQ R11 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (24)(REG_P2), AX + MULQ R13 + ADDQ AX, R10 + MOVQ R10, (48)(CX) // c6 + ADCQ DX, R8 + ADCQ $0, R9 + + XORQ R10, R10 + MOVQ (40)(REG_P2), AX + MULQ R14 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (16)(REG_P2), AX + MULQ R11 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (32)(REG_P2), AX + MULQ R13 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (24)(REG_P2), AX + MULQ R15 + ADDQ AX, R8 + MOVQ R8, (56)(CX) // c7 + ADCQ DX, R9 + ADCQ $0, R10 + + XORQ R8, R8 + MOVQ (24)(REG_P2), AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (32)(REG_P2), AX + MULQ R15 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (40)(REG_P2), AX + MULQ R13 + ADDQ AX, R9 + MOVQ R9, (64)(CX) // c8 + ADCQ DX, R10 + ADCQ $0, R8 + + XORQ R9, R9 + MOVQ (40)(REG_P2), AX + MULQ R15 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (32)(REG_P2), AX + MULQ R11 + ADDQ AX, R10 + MOVQ R10, (72)(CX) // c9 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (40)(REG_P2), AX + MULQ R11 + ADDQ AX, R8 + MOVQ R8, (80)(CX) // c10 + ADCQ DX, R9 + MOVQ R9, (88)(CX) // c11 + + // CX[12-23] <- AH*BH + MOVQ (48)(REG_P1), R11 + MOVQ (48)(REG_P2), AX + MULQ R11 + XORQ R9, R9 + MOVQ AX, (96)(CX) // c0 + MOVQ DX, R8 + + MOVQ (56)(REG_P2), AX + MULQ R11 + XORQ R10, R10 + ADDQ AX, R8 + ADCQ DX, R9 + + MOVQ (56)(REG_P1), R12 + MOVQ (48)(REG_P2), AX + MULQ R12 + ADDQ AX, R8 + MOVQ R8, (104)(CX) // c1 + ADCQ DX, R9 + ADCQ $0, R10 + + XORQ R8, R8 + MOVQ (64)(REG_P2), AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (48)(REG_P2), R13 + MOVQ (64)(REG_P1), AX + MULQ R13 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (56)(REG_P2), AX + MULQ R12 + ADDQ AX, R9 + MOVQ R9, (112)(CX) // c2 + ADCQ DX, R10 + ADCQ $0, R8 + + XORQ R9, R9 + MOVQ (72)(REG_P2), AX + MULQ R11 + ADDQ AX, R10 + ADCQ DX, R8 
+ ADCQ $0, R9 + + MOVQ (72)(REG_P1), AX + MULQ R13 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (64)(REG_P2), AX + MULQ R12 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (64)(REG_P1), R14 + MOVQ (56)(REG_P2), AX + MULQ R14 + ADDQ AX, R10 + MOVQ R10, (120)(CX) // c3 + ADCQ DX, R8 + ADCQ $0, R9 + + XORQ R10, R10 + MOVQ (80)(REG_P2), AX + MULQ R11 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (64)(REG_P2), AX + MULQ R14 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (80)(REG_P1), R15 + MOVQ R13, AX + MULQ R15 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (72)(REG_P2), AX + MULQ R12 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (72)(REG_P1), R13 + MOVQ (56)(REG_P2), AX + MULQ R13 + ADDQ AX, R8 + MOVQ R8, (128)(CX) // c4 + ADCQ DX, R9 + ADCQ $0, R10 + + XORQ R8, R8 + MOVQ (88)(REG_P2), AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (64)(REG_P2), AX + MULQ R13 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (72)(REG_P2), AX + MULQ R14 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (88)(REG_P1), R11 + MOVQ (48)(REG_P2), AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (80)(REG_P2), AX + MULQ R12 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (56)(REG_P2), AX + MULQ R15 + ADDQ AX, R9 + MOVQ R9, (136)(CX) // c5 + ADCQ DX, R10 + ADCQ $0, R8 + + XORQ R9, R9 + MOVQ (64)(REG_P2), AX + MULQ R15 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (88)(REG_P2), AX + MULQ R12 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (80)(REG_P2), AX + MULQ R14 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (56)(REG_P2), AX + MULQ R11 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (72)(REG_P2), AX + MULQ R13 + ADDQ AX, R10 + MOVQ R10, (144)(CX) // c6 + ADCQ DX, R8 + ADCQ $0, R9 + + XORQ R10, R10 + MOVQ (88)(REG_P2), AX + MULQ R14 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (64)(REG_P2), AX + MULQ R11 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (80)(REG_P2), AX + MULQ R13 + ADDQ 
AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (72)(REG_P2), AX + MULQ R15 + ADDQ AX, R8 + MOVQ R8, (152)(CX) // c7 + ADCQ DX, R9 + ADCQ $0, R10 + + XORQ R8, R8 + MOVQ (72)(REG_P2), AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (80)(REG_P2), AX + MULQ R15 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (88)(REG_P2), AX + MULQ R13 + ADDQ AX, R9 + MOVQ R9, (160)(CX) // c8 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (88)(REG_P2), AX + MULQ R15 + ADDQ AX, R10 + ADCQ DX, R8 + + MOVQ (80)(REG_P2), AX + MULQ R11 + ADDQ AX, R10 + MOVQ R10, (168)(CX) // c9 + ADCQ DX, R8 + + MOVQ (88)(REG_P2), AX + MULQ R11 + ADDQ AX, R8 + MOVQ R8, (176)(CX) // c10 + ADCQ $0, DX + MOVQ DX, (184)(CX) // c11 + + // [R8-R15,AX,DX,DI,(SP)] <- (AH+AL)*(BH+BL)-AL*BL + MOVQ (SP), R8 + SUBQ (CX), R8 + MOVQ (8)(SP), R9 + SBBQ (8)(CX), R9 + MOVQ (16)(SP), R10 + SBBQ (16)(CX), R10 + MOVQ (24)(SP), R11 + SBBQ (24)(CX), R11 + MOVQ (32)(SP), R12 + SBBQ (32)(CX), R12 + MOVQ (40)(SP), R13 + SBBQ (40)(CX), R13 + MOVQ (48)(SP), R14 + SBBQ (48)(CX), R14 + MOVQ (56)(SP), R15 + SBBQ (56)(CX), R15 + MOVQ (64)(SP), AX + SBBQ (64)(CX), AX + MOVQ (72)(SP), DX + SBBQ (72)(CX), DX + MOVQ (80)(SP), DI + SBBQ (80)(CX), DI + MOVQ (88)(SP), SI + SBBQ (88)(CX), SI + MOVQ SI, (SP) + + // [R8-R15,AX,DX,DI,(SP)] <- (AH+AL)*(BH+BL) - AL*BL - AH*BH + MOVQ (96)(CX), SI + SUBQ SI, R8 + MOVQ (104)(CX), SI + SBBQ SI, R9 + MOVQ (112)(CX), SI + SBBQ SI, R10 + MOVQ (120)(CX), SI + SBBQ SI, R11 + MOVQ (128)(CX), SI + SBBQ SI, R12 + MOVQ (136)(CX), SI + SBBQ SI, R13 + MOVQ (144)(CX), SI + SBBQ SI, R14 + MOVQ (152)(CX), SI + SBBQ SI, R15 + MOVQ (160)(CX), SI + SBBQ SI, AX + MOVQ (168)(CX), SI + SBBQ SI, DX + MOVQ (176)(CX), SI + SBBQ SI, DI + MOVQ (SP), SI + SBBQ (184)(CX), SI + + // FINAL RESULT + ADDQ (48)(CX), R8 + MOVQ R8, (48)(CX) + ADCQ (56)(CX), R9 + MOVQ R9, (56)(CX) + ADCQ (64)(CX), R10 + MOVQ R10, (64)(CX) + ADCQ (72)(CX), R11 + MOVQ R11, (72)(CX) + ADCQ (80)(CX), R12 + MOVQ R12, (80)(CX) + ADCQ (88)(CX), R13 + 
MOVQ R13, (88)(CX) + ADCQ (96)(CX), R14 + MOVQ R14, (96)(CX) + ADCQ (104)(CX), R15 + MOVQ R15, (104)(CX) + ADCQ (112)(CX), AX + MOVQ AX, (112)(CX) + ADCQ (120)(CX), DX + MOVQ DX, (120)(CX) + ADCQ (128)(CX), DI + MOVQ DI, (128)(CX) + ADCQ (136)(CX), SI + MOVQ SI, (136)(CX) + MOVQ (144)(CX), AX + ADCQ $0, AX + MOVQ AX, (144)(CX) + MOVQ (152)(CX), AX + ADCQ $0, AX + MOVQ AX, (152)(CX) + MOVQ (160)(CX), AX + ADCQ $0, AX + MOVQ AX, (160)(CX) + MOVQ (168)(CX), AX + ADCQ $0, AX + MOVQ AX, (168)(CX) + MOVQ (176)(CX), AX + ADCQ $0, AX + MOVQ AX, (176)(CX) + MOVQ (184)(CX), AX + ADCQ $0, AX + MOVQ AX, (184)(CX) + + RET + +TEXT ·fp751MontgomeryReduce(SB), $0-16 + + MOVQ z+0(FP), REG_P2 + MOVQ x+8(FP), REG_P1 + + MOVQ (REG_P1), R11 + MOVQ P751P1_5, AX + MULQ R11 + XORQ R8, R8 + ADDQ (40)(REG_P1), AX + MOVQ AX, (40)(REG_P2) // Z5 + ADCQ DX, R8 + + XORQ R9, R9 + MOVQ P751P1_6, AX + MULQ R11 + XORQ R10, R10 + ADDQ AX, R8 + ADCQ DX, R9 + + MOVQ (8)(REG_P1), R12 + MOVQ P751P1_5, AX + MULQ R12 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + ADDQ (48)(REG_P1), R8 + MOVQ R8, (48)(REG_P2) // Z6 + ADCQ $0, R9 + ADCQ $0, R10 + + XORQ R8, R8 + MOVQ P751P1_7, AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_6, AX + MULQ R12 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (16)(REG_P1), R13 + MOVQ P751P1_5, AX + MULQ R13 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + ADDQ (56)(REG_P1), R9 + MOVQ R9, (56)(REG_P2) // Z7 + ADCQ $0, R10 + ADCQ $0, R8 + + XORQ R9, R9 + MOVQ P751P1_8, AX + MULQ R11 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_7, AX + MULQ R12 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_6, AX + MULQ R13 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (24)(REG_P1), R14 + MOVQ P751P1_5, AX + MULQ R14 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + ADDQ (64)(REG_P1), R10 + MOVQ R10, (64)(REG_P2) // Z8 + ADCQ $0, R8 + ADCQ $0, R9 + + XORQ R10, R10 + MOVQ P751P1_9, AX + MULQ R11 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ 
P751P1_8, AX + MULQ R12 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_7, AX + MULQ R13 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_6, AX + MULQ R14 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (32)(REG_P1), R15 + MOVQ P751P1_5, AX + MULQ R15 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + ADDQ (72)(REG_P1), R8 + MOVQ R8, (72)(REG_P2) // Z9 + ADCQ $0, R9 + ADCQ $0, R10 + + XORQ R8, R8 + MOVQ P751P1_10, AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_9, AX + MULQ R12 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_8, AX + MULQ R13 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_7, AX + MULQ R14 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_6, AX + MULQ R15 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (40)(REG_P2), CX + MOVQ P751P1_5, AX + MULQ CX + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + ADDQ (80)(REG_P1), R9 + MOVQ R9, (80)(REG_P2) // Z10 + ADCQ $0, R10 + ADCQ $0, R8 + + XORQ R9, R9 + MOVQ P751P1_11, AX + MULQ R11 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_10, AX + MULQ R12 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_9, AX + MULQ R13 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_8, AX + MULQ R14 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_7, AX + MULQ R15 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_6, AX + MULQ CX + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (48)(REG_P2), R11 + MOVQ P751P1_5, AX + MULQ R11 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + ADDQ (88)(REG_P1), R10 + MOVQ R10, (88)(REG_P2) // Z11 + ADCQ $0, R8 + ADCQ $0, R9 + + XORQ R10, R10 + MOVQ P751P1_11, AX + MULQ R12 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_10, AX + MULQ R13 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_9, AX + MULQ R14 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_8, AX + MULQ R15 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_7, AX + MULQ CX + ADDQ 
AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_6, AX + MULQ R11 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (56)(REG_P2), R12 + MOVQ P751P1_5, AX + MULQ R12 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + ADDQ (96)(REG_P1), R8 + MOVQ R8, (REG_P2) // Z0 + ADCQ $0, R9 + ADCQ $0, R10 + + XORQ R8, R8 + MOVQ P751P1_11, AX + MULQ R13 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_10, AX + MULQ R14 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_9, AX + MULQ R15 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_8, AX + MULQ CX + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_7, AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_6, AX + MULQ R12 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (64)(REG_P2), R13 + MOVQ P751P1_5, AX + MULQ R13 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + ADDQ (104)(REG_P1), R9 + MOVQ R9, (8)(REG_P2) // Z1 + ADCQ $0, R10 + ADCQ $0, R8 + + XORQ R9, R9 + MOVQ P751P1_11, AX + MULQ R14 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_10, AX + MULQ R15 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_9, AX + MULQ CX + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_8, AX + MULQ R11 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_7, AX + MULQ R12 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_6, AX + MULQ R13 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (72)(REG_P2), R14 + MOVQ P751P1_5, AX + MULQ R14 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + ADDQ (112)(REG_P1), R10 + MOVQ R10, (16)(REG_P2) // Z2 + ADCQ $0, R8 + ADCQ $0, R9 + + XORQ R10, R10 + MOVQ P751P1_11, AX + MULQ R15 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_10, AX + MULQ CX + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_9, AX + MULQ R11 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_8, AX + MULQ R12 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_7, AX + MULQ R13 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, 
R10 + + MOVQ P751P1_6, AX + MULQ R14 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (80)(REG_P2), R15 + MOVQ P751P1_5, AX + MULQ R15 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + ADDQ (120)(REG_P1), R8 + MOVQ R8, (24)(REG_P2) // Z3 + ADCQ $0, R9 + ADCQ $0, R10 + + XORQ R8, R8 + MOVQ P751P1_11, AX + MULQ CX + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_10, AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_9, AX + MULQ R12 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_8, AX + MULQ R13 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_7, AX + MULQ R14 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_6, AX + MULQ R15 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (88)(REG_P2), CX + MOVQ P751P1_5, AX + MULQ CX + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + ADDQ (128)(REG_P1), R9 + MOVQ R9, (32)(REG_P2) // Z4 + ADCQ $0, R10 + ADCQ $0, R8 + + XORQ R9, R9 + MOVQ P751P1_11, AX + MULQ R11 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_10, AX + MULQ R12 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_9, AX + MULQ R13 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_8, AX + MULQ R14 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_7, AX + MULQ R15 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_6, AX + MULQ CX + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + ADDQ (136)(REG_P1), R10 + MOVQ R10, (40)(REG_P2) // Z5 + ADCQ $0, R8 + ADCQ $0, R9 + + XORQ R10, R10 + MOVQ P751P1_11, AX + MULQ R12 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_10, AX + MULQ R13 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_9, AX + MULQ R14 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_8, AX + MULQ R15 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_7, AX + MULQ CX + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + ADDQ (144)(REG_P1), R8 + MOVQ R8, (48)(REG_P2) // Z6 + ADCQ $0, R9 + ADCQ $0, R10 + + XORQ R8, R8 + MOVQ P751P1_11, AX + MULQ 
R13 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_10, AX + MULQ R14 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_9, AX + MULQ R15 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_8, AX + MULQ CX + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + ADDQ (152)(REG_P1), R9 + MOVQ R9, (56)(REG_P2) // Z7 + ADCQ $0, R10 + ADCQ $0, R8 + + XORQ R9, R9 + MOVQ P751P1_11, AX + MULQ R14 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_10, AX + MULQ R15 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_9, AX + MULQ CX + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + ADDQ (160)(REG_P1), R10 + MOVQ R10, (64)(REG_P2) // Z8 + ADCQ $0, R8 + ADCQ $0, R9 + + XORQ R10, R10 + MOVQ P751P1_11, AX + MULQ R15 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_10, AX + MULQ CX + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + ADDQ (168)(REG_P1), R8 // Z9 + MOVQ R8, (72)(REG_P2) // Z9 + ADCQ $0, R9 + ADCQ $0, R10 + + MOVQ P751P1_11, AX + MULQ CX + ADDQ AX, R9 + ADCQ DX, R10 + ADDQ (176)(REG_P1), R9 // Z10 + MOVQ R9, (80)(REG_P2) // Z10 + ADCQ $0, R10 + ADDQ (184)(REG_P1), R10 // Z11 + MOVQ R10, (88)(REG_P2) // Z11 + + RET + +TEXT ·fp751AddLazy(SB), NOSPLIT, $0-24 + + MOVQ z+0(FP), REG_P3 + MOVQ x+8(FP), REG_P1 + MOVQ y+16(FP), REG_P2 + + MOVQ (REG_P1), R8 + MOVQ (8)(REG_P1), R9 + MOVQ (16)(REG_P1), R10 + MOVQ (24)(REG_P1), R11 + MOVQ (32)(REG_P1), R12 + MOVQ (40)(REG_P1), R13 + MOVQ (48)(REG_P1), R14 + MOVQ (56)(REG_P1), R15 + MOVQ (64)(REG_P1), AX + MOVQ (72)(REG_P1), BX + MOVQ (80)(REG_P1), CX + MOVQ (88)(REG_P1), DI + + ADDQ (REG_P2), R8 + ADCQ (8)(REG_P2), R9 + ADCQ (16)(REG_P2), R10 + ADCQ (24)(REG_P2), R11 + ADCQ (32)(REG_P2), R12 + ADCQ (40)(REG_P2), R13 + ADCQ (48)(REG_P2), R14 + ADCQ (56)(REG_P2), R15 + ADCQ (64)(REG_P2), AX + ADCQ (72)(REG_P2), BX + ADCQ (80)(REG_P2), CX + ADCQ (88)(REG_P2), DI + + MOVQ R8, (REG_P3) + MOVQ R9, (8)(REG_P3) + MOVQ R10, (16)(REG_P3) + MOVQ R11, (24)(REG_P3) + MOVQ R12, (32)(REG_P3) + MOVQ R13, (40)(REG_P3) 
+ MOVQ R14, (48)(REG_P3) + MOVQ R15, (56)(REG_P3) + MOVQ AX, (64)(REG_P3) + MOVQ BX, (72)(REG_P3) + MOVQ CX, (80)(REG_P3) + MOVQ DI, (88)(REG_P3) + + RET + +TEXT ·fp751X2AddLazy(SB), NOSPLIT, $0-24 + + MOVQ z+0(FP), REG_P3 + MOVQ x+8(FP), REG_P1 + MOVQ y+16(FP), REG_P2 + + MOVQ (REG_P1), R8 + MOVQ (8)(REG_P1), R9 + MOVQ (16)(REG_P1), R10 + MOVQ (24)(REG_P1), R11 + MOVQ (32)(REG_P1), R12 + MOVQ (40)(REG_P1), R13 + MOVQ (48)(REG_P1), R14 + MOVQ (56)(REG_P1), R15 + MOVQ (64)(REG_P1), AX + MOVQ (72)(REG_P1), BX + MOVQ (80)(REG_P1), CX + + ADDQ (REG_P2), R8 + ADCQ (8)(REG_P2), R9 + ADCQ (16)(REG_P2), R10 + ADCQ (24)(REG_P2), R11 + ADCQ (32)(REG_P2), R12 + ADCQ (40)(REG_P2), R13 + ADCQ (48)(REG_P2), R14 + ADCQ (56)(REG_P2), R15 + ADCQ (64)(REG_P2), AX + ADCQ (72)(REG_P2), BX + ADCQ (80)(REG_P2), CX + + MOVQ R8, (REG_P3) + MOVQ R9, (8)(REG_P3) + MOVQ R10, (16)(REG_P3) + MOVQ R11, (24)(REG_P3) + MOVQ R12, (32)(REG_P3) + MOVQ R13, (40)(REG_P3) + MOVQ R14, (48)(REG_P3) + MOVQ R15, (56)(REG_P3) + MOVQ AX, (64)(REG_P3) + MOVQ BX, (72)(REG_P3) + MOVQ CX, (80)(REG_P3) + MOVQ (88)(REG_P1), AX + ADCQ (88)(REG_P2), AX + MOVQ AX, (88)(REG_P3) + + MOVQ (96)(REG_P1), R8 + MOVQ (104)(REG_P1), R9 + MOVQ (112)(REG_P1), R10 + MOVQ (120)(REG_P1), R11 + MOVQ (128)(REG_P1), R12 + MOVQ (136)(REG_P1), R13 + MOVQ (144)(REG_P1), R14 + MOVQ (152)(REG_P1), R15 + MOVQ (160)(REG_P1), AX + MOVQ (168)(REG_P1), BX + MOVQ (176)(REG_P1), CX + MOVQ (184)(REG_P1), DI + + ADCQ (96)(REG_P2), R8 + ADCQ (104)(REG_P2), R9 + ADCQ (112)(REG_P2), R10 + ADCQ (120)(REG_P2), R11 + ADCQ (128)(REG_P2), R12 + ADCQ (136)(REG_P2), R13 + ADCQ (144)(REG_P2), R14 + ADCQ (152)(REG_P2), R15 + ADCQ (160)(REG_P2), AX + ADCQ (168)(REG_P2), BX + ADCQ (176)(REG_P2), CX + ADCQ (184)(REG_P2), DI + + MOVQ R8, (96)(REG_P3) + MOVQ R9, (104)(REG_P3) + MOVQ R10, (112)(REG_P3) + MOVQ R11, (120)(REG_P3) + MOVQ R12, (128)(REG_P3) + MOVQ R13, (136)(REG_P3) + MOVQ R14, (144)(REG_P3) + MOVQ R15, (152)(REG_P3) + MOVQ AX, (160)(REG_P3) + MOVQ BX, 
(168)(REG_P3) + MOVQ CX, (176)(REG_P3) + MOVQ DI, (184)(REG_P3) + + RET + + +TEXT ·fp751X2SubLazy(SB), NOSPLIT, $0-24 + + MOVQ z+0(FP), REG_P3 + MOVQ x+8(FP), REG_P1 + MOVQ y+16(FP), REG_P2 + + MOVQ (REG_P1), R8 + MOVQ (8)(REG_P1), R9 + MOVQ (16)(REG_P1), R10 + MOVQ (24)(REG_P1), R11 + MOVQ (32)(REG_P1), R12 + MOVQ (40)(REG_P1), R13 + MOVQ (48)(REG_P1), R14 + MOVQ (56)(REG_P1), R15 + MOVQ (64)(REG_P1), AX + MOVQ (72)(REG_P1), BX + MOVQ (80)(REG_P1), CX + + SUBQ (REG_P2), R8 + SBBQ (8)(REG_P2), R9 + SBBQ (16)(REG_P2), R10 + SBBQ (24)(REG_P2), R11 + SBBQ (32)(REG_P2), R12 + SBBQ (40)(REG_P2), R13 + SBBQ (48)(REG_P2), R14 + SBBQ (56)(REG_P2), R15 + SBBQ (64)(REG_P2), AX + SBBQ (72)(REG_P2), BX + SBBQ (80)(REG_P2), CX + + MOVQ R8, (REG_P3) + MOVQ R9, (8)(REG_P3) + MOVQ R10, (16)(REG_P3) + MOVQ R11, (24)(REG_P3) + MOVQ R12, (32)(REG_P3) + MOVQ R13, (40)(REG_P3) + MOVQ R14, (48)(REG_P3) + MOVQ R15, (56)(REG_P3) + MOVQ AX, (64)(REG_P3) + MOVQ BX, (72)(REG_P3) + MOVQ CX, (80)(REG_P3) + MOVQ (88)(REG_P1), AX + SBBQ (88)(REG_P2), AX + MOVQ AX, (88)(REG_P3) + + MOVQ (96)(REG_P1), R8 + MOVQ (104)(REG_P1), R9 + MOVQ (112)(REG_P1), R10 + MOVQ (120)(REG_P1), R11 + MOVQ (128)(REG_P1), R12 + MOVQ (136)(REG_P1), R13 + MOVQ (144)(REG_P1), R14 + MOVQ (152)(REG_P1), R15 + MOVQ (160)(REG_P1), AX + MOVQ (168)(REG_P1), BX + MOVQ (176)(REG_P1), CX + MOVQ (184)(REG_P1), DI + + SBBQ (96)(REG_P2), R8 + SBBQ (104)(REG_P2), R9 + SBBQ (112)(REG_P2), R10 + SBBQ (120)(REG_P2), R11 + SBBQ (128)(REG_P2), R12 + SBBQ (136)(REG_P2), R13 + SBBQ (144)(REG_P2), R14 + SBBQ (152)(REG_P2), R15 + SBBQ (160)(REG_P2), AX + SBBQ (168)(REG_P2), BX + SBBQ (176)(REG_P2), CX + SBBQ (184)(REG_P2), DI + + MOVQ R8, (96)(REG_P3) + MOVQ R9, (104)(REG_P3) + MOVQ R10, (112)(REG_P3) + MOVQ R11, (120)(REG_P3) + MOVQ R12, (128)(REG_P3) + MOVQ R13, (136)(REG_P3) + MOVQ R14, (144)(REG_P3) + MOVQ R15, (152)(REG_P3) + MOVQ AX, (160)(REG_P3) + MOVQ BX, (168)(REG_P3) + MOVQ CX, (176)(REG_P3) + MOVQ DI, (184)(REG_P3) + + // Now the 
carry flag is 1 if x-y < 0. If so, add p*2^768. + ZERO_AX_WITHOUT_CLOBBERING_FLAGS + SBBQ $0, AX + + // Load p into registers: + MOVQ P751_0, R8 + // P751_{1,2,3,4} = P751_0, so reuse R8 + MOVQ P751_5, R9 + MOVQ P751_6, R10 + MOVQ P751_7, R11 + MOVQ P751_8, R12 + MOVQ P751_9, R13 + MOVQ P751_10, R14 + MOVQ P751_11, R15 + + ANDQ AX, R8 + ANDQ AX, R9 + ANDQ AX, R10 + ANDQ AX, R11 + ANDQ AX, R12 + ANDQ AX, R13 + ANDQ AX, R14 + ANDQ AX, R15 + + ADDQ R8, (96 )(REG_P3) + ADCQ R8, (96+ 8)(REG_P3) + ADCQ R8, (96+16)(REG_P3) + ADCQ R8, (96+24)(REG_P3) + ADCQ R8, (96+32)(REG_P3) + ADCQ R9, (96+40)(REG_P3) + ADCQ R10, (96+48)(REG_P3) + ADCQ R11, (96+56)(REG_P3) + ADCQ R12, (96+64)(REG_P3) + ADCQ R13, (96+72)(REG_P3) + ADCQ R14, (96+80)(REG_P3) + ADCQ R15, (96+88)(REG_P3) + + RET + diff --git a/vendor/github.com/cloudflare/p751sidh/p751toolbox/field_test.go b/vendor/github.com/cloudflare/p751sidh/p751toolbox/field_test.go new file mode 100644 index 00000000..9742dab2 --- /dev/null +++ b/vendor/github.com/cloudflare/p751sidh/p751toolbox/field_test.go @@ -0,0 +1,545 @@ +package p751toolbox + +import ( + "math/big" + "math/rand" + "reflect" + "testing" + "testing/quick" +) + +var quickCheckScaleFactor = uint8(3) +var quickCheckConfig = &quick.Config{MaxCount: (1 << (12 + quickCheckScaleFactor))} + +var cln16prime, _ = new(big.Int).SetString("10354717741769305252977768237866805321427389645549071170116189679054678940682478846502882896561066713624553211618840202385203911976522554393044160468771151816976706840078913334358399730952774926980235086850991501872665651576831", 10) + +// Convert an Fp751Element to a big.Int for testing. Because this is only +// for testing, no big.Int to Fp751Element conversion is provided. 
+ +func radix64ToBigInt(x []uint64) *big.Int { + radix := new(big.Int) + // radix = 2^64, the weight of one limb; built by shifting so there is no parse error to drop + radix.Lsh(big.NewInt(1), 64) + + base := new(big.Int).SetUint64(1) + val := new(big.Int).SetUint64(0) + tmp := new(big.Int) + + for _, xi := range x { + tmp.SetUint64(xi) + tmp.Mul(tmp, base) + val.Add(val, tmp) + base.Mul(base, radix) + } + + return val +} + +func (x *PrimeFieldElement) toBigInt() *big.Int { + // Convert from Montgomery form + return x.A.toBigIntFromMontgomeryForm() +} + +func (x *Fp751Element) toBigIntFromMontgomeryForm() *big.Int { + // Convert from Montgomery form + a := Fp751Element{} + aR := fp751X2{} + copy(aR[:], x[:]) // = a*R + fp751MontgomeryReduce(&a, &aR) // = a mod p in [0,2p) + fp751StrongReduce(&a) // = a mod p in [0,p) + return radix64ToBigInt(a[:]) +} + +func TestPrimeFieldElementToBigInt(t *testing.T) { + // Chosen so that p < xR < 2p + x := PrimeFieldElement{A: Fp751Element{ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 140737488355328, + }} + // Computed using Sage: + // sage: p = 2^372 * 3^239 - 1 + // sage: R = 2^768 + // sage: from_radix_64 = lambda xs: sum((xi * (2**64)**i for i,xi in enumerate(xs))) + // sage: xR = from_radix_64([1]*11 + [2^47]) + // sage: assert(p < xR) + // sage: assert(xR < 2*p) + // sage: (xR / R) % p + xBig, _ := new(big.Int).SetString("4469946751055876387821312289373600189787971305258234719850789711074696941114031433609871105823930699680637820852699269802003300352597419024286385747737509380032982821081644521634652750355306547718505685107272222083450567982240", 10) + if xBig.Cmp(x.toBigInt()) != 0 { + t.Error("Expected", xBig, "found", x.toBigInt()) + } +} + +func generateFp751(rand *rand.Rand) Fp751Element { + // Generation strategy: low limbs taken from [0,2^64); high limb + // taken from smaller range + // + // Size hint is ignored since all elements are fixed size. + // + // Field elements taken in range [0,2p). 
Emulate this by capping + // the high limb by the top digit of 2*p-1: + // + // sage: (2*p-1).digits(2^64)[-1] + // 246065832128056 + // + // This still allows generating values >= 2p, but hopefully that + // excess is OK (and if it's not, we'll find out, because it's for + // testing...) + // + highLimb := rand.Uint64() % 246065832128056 + + return Fp751Element{ + rand.Uint64(), + rand.Uint64(), + rand.Uint64(), + rand.Uint64(), + rand.Uint64(), + rand.Uint64(), + rand.Uint64(), + rand.Uint64(), + rand.Uint64(), + rand.Uint64(), + rand.Uint64(), + highLimb, + } +} + +func (x PrimeFieldElement) Generate(rand *rand.Rand, size int) reflect.Value { + return reflect.ValueOf(PrimeFieldElement{A: generateFp751(rand)}) +} + +func (x ExtensionFieldElement) Generate(rand *rand.Rand, size int) reflect.Value { + return reflect.ValueOf(ExtensionFieldElement{A: generateFp751(rand), B: generateFp751(rand)}) +} + +//------------------------------------------------------------------------------ +// Extension Field +//------------------------------------------------------------------------------ + +func TestOneExtensionFieldToBytes(t *testing.T) { + var x ExtensionFieldElement + var xBytes [188]byte + + x.One() + x.ToBytes(xBytes[:]) + + if xBytes[0] != 1 { + t.Error("Expected 1, got", xBytes[0]) + } + for i := 1; i < 188; i++ { + if xBytes[i] != 0 { + t.Error("Expected 0, got", xBytes[i]) // print the byte that was checked + } + } +} + +func TestExtensionFieldElementToBytesRoundTrip(t *testing.T) { + roundTrips := func(x ExtensionFieldElement) bool { + var xBytes [188]byte + var xPrime ExtensionFieldElement + x.ToBytes(xBytes[:]) + xPrime.FromBytes(xBytes[:]) + + return x.VartimeEq(&xPrime) + } + + if err := quick.Check(roundTrips, quickCheckConfig); err != nil { + t.Error(err) + } +} + +func TestExtensionFieldElementMulDistributesOverAdd(t *testing.T) { + mulDistributesOverAdd := func(x, y, z ExtensionFieldElement) bool { + // Compute t1 = (x+y)*z + t1 := new(ExtensionFieldElement) + t1.Add(&x, &y) + t1.Mul(t1, 
&z) + + // Compute t2 = x*z + y*z + t2 := new(ExtensionFieldElement) + t3 := new(ExtensionFieldElement) + t2.Mul(&x, &z) + t3.Mul(&y, &z) + t2.Add(t2, t3) + + return t1.VartimeEq(t2) + } + + if err := quick.Check(mulDistributesOverAdd, quickCheckConfig); err != nil { + t.Error(err) + } +} + +func TestExtensionFieldElementMulIsAssociative(t *testing.T) { + isAssociative := func(x, y, z ExtensionFieldElement) bool { + // Compute t1 = (x*y)*z + t1 := new(ExtensionFieldElement) + t1.Mul(&x, &y) + t1.Mul(t1, &z) + + // Compute t2 = (y*z)*x + t2 := new(ExtensionFieldElement) + t2.Mul(&y, &z) + t2.Mul(t2, &x) + + return t1.VartimeEq(t2) + } + + if err := quick.Check(isAssociative, quickCheckConfig); err != nil { + t.Error(err) + } +} + +func TestExtensionFieldElementSquareMatchesMul(t *testing.T) { + sqrMatchesMul := func(x ExtensionFieldElement) bool { + // Compute t1 = (x*x) + t1 := new(ExtensionFieldElement) + t1.Mul(&x, &x) + + // Compute t2 = x^2 + t2 := new(ExtensionFieldElement) + t2.Square(&x) + + return t1.VartimeEq(t2) + } + + if err := quick.Check(sqrMatchesMul, quickCheckConfig); err != nil { + t.Error(err) + } +} + +func TestExtensionFieldElementInv(t *testing.T) { + inverseIsCorrect := func(x ExtensionFieldElement) bool { + z := new(ExtensionFieldElement) + z.Inv(&x) + + // Now z = (1/x), so (z * x) * x == x + z.Mul(z, &x) + z.Mul(z, &x) + + return z.VartimeEq(&x) + } + + // This is more expensive; run fewer tests + var quickCheckConfig = &quick.Config{MaxCount: (1 << (8 + quickCheckScaleFactor))} + if err := quick.Check(inverseIsCorrect, quickCheckConfig); err != nil { + t.Error(err) + } +} + +func TestExtensionFieldElementBatch3Inv(t *testing.T) { + batchInverseIsCorrect := func(x1, x2, x3 ExtensionFieldElement) bool { + var x1Inv, x2Inv, x3Inv ExtensionFieldElement + x1Inv.Inv(&x1) + x2Inv.Inv(&x2) + x3Inv.Inv(&x3) + + var y1, y2, y3 ExtensionFieldElement + ExtensionFieldBatch3Inv(&x1, &x2, &x3, &y1, &y2, &y3) + + return (y1.VartimeEq(&x1Inv) && 
y2.VartimeEq(&x2Inv) && y3.VartimeEq(&x3Inv)) + } + + // This is more expensive; run fewer tests + var quickCheckConfig = &quick.Config{MaxCount: (1 << (5 + quickCheckScaleFactor))} + if err := quick.Check(batchInverseIsCorrect, quickCheckConfig); err != nil { + t.Error(err) + } +} + +//------------------------------------------------------------------------------ +// Prime Field +//------------------------------------------------------------------------------ + +func TestPrimeFieldElementSetUint64VersusBigInt(t *testing.T) { + setUint64RoundTrips := func(x uint64) bool { + z := new(PrimeFieldElement).SetUint64(x).toBigInt().Uint64() + return x == z + } + + if err := quick.Check(setUint64RoundTrips, quickCheckConfig); err != nil { + t.Error(err) + } +} + +func TestPrimeFieldElementAddVersusBigInt(t *testing.T) { + addMatchesBigInt := func(x, y PrimeFieldElement) bool { + z := new(PrimeFieldElement) + z.Add(&x, &y) + + check := new(big.Int) + check.Add(x.toBigInt(), y.toBigInt()) + check.Mod(check, cln16prime) + + return check.Cmp(z.toBigInt()) == 0 + } + + if err := quick.Check(addMatchesBigInt, quickCheckConfig); err != nil { + t.Error(err) + } +} + +func TestPrimeFieldElementSubVersusBigInt(t *testing.T) { + subMatchesBigInt := func(x, y PrimeFieldElement) bool { + z := new(PrimeFieldElement) + z.Sub(&x, &y) + + check := new(big.Int) + check.Sub(x.toBigInt(), y.toBigInt()) + check.Mod(check, cln16prime) + + return check.Cmp(z.toBigInt()) == 0 + } + + if err := quick.Check(subMatchesBigInt, quickCheckConfig); err != nil { + t.Error(err) + } +} + +func TestPrimeFieldElementInv(t *testing.T) { + inverseIsCorrect := func(x PrimeFieldElement) bool { + z := new(PrimeFieldElement) + z.Inv(&x) + + // Now z = (1/x), so (z * x) * x == x + z.Mul(z, &x).Mul(z, &x) + + return z.VartimeEq(&x) + } + + // This is more expensive; run fewer tests + var quickCheckConfig = &quick.Config{MaxCount: (1 << (8 + quickCheckScaleFactor))} + if err := quick.Check(inverseIsCorrect, 
quickCheckConfig); err != nil { + t.Error(err) + } +} + +func TestPrimeFieldElementSqrt(t *testing.T) { + inverseIsCorrect := func(x PrimeFieldElement) bool { + // Construct y = x^2 so we're sure y is square. + y := new(PrimeFieldElement) + y.Square(&x) + + z := new(PrimeFieldElement) + z.Sqrt(y) + + // Now z = sqrt(y), so z^2 == y + z.Square(z) + return z.VartimeEq(y) + } + + // This is more expensive; run fewer tests + var quickCheckConfig = &quick.Config{MaxCount: (1 << (8 + quickCheckScaleFactor))} + if err := quick.Check(inverseIsCorrect, quickCheckConfig); err != nil { + t.Error(err) + } +} + +func TestPrimeFieldElementMulVersusBigInt(t *testing.T) { + mulMatchesBigInt := func(x, y PrimeFieldElement) bool { + z := new(PrimeFieldElement) + z.Mul(&x, &y) + + check := new(big.Int) + check.Mul(x.toBigInt(), y.toBigInt()) + check.Mod(check, cln16prime) + + return check.Cmp(z.toBigInt()) == 0 + } + + if err := quick.Check(mulMatchesBigInt, quickCheckConfig); err != nil { + t.Error(err) + } +} + +func TestPrimeFieldElementP34VersusBigInt(t *testing.T) { + var p34, _ = new(big.Int).SetString("2588679435442326313244442059466701330356847411387267792529047419763669735170619711625720724140266678406138302904710050596300977994130638598261040117192787954244176710019728333589599932738193731745058771712747875468166412894207", 10) + p34MatchesBigInt := func(x PrimeFieldElement) bool { + z := new(PrimeFieldElement) + z.P34(&x) + + check := x.toBigInt() + check.Exp(check, p34, cln16prime) + + return check.Cmp(z.toBigInt()) == 0 + } + + // This is more expensive; run fewer tests + var quickCheckConfig = &quick.Config{MaxCount: (1 << (8 + quickCheckScaleFactor))} + if err := quick.Check(p34MatchesBigInt, quickCheckConfig); err != nil { + t.Error(err) + } +} + +func TestFp751ElementConditionalSwap(t *testing.T) { + var one = Fp751Element{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1} + var two = Fp751Element{2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2} + + var x = one + var y = two + + 
fp751ConditionalSwap(&x, &y, 0) + + if !(x == one && y == two) { + t.Error("Found", x, "expected", one) + } + + fp751ConditionalSwap(&x, &y, 1) + + if !(x == two && y == one) { + t.Error("Found", x, "expected", two) + } +} + +func TestFp751ElementConditionalAssign(t *testing.T) { + var one = Fp751Element{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1} + var two = Fp751Element{2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2} + var three = Fp751Element{3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3} + + fp751ConditionalAssign(&one, &two, &three, 0) + + if one != two { + t.Error("Found", one, "expected", two) + } + + fp751ConditionalAssign(&one, &two, &three, 1) + + if one != three { + t.Error("Found", one, "expected", three) + } +} + +// Package-level storage for this field element is intended to deter +// compiler optimizations. +var benchmarkFp751Element Fp751Element +var benchmarkFp751X2 fp751X2 +var bench_x = Fp751Element{17026702066521327207, 5108203422050077993, 10225396685796065916, 11153620995215874678, 6531160855165088358, 15302925148404145445, 1248821577836769963, 9789766903037985294, 7493111552032041328, 10838999828319306046, 18103257655515297935, 27403304611634} +var bench_y = Fp751Element{4227467157325093378, 10699492810770426363, 13500940151395637365, 12966403950118934952, 16517692605450415877, 13647111148905630666, 14223628886152717087, 7167843152346903316, 15855377759596736571, 4300673881383687338, 6635288001920617779, 30486099554235} +var bench_z = fp751X2{1595347748594595712, 10854920567160033970, 16877102267020034574, 12435724995376660096, 3757940912203224231, 8251999420280413600, 3648859773438820227, 17622716832674727914, 11029567000887241528, 11216190007549447055, 17606662790980286987, 4720707159513626555, 12887743598335030915, 14954645239176589309, 14178817688915225254, 1191346797768989683, 12629157932334713723, 6348851952904485603, 16444232588597434895, 7809979927681678066, 14642637672942531613, 3092657597757640067, 10160361564485285723, 240071237} + +func 
BenchmarkExtensionFieldElementMul(b *testing.B) { + z := &ExtensionFieldElement{A: bench_x, B: bench_y} + w := new(ExtensionFieldElement) + + for n := 0; n < b.N; n++ { + w.Mul(z, z) + } +} + +func BenchmarkExtensionFieldElementInv(b *testing.B) { + z := &ExtensionFieldElement{A: bench_x, B: bench_y} + w := new(ExtensionFieldElement) + + for n := 0; n < b.N; n++ { + w.Inv(z) + } +} + +func BenchmarkExtensionFieldElementSquare(b *testing.B) { + z := &ExtensionFieldElement{A: bench_x, B: bench_y} + w := new(ExtensionFieldElement) + + for n := 0; n < b.N; n++ { + w.Square(z) + } +} + +func BenchmarkExtensionFieldElementAdd(b *testing.B) { + z := &ExtensionFieldElement{A: bench_x, B: bench_y} + w := new(ExtensionFieldElement) + + for n := 0; n < b.N; n++ { + w.Add(z, z) + } +} + +func BenchmarkExtensionFieldElementSub(b *testing.B) { + z := &ExtensionFieldElement{A: bench_x, B: bench_y} + w := new(ExtensionFieldElement) + + for n := 0; n < b.N; n++ { + w.Sub(z, z) + } +} + +func BenchmarkPrimeFieldElementMul(b *testing.B) { + z := &PrimeFieldElement{A: bench_x} + w := new(PrimeFieldElement) + + for n := 0; n < b.N; n++ { + w.Mul(z, z) + } +} + +func BenchmarkPrimeFieldElementInv(b *testing.B) { + z := &PrimeFieldElement{A: bench_x} + w := new(PrimeFieldElement) + + for n := 0; n < b.N; n++ { + w.Inv(z) + } +} + +func BenchmarkPrimeFieldElementSqrt(b *testing.B) { + z := &PrimeFieldElement{A: bench_x} + w := new(PrimeFieldElement) + + for n := 0; n < b.N; n++ { + w.Sqrt(z) + } +} + +func BenchmarkPrimeFieldElementSquare(b *testing.B) { + z := &PrimeFieldElement{A: bench_x} + w := new(PrimeFieldElement) + + for n := 0; n < b.N; n++ { + w.Square(z) + } +} + +func BenchmarkPrimeFieldElementAdd(b *testing.B) { + z := &PrimeFieldElement{A: bench_x} + w := new(PrimeFieldElement) + + for n := 0; n < b.N; n++ { + w.Add(z, z) + } +} + +func BenchmarkPrimeFieldElementSub(b *testing.B) { + z := &PrimeFieldElement{A: bench_x} + w := new(PrimeFieldElement) + + for n := 0; n < b.N; 
n++ { + w.Sub(z, z) + } +} + +func BenchmarkFp751Multiply(b *testing.B) { + for n := 0; n < b.N; n++ { + fp751Mul(&benchmarkFp751X2, &bench_x, &bench_y) + } +} + +func BenchmarkFp751MontgomeryReduce(b *testing.B) { + z := bench_z + + // This benchmark actually computes garbage, because + // fp751MontgomeryReduce mangles its input, but since it's + // constant-time that shouldn't matter for the benchmarks. + for n := 0; n < b.N; n++ { + fp751MontgomeryReduce(&benchmarkFp751Element, &z) + } +} + +func BenchmarkFp751AddReduced(b *testing.B) { + for n := 0; n < b.N; n++ { + fp751AddReduced(&benchmarkFp751Element, &bench_x, &bench_y) + } +} + +func BenchmarkFp751SubReduced(b *testing.B) { + for n := 0; n < b.N; n++ { + fp751SubReduced(&benchmarkFp751Element, &bench_x, &bench_y) + } +} diff --git a/vendor/github.com/cloudflare/p751sidh/p751toolbox/isogeny.go b/vendor/github.com/cloudflare/p751sidh/p751toolbox/isogeny.go new file mode 100644 index 00000000..be072ed4 --- /dev/null +++ b/vendor/github.com/cloudflare/p751sidh/p751toolbox/isogeny.go @@ -0,0 +1,190 @@ +package p751toolbox + +// Represents a 3-isogeny phi, holding the data necessary to evaluate phi. +type ThreeIsogeny struct { + x ExtensionFieldElement + z ExtensionFieldElement +} + +// Given a three-torsion point x3 = x(P_3) on the curve E_(A:C), construct the +// three-isogeny phi : E_(A:C) -> E_(A:C)/ = E_(A':C'). +// +// Returns a tuple (codomain, isogeny) = (E_(A':C'), phi). 
func ComputeThreeIsogeny(x3 *ProjectivePoint) (ProjectiveCurveParameters, ThreeIsogeny) {
	// The isogeny only needs the projective coordinates (X3 : Z3) of the
	// kernel point in order to be evaluated later; see ThreeIsogeny.Eval.
	var isogeny ThreeIsogeny
	isogeny.x = x3.X
	isogeny.z = x3.Z
	// We want to compute
	// (A':C') = (Z^4 + 18X^2Z^2 - 27X^4 : 4XZ^3)
	// To do this, use the identity 18X^2Z^2 - 27X^4 = 9X^2(2Z^2 - 3X^2)
	//
	// The temporaries v0..v3 are reused below, so the statement order
	// matters: each comment gives the value held by the destination after
	// the statement on that line has run.
	var codomain ProjectiveCurveParameters
	var v0, v1, v2, v3 ExtensionFieldElement
	v1.Square(&x3.X)               // = X^2
	v0.Add(&v1, &v1).Add(&v1, &v0) // = 3X^2
	v1.Add(&v0, &v0).Add(&v1, &v0) // = 9X^2
	v2.Square(&x3.Z)               // = Z^2
	v3.Square(&v2)                 // = Z^4
	v2.Add(&v2, &v2)               // = 2Z^2
	v0.Sub(&v2, &v0)               // = 2Z^2 - 3X^2
	v1.Mul(&v1, &v0)               // = 9X^2(2Z^2 - 3X^2)
	v0.Mul(&x3.X, &x3.Z)           // = XZ
	v0.Add(&v0, &v0)               // = 2XZ
	codomain.A.Add(&v3, &v1)       // = Z^4 + 9X^2(2Z^2 - 3X^2)
	codomain.C.Mul(&v0, &v2)       // = 4XZ^3

	return codomain, isogeny
}

// Given a 3-isogeny phi and a point xP = x(P), compute x(Q), the x-coordinate
// of the image Q = phi(P) of P under phi : E_(A:C) -> E_(A':C').
//
// The output xQ = x(Q) is then a point on the curve E_(A':C'); the curve
// parameters are returned by the ComputeThreeIsogeny function used to
// construct phi.
func (phi *ThreeIsogeny) Eval(xP *ProjectivePoint) ProjectivePoint {
	var xQ ProjectivePoint
	var t0, t1, t2 ExtensionFieldElement
	// (X3 : Z3) below denotes the kernel point stored in phi as (phi.x : phi.z),
	// and (XP : ZP) denotes the input point xP.
	t0.Mul(&phi.x, &xP.X) // = X3*XP
	t1.Mul(&phi.z, &xP.Z) // = Z3*ZP
	t2.Sub(&t0, &t1)      // = X3*XP - Z3*ZP
	t0.Mul(&phi.z, &xP.X) // = Z3*XP
	t1.Mul(&phi.x, &xP.Z) // = X3*ZP
	t0.Sub(&t0, &t1)      // = Z3*XP - X3*ZP
	t2.Square(&t2)        // = (X3*XP - Z3*ZP)^2
	t0.Square(&t0)        // = (Z3*XP - X3*ZP)^2
	xQ.X.Mul(&t2, &xP.X)  // = XP*(X3*XP - Z3*ZP)^2
	xQ.Z.Mul(&t0, &xP.Z)  // = ZP*(Z3*XP - X3*ZP)^2

	return xQ
}

// Represents a 4-isogeny phi, holding the data necessary to evaluate phi.
//
// See ComputeFourIsogeny for more details.
type FourIsogeny struct {
	// Values precomputed by ComputeFourIsogeny from the kernel point
	// (X4 : Z4), so that Eval does not have to recompute them per point.
	Xsq_plus_Zsq  ExtensionFieldElement // = X4^2 + Z4^2
	Xsq_minus_Zsq ExtensionFieldElement // = X4^2 - Z4^2
	XZ2           ExtensionFieldElement // = 2*X4*Z4
	Xpow4         ExtensionFieldElement // = X4^4
	Zpow4         ExtensionFieldElement // = Z4^4
}

// Given a four-torsion point x4 = x(P_4) on the curve E_(A:C), compute the
// coefficients of the codomain E_(A':C') of the four-isogeny phi whose kernel
// is generated by P_4, i.e. phi maps E_(A:C) onto its quotient by the
// subgroup generated by P_4.
//
// Returns a tuple (codomain, isogeny) = (E_(A':C'), phi).
//
// There are two sets of formulas in Costello-Longa-Naehrig for computing
// four-isogenies. One set is for the case where (1,...) lies in the kernel of
// the isogeny (this is the FirstFourIsogeny), and the other (this set) is for
// the case that (1,...) is *not* in the kernel.
func ComputeFourIsogeny(x4 *ProjectivePoint) (ProjectiveCurveParameters, FourIsogeny) {
	var codomain ProjectiveCurveParameters
	var isogeny FourIsogeny
	var v0, v1 ExtensionFieldElement
	v0.Square(&x4.X)                    // = X4^2
	v1.Square(&x4.Z)                    // = Z4^2
	isogeny.Xsq_plus_Zsq.Add(&v0, &v1)  // = X4^2 + Z4^2
	isogeny.Xsq_minus_Zsq.Sub(&v0, &v1) // = X4^2 - Z4^2
	// 2*X4*Z4 is obtained as (X4 + Z4)^2 - (X4^2 + Z4^2), which costs one
	// squaring and reuses the sum computed above.
	isogeny.XZ2.Add(&x4.X, &x4.Z)                        // = X4 + Z4
	isogeny.XZ2.Square(&isogeny.XZ2)                     // = X4^2 + Z4^2 + 2X4Z4
	isogeny.XZ2.Sub(&isogeny.XZ2, &isogeny.Xsq_plus_Zsq) // = 2X4Z4
	isogeny.Xpow4.Square(&v0)                            // = X4^4
	isogeny.Zpow4.Square(&v1)                            // = Z4^4
	v0.Add(&isogeny.Xpow4, &isogeny.Xpow4)               // = 2X4^4
	v0.Sub(&v0, &isogeny.Zpow4)                          // = 2X4^4 - Z4^4
	codomain.A.Add(&v0, &v0)                             // = 2(2X4^4 - Z4^4)
	codomain.C = isogeny.Zpow4                           // = Z4^4

	return codomain, isogeny
}

// Given a 4-isogeny phi and a point xP = x(P), compute x(Q), the x-coordinate
// of the image Q = phi(P) of P under phi : E_(A:C) -> E_(A':C').
//
// The output xQ = x(Q) is then a point on the curve E_(A':C'); the curve
// parameters are returned by the ComputeFourIsogeny function used to construct
// phi.
func (phi *FourIsogeny) Eval(xP *ProjectivePoint) ProjectivePoint {
	var xQ ProjectivePoint
	var t0, t1, t2 ExtensionFieldElement
	// We want to compute formula (7) of Costello-Longa-Naehrig, namely
	//
	// Xprime = (2*X_4*Z*Z_4 - (X_4^2 + Z_4^2)*X)*(X*X_4 - Z*Z_4)^2*X
	// Zprime = (2*X*X_4*Z_4 - (X_4^2 + Z_4^2)*Z)*(X_4*Z - X*Z_4)^2*Z
	//
	// To do this we adapt the method in the MSR implementation, which computes
	//
	// X_Q = Xprime*( 16*(X_4 + Z_4)*(X_4 - Z_4)*X_4^2*Z_4^4 )
	// Z_Q = Zprime*( 16*(X_4 + Z_4)*(X_4 - Z_4)*X_4^2*Z_4^4 )
	//
	// Both coordinates are scaled by the same factor, so the projective
	// point (X_Q : Z_Q) equals (Xprime : Zprime).
	//
	// (X : Z) is the input point xP; the X_4/Z_4 quantities are the values
	// cached in phi. The temporaries t0..t2 are overwritten repeatedly, so
	// the statement order must not be changed; each comment gives the value
	// of the destination after the statement on that line.
	t0.Mul(&xP.X, &phi.XZ2)                      // = 2*X*X_4*Z_4
	t1.Mul(&xP.Z, &phi.Xsq_plus_Zsq)             // = (X_4^2 + Z_4^2)*Z
	t0.Sub(&t0, &t1)                             // = -X_4^2*Z + 2*X*X_4*Z_4 - Z*Z_4^2
	t1.Mul(&xP.Z, &phi.Xsq_minus_Zsq)            // = (X_4^2 - Z_4^2)*Z
	t2.Sub(&t0, &t1).Square(&t2)                 // = 4*(X_4*Z - X*Z_4)^2*X_4^2
	t0.Mul(&t0, &t1).Add(&t0, &t0).Add(&t0, &t0) // = 4*(2*X*X_4*Z_4 - (X_4^2 + Z_4^2)*Z)*(X_4^2 - Z_4^2)*Z
	t1.Add(&t0, &t2)                             // = 4*(X*X_4 - Z*Z_4)^2*Z_4^2
	t0.Mul(&t0, &t2)                             // = Zprime * 16*(X_4 + Z_4)*(X_4 - Z_4)*X_4^2
	xQ.Z.Mul(&t0, &phi.Zpow4)                    // = Zprime * 16*(X_4 + Z_4)*(X_4 - Z_4)*X_4^2*Z_4^4
	t2.Mul(&t2, &phi.Zpow4)                      // = 4*(X_4*Z - X*Z_4)^2*X_4^2*Z_4^4
	t0.Mul(&t1, &phi.Xpow4)                      // = 4*(X*X_4 - Z*Z_4)^2*X_4^4*Z_4^2
	t0.Sub(&t2, &t0)                             // = -4*(X*X_4^2 - 2*X_4*Z*Z_4 + X*Z_4^2)*X*(X_4^2 - Z_4^2)*X_4^2*Z_4^2
	xQ.X.Mul(&t1, &t0)                           // = Xprime * 16*(X_4 + Z_4)*(X_4 - Z_4)*X_4^2*Z_4^4

	return xQ
}

// Represents a 4-isogeny phi. See ComputeFourIsogeny for details.
type FirstFourIsogeny struct {
	// A and C hold the (A:C) parameters of the domain curve; they are
	// copied in by ComputeFirstFourIsogeny and read back by Eval.
	A ExtensionFieldElement
	C ExtensionFieldElement
}

// Compute the "first" four-isogeny from the given curve. See also
// ComputeFourIsogeny and Costello-Longa-Naehrig for more details.
+func ComputeFirstFourIsogeny(domain *ProjectiveCurveParameters) (ProjectiveCurveParameters, FirstFourIsogeny) { + var codomain ProjectiveCurveParameters + var isogeny FirstFourIsogeny + var t0, t1 ExtensionFieldElement + + t0.Add(&domain.C, &domain.C) // = 2*C + codomain.C.Sub(&domain.A, &t0) // = A - 2*C + t1.Add(&t0, &t0) // = 4*C + t1.Add(&t1, &t0) // = 6*C + t0.Add(&t1, &domain.A) // = A + 6*C + codomain.A.Add(&t0, &t0) // = 2*(A + 6*C) + + isogeny.A = domain.A + isogeny.C = domain.C + + return codomain, isogeny +} + +// Given a 4-isogeny phi and a point xP = x(P), compute x(Q), the x-coordinate +// of the image Q = phi(P) of P under phi : E_(A:C) -> E_(A':C'). +// +// The output xQ = x(Q) is then a point on the curve E_(A':C'); the curve +// parameters are returned by the ComputeFirstFourIsogeny function used to construct +// phi. +func (phi *FirstFourIsogeny) Eval(xP *ProjectivePoint) ProjectivePoint { + var xQ ProjectivePoint + var t0, t1, t2, t3 ExtensionFieldElement + + t0.Add(&xP.X, &xP.Z).Square(&t0) // = (X+Z)^2 + t2.Mul(&xP.X, &xP.Z) // = X*Z + t1.Add(&t2, &t2) // = 2*X*Z + t1.Sub(&t0, &t1) // = X^2 + Z^2 + xQ.X.Mul(&phi.A, &t2) // = A*X*Z + t3.Mul(&phi.C, &t1) // = C*(X^2 + Z^2) + xQ.X.Add(&xQ.X, &t3) // = A*X*Z + C*(X^2 + Z^2) + xQ.X.Mul(&xQ.X, &t0) // = (X+Z)^2 * (A*X*Z + C*(X^2 + Z^2)) + t0.Sub(&xP.X, &xP.Z).Square(&t0) // = (X-Z)^2 + t0.Mul(&t0, &t2) // = X*Z*(X-Z)^2 + t1.Add(&phi.C, &phi.C) // = 2*C + t1.Sub(&t1, &phi.A) // = 2*C - A + xQ.Z.Mul(&t1, &t0) // = (2*C - A)*X*Z*(X-Z)^2 + + return xQ +} diff --git a/vendor/github.com/cloudflare/p751sidh/p751toolbox/isogeny_test.go b/vendor/github.com/cloudflare/p751sidh/p751toolbox/isogeny_test.go new file mode 100644 index 00000000..09c20e6e --- /dev/null +++ b/vendor/github.com/cloudflare/p751sidh/p751toolbox/isogeny_test.go @@ -0,0 +1,158 @@ +package p751toolbox + +import ( + "testing" +) + +// Test the first four-isogeny from the base curve E_0(F_{p^2}) +func TestFirstFourIsogenyVersusSage(t 
*testing.T) { + var xR, isogenized_xR, sageIsogenized_xR ProjectivePoint + + // sage: p = 2^372 * 3^239 - 1; Fp = GF(p) + // sage: R. = Fp[] + // sage: Fp2 = Fp.extension(x^2 + 1, 'i') + // sage: i = Fp2.gen() + // sage: E0Fp = EllipticCurve(Fp, [0,0,0,1,0]) + // sage: E0Fp2 = EllipticCurve(Fp2, [0,0,0,1,0]) + // sage: x_PA = 11 + // sage: y_PA = -Fp(11^3 + 11).sqrt() + // sage: x_PB = 6 + // sage: y_PB = -Fp(6^3 + 6).sqrt() + // sage: P_A = 3^239 * E0Fp((x_PA,y_PA)) + // sage: P_B = 2^372 * E0Fp((x_PB,y_PB)) + // sage: def tau(P): + // ....: return E0Fp2( (-P.xy()[0], i*P.xy()[1])) + // ....: + // sage: m_B = 3*randint(0,3^238) + // sage: m_A = 2*randint(0,2^371) + // sage: R_A = E0Fp2(P_A) + m_A*tau(P_A) + // sage: def y_recover(x, a): + // ....: return (x**3 + a*x**2 + x).sqrt() + // ....: + // sage: first_4_torsion_point = E0Fp2(1, y_recover(Fp2(1),0)) + // sage: sage_first_4_isogeny = E0Fp2.isogeny(first_4_torsion_point) + // sage: a = Fp2(0) + // sage: sage_isomorphism = sage_first_4_isogeny.codomain().isomorphism_to(EllipticCurve(Fp2, [0,(2*(a+6))/(a-2),0,1,0])) + // sage: isogenized_R_A = sage_isomorphism(sage_first_4_isogeny(R_A)) + + xR.FromAffine(&ExtensionFieldElement{A: Fp751Element{0xa179cb7e2a95fce9, 0xbfd6a0f3a0a892c0, 0x8b2f0aa4250ab3f3, 0x2e7aa4dd4118732d, 0x627969e493acbc2a, 0x21a5b852c7b8cc83, 0x26084278586324f2, 0x383be1aa5aa947c0, 0xc6558ecbb5c0183e, 0xf1f192086a52b035, 0x4c58b755b865c1b, 0x67b4ceea2d2c}, B: Fp751Element{0xfceb02a2797fecbf, 0x3fee9e1d21f95e99, 0xa1c4ce896024e166, 0xc09c024254517358, 0xf0255994b17b94e7, 0xa4834359b41ee894, 0x9487f7db7ebefbe, 0x3bbeeb34a0bf1f24, 0xfa7e5533514c6a05, 0x92b0328146450a9a, 0xfde71ca3fada4c06, 0x3610f995c2bd}}) + + sageIsogenized_xR.FromAffine(&ExtensionFieldElement{A: Fp751Element{0xff99e76f78da1e05, 0xdaa36bd2bb8d97c4, 0xb4328cee0a409daf, 0xc28b099980c5da3f, 0xf2d7cd15cfebb852, 0x1935103dded6cdef, 0xade81528de1429c3, 0x6775b0fa90a64319, 0x25f89817ee52485d, 0x706e2d00848e697, 0xc4958ec4216d65c0, 
0xc519681417f}, B: Fp751Element{0x742fe7dde60e1fb9, 0x801a3c78466a456b, 0xa9f945b786f48c35, 0x20ce89e1b144348f, 0xf633970b7776217e, 0x4c6077a9b38976e5, 0x34a513fc766c7825, 0xacccba359b9cd65, 0xd0ca8383f0fd0125, 0x77350437196287a, 0x9fe1ad7706d4ea21, 0x4d26129ee42d}}) + + var params ProjectiveCurveParameters + params.A.Zero() + params.C.One() + + _, phi := ComputeFirstFourIsogeny(¶ms) + + isogenized_xR = phi.Eval(&xR) + + if !sageIsogenized_xR.VartimeEq(&isogenized_xR) { + t.Error("\nExpected\n", sageIsogenized_xR.ToAffine(), "\nfound\n", isogenized_xR.ToAffine()) + } +} + +func TestFourIsogenyVersusSage(t *testing.T) { + var xP4, xR, isogenized_xR, sageIsogenized_xR ProjectivePoint + // sage: p = 2^372 * 3^239 - 1; Fp = GF(p) + // *** Warning: increasing stack size to 2000000. + // *** Warning: increasing stack size to 4000000. + // sage: R. = Fp[] + // sage: Fp2 = Fp.extension(x^2 + 1, 'i') + // sage: i = Fp2.gen() + // sage: E0Fp = EllipticCurve(Fp, [0,0,0,1,0]) + // sage: E0Fp2 = EllipticCurve(Fp2, [0,0,0,1,0]) + // sage: x_PA = 11 + // sage: y_PA = -Fp(11^3 + 11).sqrt() + // sage: x_PB = 6 + // sage: y_PB = -Fp(6^3 + 6).sqrt() + // sage: P_A = 3^239 * E0Fp((x_PA,y_PA)) + // sage: P_B = 2^372 * E0Fp((x_PB,y_PB)) + // sage: def tau(P): + // ....: return E0Fp2( (-P.xy()[0], i*P.xy()[1])) + // ....: + // sage: m_B = 3*randint(0,3^238) + // sage: m_A = 2*randint(0,2^371) + // sage: R_A = E0Fp2(P_A) + m_A*tau(P_A) + // sage: def y_recover(x, a): + // ....: return (x**3 + a*x**2 + x).sqrt() + // ....: + // sage: first_4_torsion_point = E0Fp2(1, y_recover(Fp2(1),0)) + // sage: sage_first_4_isogeny = E0Fp2.isogeny(first_4_torsion_point) + // sage: a = Fp2(0) + // sage: E1A = EllipticCurve(Fp2, [0,(2*(a+6))/(a-2),0,1,0]) + // sage: sage_isomorphism = sage_first_4_isogeny.codomain().isomorphism_to(E1A) + // sage: isogenized_R_A = sage_isomorphism(sage_first_4_isogeny(R_A)) + // sage: P_4 = (2**(372-4))*isogenized_R_A + // sage: P_4._order = 4 #otherwise falls back to 
generic group methods for order + // sage: X4, Z4 = P_4.xy()[0], 1 + // sage: phi4 = EllipticCurveIsogeny(E1A, P_4, None, 4) + // sage: E2A_sage = phi4.codomain() # not in monty form + // sage: Aprime, Cprime = 2*(2*X4^4 - Z4^4), Z4^4 + // sage: E2A = EllipticCurve(Fp2, [0,Aprime/Cprime,0,1,0]) + // sage: sage_iso = E2A_sage.isomorphism_to(E2A) + // sage: isogenized2_R_A = sage_iso(phi4(isogenized_R_A)) + + xP4.FromAffine(&ExtensionFieldElement{A: Fp751Element{0x2afd75a913f3d5e7, 0x2918fba06f88c9ab, 0xa4ac4dc7cb526f05, 0x2d19e9391a607300, 0x7a79e2b34091b54, 0x3ad809dcb42f1792, 0xd46179328bd6402a, 0x1afa73541e2c4f3f, 0xf602d73ace9bdbd8, 0xd77ac58f6bab7004, 0x4689d97f6793b3b3, 0x4f26b00e42b7}, B: Fp751Element{0x6cdf918dafdcb890, 0x666f273cc29cfae2, 0xad00fcd31ba618e2, 0x5fbcf62bef2f6a33, 0xf408bb88318e5098, 0x84ab97849453d175, 0x501bbfcdcfb8e1ac, 0xf2370098e6b5542c, 0xc7dc73f5f0f6bd32, 0xdd76dcd86729d1cf, 0xca22c905029996e4, 0x5cf4a9373de3}}) + + xR.FromAffine(&ExtensionFieldElement{A: Fp751Element{0xff99e76f78da1e05, 0xdaa36bd2bb8d97c4, 0xb4328cee0a409daf, 0xc28b099980c5da3f, 0xf2d7cd15cfebb852, 0x1935103dded6cdef, 0xade81528de1429c3, 0x6775b0fa90a64319, 0x25f89817ee52485d, 0x706e2d00848e697, 0xc4958ec4216d65c0, 0xc519681417f}, B: Fp751Element{0x742fe7dde60e1fb9, 0x801a3c78466a456b, 0xa9f945b786f48c35, 0x20ce89e1b144348f, 0xf633970b7776217e, 0x4c6077a9b38976e5, 0x34a513fc766c7825, 0xacccba359b9cd65, 0xd0ca8383f0fd0125, 0x77350437196287a, 0x9fe1ad7706d4ea21, 0x4d26129ee42d}}) + + sageIsogenized_xR.FromAffine(&ExtensionFieldElement{A: Fp751Element{0x111efd8bd0b7a01e, 0x6ab75a4f3789ca9b, 0x939dbe518564cac4, 0xf9eeaba1601d0434, 0x8d41f8ba6edac998, 0xfcd2557efe9aa170, 0xb3c3549c098b7844, 0x52874fef6f81127c, 0xb2b9ac82aa518bb3, 0xee70820230520a86, 0xd4012b7f5efb184a, 0x573e4536329b}, B: Fp751Element{0xa99952281e932902, 0x569a89a571f2c7b1, 0x6150143846ba3f6b, 0x11fd204441e91430, 0x7f469bd55c9b07b, 0xb72db8b9de35b161, 0x455a9a37a940512a, 0xb0cff7670abaf906, 
0x18c785b7583375fe, 0x603ab9ca403c9148, 0xab54ba3a6e6c62c1, 0x2726d7d57c4f}}) + + _, phi := ComputeFourIsogeny(&xP4) + + isogenized_xR = phi.Eval(&xR) + + if !sageIsogenized_xR.VartimeEq(&isogenized_xR) { + t.Error("\nExpected\n", sageIsogenized_xR.ToAffine(), "\nfound\n", isogenized_xR.ToAffine()) + } +} + +func TestThreeIsogenyVersusSage(t *testing.T) { + var xR, xP3, isogenized_xR, sageIsogenized_xR ProjectivePoint + + // sage: %colors Linux + // sage: p = 2^372 * 3^239 - 1; Fp = GF(p) + // *** Warning: increasing stack size to 2000000. + // *** Warning: increasing stack size to 4000000. + // sage: R. = Fp[] + // sage: Fp2 = Fp.extension(x^2 + 1, 'i') + // sage: i = Fp2.gen() + // sage: E0Fp = EllipticCurve(Fp, [0,0,0,1,0]) + // sage: E0Fp2 = EllipticCurve(Fp2, [0,0,0,1,0]) + // sage: x_PA = 11 + // sage: y_PA = -Fp(11^3 + 11).sqrt() + // sage: x_PB = 6 + // sage: y_PB = -Fp(6^3 + 6).sqrt() + // sage: P_A = 3^239 * E0Fp((x_PA,y_PA)) + // sage: P_B = 2^372 * E0Fp((x_PB,y_PB)) + // sage: def tau(P): + // ....: return E0Fp2( (-P.xy()[0], i*P.xy()[1])) + // ....: + // sage: m_B = 3*randint(0,3^238) + // sage: R_B = E0Fp2(P_B) + m_B*tau(P_B) + // sage: P_3 = (3^238)*R_B + // sage: def three_isog(P_3, P): + // ....: X3, Z3 = P_3.xy()[0], 1 + // ....: XP, ZP = P.xy()[0], 1 + // ....: x = (XP*(X3*XP - Z3*ZP)^2)/(ZP*(Z3*XP - X3*ZP)^2) + // ....: A3, C3 = (Z3^4 + 9*X3^2*(2*Z3^2 - 3*X3^2)), 4*X3*Z3^3 + // ....: cod = EllipticCurve(Fp2, [0,A3/C3,0,1,0]) + // ....: return cod.lift_x(x) + // ....: + // sage: isogenized_R_B = three_isog(P_3, R_B) + + xR.FromAffine(&ExtensionFieldElement{A: Fp751Element{0xbd0737ed5cc9a3d7, 0x45ae6d476517c101, 0x6f228e9e7364fdb2, 0xbba4871225b3dbd, 0x6299ccd2e5da1a07, 0x38488fe4af5f2d0e, 0xec23cae5a86e980c, 0x26c804ba3f1edffa, 0xfbbed81932df60e5, 0x7e00e9d182ae9187, 0xc7654abb66d05f4b, 0x262d0567237b}, B: Fp751Element{0x3a3b5b6ad0b2ac33, 0x246602b5179127d3, 0x502ae0e9ad65077d, 0x10a3a37237e1bf70, 0x4a1ab9294dd05610, 0xb0f3adac30fe1fa6, 
0x341995267faf70cb, 0xa14dd94d39cf4ec1, 0xce4b7527d1bf5568, 0xe0410423ed45c7e4, 0x38011809b6425686, 0x28f52472ebed}}) + + xP3.FromAffine(&ExtensionFieldElement{A: Fp751Element{0x7bb7a4a07b0788dc, 0xdc36a3f6607b21b0, 0x4750e18ee74cf2f0, 0x464e319d0b7ab806, 0xc25aa44c04f758ff, 0x392e8521a46e0a68, 0xfc4e76b63eff37df, 0x1f3566d892e67dd8, 0xf8d2eb0f73295e65, 0x457b13ebc470bccb, 0xfda1cc9efef5be33, 0x5dbf3d92cc02}, B: Fp751Element{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}) + + sageIsogenized_xR.FromAffine(&ExtensionFieldElement{A: Fp751Element{0x286db7d75913c5b1, 0xcb2049ad50189220, 0xccee90ef765fa9f4, 0x65e52ce2730e7d88, 0xa6b6b553bd0d06e7, 0xb561ecec14591590, 0x17b7a66d8c64d959, 0x77778cecbe1461e, 0x9405c9c0c41a57ce, 0x8f6b4847e8ca7d3d, 0xf625eb987b366937, 0x421b3590e345}, B: Fp751Element{0x566b893803e7d8d6, 0xe8c71a04d527e696, 0x5a1d8f87bf5eb51, 0x42ae08ae098724f, 0x4ee3d7c7af40ca2e, 0xd9f9ab9067bb10a7, 0xecd53d69edd6328c, 0xa581e9202dea107d, 0x8bcdfb6c8ecf9257, 0xe7cbbc2e5cbcf2af, 0x5f031a8701f0e53e, 0x18312d93e3cb}}) + + _, phi := ComputeThreeIsogeny(&xP3) + + isogenized_xR = phi.Eval(&xR) + + if !sageIsogenized_xR.VartimeEq(&isogenized_xR) { + t.Error("\nExpected\n", sageIsogenized_xR.ToAffine(), "\nfound\n", isogenized_xR.ToAffine()) + } +} diff --git a/vendor/github.com/cloudflare/p751sidh/sidh.go b/vendor/github.com/cloudflare/p751sidh/sidh.go new file mode 100644 index 00000000..f0e8f89a --- /dev/null +++ b/vendor/github.com/cloudflare/p751sidh/sidh.go @@ -0,0 +1,421 @@ +// Package p751sidh implements (ephemeral) supersingular isogeny +// Diffie-Hellman, as described in Costello-Longa-Naehrig 2016. Portions of +// the field arithmetic implementation were based on their implementation. +// Internal functions useful for the implementation are published in the +// p751toolbox package. +// +// This package follows their naming convention, writing "Alice" for the party +// using 2^e-isogenies and "Bob" for the party using 3^e-isogenies. 
+// +// This package does NOT implement SIDH key validation, so it should only be +// used for ephemeral DH. Each keypair should be used at most once. +// +// If you feel that SIDH may be appropriate for you, consult your +// cryptographer. +package p751sidh + +import ( + "errors" + "io" +) + +import . "github.com/cloudflare/p751sidh/p751toolbox" + +const ( + // The secret key size, in bytes. + SecretKeySize = 48 + // The public key size, in bytes. + PublicKeySize = 564 + // The shared secret size, in bytes. + SharedSecretSize = 188 +) + +const maxAlice = 185 + +var aliceIsogenyStrategy = [maxAlice]int{0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 4, 5, 5, + 6, 7, 8, 8, 9, 9, 9, 9, 9, 9, 9, 12, 11, 12, 12, 13, 14, 15, 16, 16, 16, 16, + 16, 16, 17, 17, 18, 18, 17, 21, 17, 18, 21, 20, 21, 21, 21, 21, 21, 22, 25, 25, + 25, 26, 27, 28, 28, 29, 30, 31, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 35, 36, + 36, 33, 36, 35, 36, 36, 35, 36, 36, 37, 38, 38, 39, 40, 41, 42, 38, 39, 40, 41, + 42, 40, 46, 42, 43, 46, 46, 46, 46, 48, 48, 48, 48, 49, 49, 48, 53, 54, 51, 52, + 53, 54, 55, 56, 57, 58, 59, 59, 60, 62, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, + 65, 65, 65, 65, 65, 66, 67, 65, 66, 67, 66, 69, 70, 66, 67, 66, 69, 70, 69, 70, + 70, 71, 72, 71, 72, 72, 74, 74, 75, 72, 72, 74, 74, 75, 72, 72, 74, 75, 75, 72, + 72, 74, 75, 75, 77, 77, 79, 80, 80, 82} + +const maxBob = 239 + +var bobIsogenyStrategy = [maxBob]int{0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, + 7, 8, 8, 8, 8, 9, 9, 9, 9, 9, 10, 12, 12, 12, 12, 12, 12, 13, 14, 14, 15, 16, + 16, 16, 16, 16, 17, 16, 16, 17, 19, 19, 20, 21, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 24, 24, 25, 27, 27, 28, 28, 29, 28, 29, 28, 28, 28, 30, 28, 28, 28, 29, + 30, 33, 33, 33, 33, 34, 35, 37, 37, 37, 37, 38, 38, 37, 38, 38, 38, 38, 38, 39, + 43, 38, 38, 38, 38, 43, 40, 41, 42, 43, 48, 45, 46, 47, 47, 48, 49, 49, 49, 50, + 51, 50, 49, 49, 49, 49, 51, 49, 53, 50, 51, 50, 51, 51, 51, 52, 55, 55, 55, 56, + 56, 56, 56, 56, 58, 58, 61, 61, 61, 63, 63, 63, 64, 65, 
65, 65, 65, 66, 66, 65, + 65, 66, 66, 66, 66, 66, 66, 66, 71, 66, 73, 66, 66, 71, 66, 73, 66, 66, 71, 66, + 73, 68, 68, 71, 71, 73, 73, 73, 75, 75, 78, 78, 78, 80, 80, 80, 81, 81, 82, 83, + 84, 85, 86, 86, 86, 86, 86, 87, 86, 88, 86, 86, 86, 86, 88, 86, 88, 86, 86, 86, + 88, 88, 86, 86, 86, 93, 90, 90, 92, 92, 92, 93, 93, 93, 93, 93, 97, 97, 97, 97, + 97, 97} + +// Bob's public key. +type SIDHPublicKeyBob struct { + affine_xP ExtensionFieldElement + affine_xQ ExtensionFieldElement + affine_xQmP ExtensionFieldElement +} + +// Read a public key from a byte slice. The input must be at least 564 bytes long. +func (pubKey *SIDHPublicKeyBob) FromBytes(input []byte) { + if len(input) < 564 { + panic("Too short input to SIDH pubkey FromBytes, expected 564 bytes") + } + pubKey.affine_xP.FromBytes(input[0:188]) + pubKey.affine_xQ.FromBytes(input[188:376]) + pubKey.affine_xQmP.FromBytes(input[376:564]) +} + +// Write a public key to a byte slice. The output must be at least 564 bytes long. +func (pubKey *SIDHPublicKeyBob) ToBytes(output []byte) { + if len(output) < 564 { + panic("Too short output for SIDH pubkey FromBytes, expected 564 bytes") + } + pubKey.affine_xP.ToBytes(output[0:188]) + pubKey.affine_xQ.ToBytes(output[188:376]) + pubKey.affine_xQmP.ToBytes(output[376:564]) +} + +// Alice's public key. +type SIDHPublicKeyAlice struct { + affine_xP ExtensionFieldElement + affine_xQ ExtensionFieldElement + affine_xQmP ExtensionFieldElement +} + +// Read a public key from a byte slice. The input must be at least 564 bytes long. +func (pubKey *SIDHPublicKeyAlice) FromBytes(input []byte) { + if len(input) < 564 { + panic("Too short input to SIDH pubkey FromBytes, expected 564 bytes") + } + pubKey.affine_xP.FromBytes(input[0:188]) + pubKey.affine_xQ.FromBytes(input[188:376]) + pubKey.affine_xQmP.FromBytes(input[376:564]) +} + +// Write a public key to a byte slice. The output must be at least 564 bytes long. 
+func (pubKey *SIDHPublicKeyAlice) ToBytes(output []byte) { + if len(output) < 564 { + panic("Too short output for SIDH pubkey FromBytes, expected 564 bytes") + } + pubKey.affine_xP.ToBytes(output[0:188]) + pubKey.affine_xQ.ToBytes(output[188:376]) + pubKey.affine_xQmP.ToBytes(output[376:564]) +} + +// Bob's secret key. +type SIDHSecretKeyBob struct { + Scalar [SecretKeySize]byte +} + +// Alice's secret key. +type SIDHSecretKeyAlice struct { + Scalar [SecretKeySize]byte +} + +// Generate a keypair for "Alice". Note that because this library does not +// implement SIDH validation, each keypair should be used for at most one +// shared secret computation. +func GenerateAliceKeypair(rand io.Reader) (publicKey *SIDHPublicKeyAlice, secretKey *SIDHSecretKeyAlice, err error) { + publicKey = new(SIDHPublicKeyAlice) + secretKey = new(SIDHSecretKeyAlice) + + _, err = io.ReadFull(rand, secretKey.Scalar[:]) + if err != nil { + return nil, nil, err + } + + // Bit-twiddle to ensure scalar is in 2*[0,2^371): + secretKey.Scalar[47] = 0 + secretKey.Scalar[46] &= 15 // clear high bits, so scalar < 2^372 + secretKey.Scalar[0] &= 254 // clear low bit, so scalar is even + + // We actually want scalar in 2*(0,2^371), but the above procedure + // generates 0 with probability 2^(-371), which isn't worth checking + // for. + + *publicKey = secretKey.PublicKey() + + return +} + +// Set result to zero if the input scalar is <= 3^238. +//go:noescape +func checkLessThanThree238(scalar *[48]byte, result *uint32) + +// Set scalar = 3*scalar +//go:noescape +func multiplyByThree(scalar *[48]byte) + +// Generate a keypair for "Bob". Note that because this library does not +// implement SIDH validation, each keypair should be used for at most one +// shared secret computation. 
+func GenerateBobKeypair(rand io.Reader) (publicKey *SIDHPublicKeyBob, secretKey *SIDHSecretKeyBob, err error) { + publicKey = new(SIDHPublicKeyBob) + secretKey = new(SIDHSecretKeyBob) + + // Perform rejection sampling to obtain a random value in [0,3^238]: + var ok uint32 + for i := 0; i < 102; i++ { + _, err = io.ReadFull(rand, secretKey.Scalar[:]) + if err != nil { + return nil, nil, err + } + // Mask the high bits to obtain a uniform value in [0,2^378): + secretKey.Scalar[47] &= 3 + // Accept if scalar < 3^238 (this happens w/ prob ~0.5828) + checkLessThanThree238(&secretKey.Scalar, &ok) + if ok == 0 { + break + } + } + // ok is nonzero if all 102 trials failed. + // This happens with probability 0.41719...^102 < 2^(-128), i.e., never + if ok != 0 { + return nil, nil, errors.New("WOW! An event with probability < 2^(-128) occurred!!") + } + + // Multiply by 3 to get a scalar in 3*[0,3^238): + multiplyByThree(&secretKey.Scalar) + + // We actually want scalar in 2*(0,2^371), but the above procedure + // generates 0 with probability 3^(-238), which isn't worth checking + // for. + + *publicKey = secretKey.PublicKey() + + return +} + +// Compute the corresponding public key for the given secret key. 
+func (secretKey *SIDHSecretKeyAlice) PublicKey() SIDHPublicKeyAlice { + var xP, xQ, xQmP, xR ProjectivePoint + + xP.FromAffinePrimeField(&Affine_xPB) // = ( x_P : 1) = x(P_B) + xQ.FromAffinePrimeField(&Affine_xPB) // + xQ.X.Neg(&xQ.X) // = (-x_P : 1) = x(Q_B) + xQmP = DistortAndDifference(&Affine_xPB) // = x(Q_B - P_B) + + xR = SecretPoint(&Affine_xPA, &Affine_yPA, secretKey.Scalar[:]) + + var currentCurve ProjectiveCurveParameters + // Starting curve has a = 0, so (A:C) = (0,1) + currentCurve.A.Zero() + currentCurve.C.One() + + var firstPhi FirstFourIsogeny + currentCurve, firstPhi = ComputeFirstFourIsogeny(¤tCurve) + + xP = firstPhi.Eval(&xP) + xQ = firstPhi.Eval(&xQ) + xQmP = firstPhi.Eval(&xQmP) + xR = firstPhi.Eval(&xR) + + var points = make([]ProjectivePoint, 0, 8) + var indices = make([]int, 0, 8) + var phi FourIsogeny + + var i = 0 + + for j := 1; j < 185; j++ { + for i < 185-j { + points = append(points, xR) + indices = append(indices, i) + k := int(aliceIsogenyStrategy[185-i-j]) + xR.Pow2k(¤tCurve, &xR, uint32(2*k)) + i = i + k + } + currentCurve, phi = ComputeFourIsogeny(&xR) + + for k := 0; k < len(points); k++ { + points[k] = phi.Eval(&points[k]) + } + + xP = phi.Eval(&xP) + xQ = phi.Eval(&xQ) + xQmP = phi.Eval(&xQmP) + + // pop xR from points + xR, points = points[len(points)-1], points[:len(points)-1] + i, indices = int(indices[len(indices)-1]), indices[:len(indices)-1] + } + + currentCurve, phi = ComputeFourIsogeny(&xR) + xP = phi.Eval(&xP) + xQ = phi.Eval(&xQ) + xQmP = phi.Eval(&xQmP) + + var invZP, invZQ, invZQmP ExtensionFieldElement + ExtensionFieldBatch3Inv(&xP.Z, &xQ.Z, &xQmP.Z, &invZP, &invZQ, &invZQmP) + + var publicKey SIDHPublicKeyAlice + publicKey.affine_xP.Mul(&xP.X, &invZP) + publicKey.affine_xQ.Mul(&xQ.X, &invZQ) + publicKey.affine_xQmP.Mul(&xQmP.X, &invZQmP) + + return publicKey +} + +// Compute the public key corresponding to the secret key. 
+func (secretKey *SIDHSecretKeyBob) PublicKey() SIDHPublicKeyBob { + var xP, xQ, xQmP, xR ProjectivePoint + + xP.FromAffinePrimeField(&Affine_xPA) // = ( x_P : 1) = x(P_A) + xQ.FromAffinePrimeField(&Affine_xPA) // + xQ.X.Neg(&xQ.X) // = (-x_P : 1) = x(Q_A) + xQmP = DistortAndDifference(&Affine_xPA) // = x(Q_B - P_B) + + xR = SecretPoint(&Affine_xPB, &Affine_yPB, secretKey.Scalar[:]) + + var currentCurve ProjectiveCurveParameters + // Starting curve has a = 0, so (A:C) = (0,1) + currentCurve.A.Zero() + currentCurve.C.One() + + var points = make([]ProjectivePoint, 0, 8) + var indices = make([]int, 0, 8) + var phi ThreeIsogeny + + var i = 0 + + for j := 1; j < 239; j++ { + for i < 239-j { + points = append(points, xR) + indices = append(indices, i) + k := int(bobIsogenyStrategy[239-i-j]) + xR.Pow3k(¤tCurve, &xR, uint32(k)) + i = i + k + } + currentCurve, phi = ComputeThreeIsogeny(&xR) + + for k := 0; k < len(points); k++ { + points[k] = phi.Eval(&points[k]) + } + + xP = phi.Eval(&xP) + xQ = phi.Eval(&xQ) + xQmP = phi.Eval(&xQmP) + + // pop xR from points + xR, points = points[len(points)-1], points[:len(points)-1] + i, indices = int(indices[len(indices)-1]), indices[:len(indices)-1] + } + + currentCurve, phi = ComputeThreeIsogeny(&xR) + xP = phi.Eval(&xP) + xQ = phi.Eval(&xQ) + xQmP = phi.Eval(&xQmP) + + var invZP, invZQ, invZQmP ExtensionFieldElement + ExtensionFieldBatch3Inv(&xP.Z, &xQ.Z, &xQmP.Z, &invZP, &invZQ, &invZQmP) + + var publicKey SIDHPublicKeyBob + publicKey.affine_xP.Mul(&xP.X, &invZP) + publicKey.affine_xQ.Mul(&xQ.X, &invZQ) + publicKey.affine_xQmP.Mul(&xQmP.X, &invZQmP) + + return publicKey +} + +// Compute (Alice's view of) a shared secret using Alice's secret key and Bob's public key. 
+func (aliceSecret *SIDHSecretKeyAlice) SharedSecret(bobPublic *SIDHPublicKeyBob) [SharedSecretSize]byte { + var currentCurve = RecoverCurveParameters(&bobPublic.affine_xP, &bobPublic.affine_xQ, &bobPublic.affine_xQmP) + + var xR, xP, xQ, xQmP ProjectivePoint + + xP.FromAffine(&bobPublic.affine_xP) + xQ.FromAffine(&bobPublic.affine_xQ) + xQmP.FromAffine(&bobPublic.affine_xQmP) + + xR.ThreePointLadder(¤tCurve, &xP, &xQ, &xQmP, aliceSecret.Scalar[:]) + + var firstPhi FirstFourIsogeny + currentCurve, firstPhi = ComputeFirstFourIsogeny(¤tCurve) + xR = firstPhi.Eval(&xR) + + var points = make([]ProjectivePoint, 0, 8) + var indices = make([]int, 0, 8) + var phi FourIsogeny + + var i = 0 + + for j := 1; j < 185; j++ { + for i < 185-j { + points = append(points, xR) + indices = append(indices, i) + k := int(aliceIsogenyStrategy[185-i-j]) + xR.Pow2k(¤tCurve, &xR, uint32(2*k)) + i = i + k + } + currentCurve, phi = ComputeFourIsogeny(&xR) + + for k := 0; k < len(points); k++ { + points[k] = phi.Eval(&points[k]) + } + + // pop xR from points + xR, points = points[len(points)-1], points[:len(points)-1] + i, indices = int(indices[len(indices)-1]), indices[:len(indices)-1] + } + + currentCurve, _ = ComputeFourIsogeny(&xR) + + var sharedSecret [SharedSecretSize]byte + var jInv = currentCurve.JInvariant() + jInv.ToBytes(sharedSecret[:]) + return sharedSecret +} + +// Compute (Bob's view of) a shared secret using Bob's secret key and Alice's public key. 
+func (bobSecret *SIDHSecretKeyBob) SharedSecret(alicePublic *SIDHPublicKeyAlice) [SharedSecretSize]byte { + var currentCurve = RecoverCurveParameters(&alicePublic.affine_xP, &alicePublic.affine_xQ, &alicePublic.affine_xQmP) + + var xR, xP, xQ, xQmP ProjectivePoint + + xP.FromAffine(&alicePublic.affine_xP) + xQ.FromAffine(&alicePublic.affine_xQ) + xQmP.FromAffine(&alicePublic.affine_xQmP) + + xR.ThreePointLadder(¤tCurve, &xP, &xQ, &xQmP, bobSecret.Scalar[:]) + + var points = make([]ProjectivePoint, 0, 8) + var indices = make([]int, 0, 8) + var phi ThreeIsogeny + + var i = 0 + + for j := 1; j < 239; j++ { + for i < 239-j { + points = append(points, xR) + indices = append(indices, i) + k := int(bobIsogenyStrategy[239-i-j]) + xR.Pow3k(¤tCurve, &xR, uint32(k)) + i = i + k + } + currentCurve, phi = ComputeThreeIsogeny(&xR) + + for k := 0; k < len(points); k++ { + points[k] = phi.Eval(&points[k]) + } + + // pop xR from points + xR, points = points[len(points)-1], points[:len(points)-1] + i, indices = int(indices[len(indices)-1]), indices[:len(indices)-1] + } + currentCurve, _ = ComputeThreeIsogeny(&xR) + + var sharedSecret [SharedSecretSize]byte + var jInv = currentCurve.JInvariant() + jInv.ToBytes(sharedSecret[:]) + return sharedSecret +} diff --git a/vendor/github.com/cloudflare/p751sidh/sidh_amd64.s b/vendor/github.com/cloudflare/p751sidh/sidh_amd64.s new file mode 100644 index 00000000..45d3779b --- /dev/null +++ b/vendor/github.com/cloudflare/p751sidh/sidh_amd64.s @@ -0,0 +1,65 @@ +#include "textflag.h" + +// Digits of 3^238 - 1 +#define THREE238M1_0 $0xedcd718a828384f8 +#define THREE238M1_1 $0x733b35bfd4427a14 +#define THREE238M1_2 $0xf88229cf94d7cf38 +#define THREE238M1_3 $0x63c56c990c7c2ad6 +#define THREE238M1_4 $0xb858a87e8f4222c7 +#define THREE238M1_5 $0x254c9c6b525eaf5 + +TEXT ·checkLessThanThree238(SB), NOSPLIT, $0-16 + MOVQ scalar+0(FP), SI + MOVQ result+8(FP), DI + + XORQ AX, AX + + // Set [R10,...,R15] = 3^238 + MOVQ THREE238M1_0, R10 + MOVQ THREE238M1_1, 
R11 + MOVQ THREE238M1_2, R12 + MOVQ THREE238M1_3, R13 + MOVQ THREE238M1_4, R14 + MOVQ THREE238M1_5, R15 + + // Set [R10,...,R15] = 3^238 - scalar + SUBQ (SI), R10 + SBBQ (8)(SI), R11 + SBBQ (16)(SI), R12 + SBBQ (24)(SI), R13 + SBBQ (32)(SI), R14 + SBBQ (40)(SI), R15 + + // Save borrow flag indicating 3^238 - scalar < 0 as a mask in AX (eax) + SBBL $0, AX + MOVL AX, (DI) + + RET + +TEXT ·multiplyByThree(SB), NOSPLIT, $0-8 + MOVQ scalar+0(FP), SI + + // Set [R10,...,R15] = scalar + MOVQ (SI), R10 + MOVQ (8)(SI), R11 + MOVQ (16)(SI), R12 + MOVQ (24)(SI), R13 + MOVQ (32)(SI), R14 + MOVQ (40)(SI), R15 + + // Add scalar twice to compute 3*scalar + ADDQ R10, (SI) + ADCQ R11, (8)(SI) + ADCQ R12, (16)(SI) + ADCQ R13, (24)(SI) + ADCQ R14, (32)(SI) + ADCQ R15, (40)(SI) + ADDQ R10, (SI) + ADCQ R11, (8)(SI) + ADCQ R12, (16)(SI) + ADCQ R13, (24)(SI) + ADCQ R14, (32)(SI) + ADCQ R15, (40)(SI) + + RET + diff --git a/vendor/github.com/cloudflare/p751sidh/sidh_test.go b/vendor/github.com/cloudflare/p751sidh/sidh_test.go new file mode 100644 index 00000000..c89ebf61 --- /dev/null +++ b/vendor/github.com/cloudflare/p751sidh/sidh_test.go @@ -0,0 +1,406 @@ +package p751sidh + +import ( + "bytes" + "crypto/rand" + mathRand "math/rand" + "reflect" + "testing" + "testing/quick" +) + +import . 
"github.com/cloudflare/p751sidh/p751toolbox" + +func TestMultiplyByThree(t *testing.T) { + // sage: repr((3^238 -1).digits(256)) + var three238minus1 = [48]byte{248, 132, 131, 130, 138, 113, 205, 237, 20, 122, 66, 212, 191, 53, 59, 115, 56, 207, 215, 148, 207, 41, 130, 248, 214, 42, 124, 12, 153, 108, 197, 99, 199, 34, 66, 143, 126, 168, 88, 184, 245, 234, 37, 181, 198, 201, 84, 2} + // sage: repr((3*(3^238 -1)).digits(256)) + var threeTimesThree238minus1 = [48]byte{232, 142, 138, 135, 159, 84, 104, 201, 62, 110, 199, 124, 63, 161, 177, 89, 169, 109, 135, 190, 110, 125, 134, 233, 132, 128, 116, 37, 203, 69, 80, 43, 86, 104, 198, 173, 123, 249, 9, 41, 225, 192, 113, 31, 84, 93, 254, 6} + + multiplyByThree(&three238minus1) + + for i := 0; i < 48; i++ { + if three238minus1[i] != threeTimesThree238minus1[i] { + t.Error("Digit", i, "error: found", three238minus1[i], "expected", threeTimesThree238minus1[i]) + } + } +} + +func TestCheckLessThanThree238(t *testing.T) { + var three238minus1 = [48]byte{248, 132, 131, 130, 138, 113, 205, 237, 20, 122, 66, 212, 191, 53, 59, 115, 56, 207, 215, 148, 207, 41, 130, 248, 214, 42, 124, 12, 153, 108, 197, 99, 199, 34, 66, 143, 126, 168, 88, 184, 245, 234, 37, 181, 198, 201, 84, 2} + var three238 = [48]byte{249, 132, 131, 130, 138, 113, 205, 237, 20, 122, 66, 212, 191, 53, 59, 115, 56, 207, 215, 148, 207, 41, 130, 248, 214, 42, 124, 12, 153, 108, 197, 99, 199, 34, 66, 143, 126, 168, 88, 184, 245, 234, 37, 181, 198, 201, 84, 2} + var three238plus1 = [48]byte{250, 132, 131, 130, 138, 113, 205, 237, 20, 122, 66, 212, 191, 53, 59, 115, 56, 207, 215, 148, 207, 41, 130, 248, 214, 42, 124, 12, 153, 108, 197, 99, 199, 34, 66, 143, 126, 168, 88, 184, 245, 234, 37, 181, 198, 201, 84, 2} + + var result = uint32(57) + + checkLessThanThree238(&three238minus1, &result) + if result != 0 { + t.Error("Expected 0, got", result) + } + checkLessThanThree238(&three238, &result) + if result == 0 { + t.Error("Expected nonzero, got", result) + } + 
checkLessThanThree238(&three238plus1, &result) + if result == 0 { + t.Error("Expected nonzero, got", result) + } +} + +// This throws away the generated public key, forcing us to recompute it in the test, +// but generating the value *in* the quickcheck predicate breaks the testing. +func (x SIDHSecretKeyAlice) Generate(quickCheckRand *mathRand.Rand, size int) reflect.Value { + // use crypto/rand instead of the quickCheck-provided RNG + _, aliceSecret, err := GenerateAliceKeypair(rand.Reader) + if err != nil { + panic("error generating secret key") + } + return reflect.ValueOf(*aliceSecret) +} + +func (x SIDHSecretKeyBob) Generate(quickCheckRand *mathRand.Rand, size int) reflect.Value { + // use crypto/rand instead of the quickCheck-provided RNG + _, bobSecret, err := GenerateBobKeypair(rand.Reader) + if err != nil { + panic("error generating secret key") + } + return reflect.ValueOf(*bobSecret) +} + +func TestEphemeralSharedSecret(t *testing.T) { + sharedSecretsMatch := func(aliceSecret SIDHSecretKeyAlice, bobSecret SIDHSecretKeyBob) bool { + alicePublic := aliceSecret.PublicKey() + bobPublic := bobSecret.PublicKey() + + aliceSharedSecret := aliceSecret.SharedSecret(&bobPublic) + bobSharedSecret := bobSecret.SharedSecret(&alicePublic) + + return bytes.Equal(aliceSharedSecret[:], bobSharedSecret[:]) + } + + if err := quick.Check(sharedSecretsMatch, nil); err != nil { + t.Error(err) + } +} + +// Perform Alice's (2-isogeny) key generation, using the slow but simple multiplication-based strategy. +// +// This function just exists to ensure that the fast isogeny-tree strategy works correctly. 
+func aliceKeyGenSlow(secretKey *SIDHSecretKeyAlice) SIDHPublicKeyAlice { + var xP, xQ, xQmP, xR, xS ProjectivePoint + + xP.FromAffinePrimeField(&Affine_xPB) // = ( x_P : 1) = x(P_B) + xQ.FromAffinePrimeField(&Affine_xPB) // + xQ.X.Neg(&xQ.X) // = (-x_P : 1) = x(Q_B) + xQmP = DistortAndDifference(&Affine_xPB) // = x(Q_B - P_B) + + xR = SecretPoint(&Affine_xPA, &Affine_yPA, secretKey.Scalar[:]) + + var currentCurve ProjectiveCurveParameters + // Starting curve has a = 0, so (A:C) = (0,1) + currentCurve.A.Zero() + currentCurve.C.One() + + var firstPhi FirstFourIsogeny + currentCurve, firstPhi = ComputeFirstFourIsogeny(¤tCurve) + + xP = firstPhi.Eval(&xP) + xQ = firstPhi.Eval(&xQ) + xQmP = firstPhi.Eval(&xQmP) + xR = firstPhi.Eval(&xR) + + var phi FourIsogeny + for e := (372 - 4); e >= 0; e -= 2 { + xS.Pow2k(¤tCurve, &xR, uint32(e)) + currentCurve, phi = ComputeFourIsogeny(&xS) + xR = phi.Eval(&xR) + xP = phi.Eval(&xP) + xQ = phi.Eval(&xQ) + xQmP = phi.Eval(&xQmP) + } + + var invZP, invZQ, invZQmP ExtensionFieldElement + ExtensionFieldBatch3Inv(&xP.Z, &xQ.Z, &xQmP.Z, &invZP, &invZQ, &invZQmP) + + var publicKey SIDHPublicKeyAlice + publicKey.affine_xP.Mul(&xP.X, &invZP) + publicKey.affine_xQ.Mul(&xQ.X, &invZQ) + publicKey.affine_xQmP.Mul(&xQmP.X, &invZQmP) + + return publicKey +} + +// Perform Bob's (3-isogeny) key generation, using the slow but simple multiplication-based strategy. +// +// This function just exists to ensure that the fast isogeny-tree strategy works correctly. 
+func bobKeyGenSlow(secretKey *SIDHSecretKeyBob) SIDHPublicKeyBob { + var xP, xQ, xQmP, xR, xS ProjectivePoint + + xP.FromAffinePrimeField(&Affine_xPA) // = ( x_P : 1) = x(P_A) + xQ.FromAffinePrimeField(&Affine_xPA) // + xQ.X.Neg(&xQ.X) // = (-x_P : 1) = x(Q_A) + xQmP = DistortAndDifference(&Affine_xPA) // = x(Q_B - P_B) + + xR = SecretPoint(&Affine_xPB, &Affine_yPB, secretKey.Scalar[:]) + + var currentCurve ProjectiveCurveParameters + // Starting curve has a = 0, so (A:C) = (0,1) + currentCurve.A.Zero() + currentCurve.C.One() + + var phi ThreeIsogeny + for e := 238; e >= 0; e-- { + xS.Pow3k(¤tCurve, &xR, uint32(e)) + currentCurve, phi = ComputeThreeIsogeny(&xS) + xR = phi.Eval(&xR) + xP = phi.Eval(&xP) + xQ = phi.Eval(&xQ) + xQmP = phi.Eval(&xQmP) + } + + var invZP, invZQ, invZQmP ExtensionFieldElement + ExtensionFieldBatch3Inv(&xP.Z, &xQ.Z, &xQmP.Z, &invZP, &invZQ, &invZQmP) + + var publicKey SIDHPublicKeyBob + publicKey.affine_xP.Mul(&xP.X, &invZP) + publicKey.affine_xQ.Mul(&xQ.X, &invZQ) + publicKey.affine_xQmP.Mul(&xQmP.X, &invZQmP) + + return publicKey +} + +// Perform Alice's key agreement, using the slow but simple multiplication-based strategy. +// +// This function just exists to ensure that the fast isogeny-tree strategy works correctly. 
+func aliceSharedSecretSlow(bobPublic *SIDHPublicKeyBob, aliceSecret *SIDHSecretKeyAlice) [188]byte { + var currentCurve = RecoverCurveParameters(&bobPublic.affine_xP, &bobPublic.affine_xQ, &bobPublic.affine_xQmP) + + var xR, xS, xP, xQ, xQmP ProjectivePoint + + xP.FromAffine(&bobPublic.affine_xP) + xQ.FromAffine(&bobPublic.affine_xQ) + xQmP.FromAffine(&bobPublic.affine_xQmP) + + xR.ThreePointLadder(¤tCurve, &xP, &xQ, &xQmP, aliceSecret.Scalar[:]) + + var firstPhi FirstFourIsogeny + currentCurve, firstPhi = ComputeFirstFourIsogeny(¤tCurve) + xR = firstPhi.Eval(&xR) + + var phi FourIsogeny + for e := (372 - 4); e >= 2; e -= 2 { + xS.Pow2k(¤tCurve, &xR, uint32(e)) + currentCurve, phi = ComputeFourIsogeny(&xS) + xR = phi.Eval(&xR) + } + + currentCurve, _ = ComputeFourIsogeny(&xR) + + var sharedSecret [SharedSecretSize]byte + var jInv = currentCurve.JInvariant() + jInv.ToBytes(sharedSecret[:]) + return sharedSecret +} + +// Perform Bob's key agreement, using the slow but simple multiplication-based strategy. +// +// This function just exists to ensure that the fast isogeny-tree strategy works correctly. 
+func bobSharedSecretSlow(alicePublic *SIDHPublicKeyAlice, bobSecret *SIDHSecretKeyBob) [188]byte { + var currentCurve = RecoverCurveParameters(&alicePublic.affine_xP, &alicePublic.affine_xQ, &alicePublic.affine_xQmP) + + var xR, xS, xP, xQ, xQmP ProjectivePoint + + xP.FromAffine(&alicePublic.affine_xP) + xQ.FromAffine(&alicePublic.affine_xQ) + xQmP.FromAffine(&alicePublic.affine_xQmP) + + xR.ThreePointLadder(¤tCurve, &xP, &xQ, &xQmP, bobSecret.Scalar[:]) + + var phi ThreeIsogeny + for e := 238; e >= 1; e-- { + xS.Pow3k(¤tCurve, &xR, uint32(e)) + currentCurve, phi = ComputeThreeIsogeny(&xS) + xR = phi.Eval(&xR) + } + + currentCurve, _ = ComputeThreeIsogeny(&xR) + + var sharedSecret [SharedSecretSize]byte + var jInv = currentCurve.JInvariant() + jInv.ToBytes(sharedSecret[:]) + return sharedSecret +} + +func TestBobKeyGenFastVsSlow(t *testing.T) { + // m_B = 3*randint(0,3^238) + var m_B = [48]uint8{246, 217, 158, 190, 100, 227, 224, 181, 171, 32, 120, 72, 92, 115, 113, 62, 103, 57, 71, 252, 166, 121, 126, 201, 55, 99, 213, 234, 243, 228, 171, 68, 9, 239, 214, 37, 255, 242, 217, 180, 25, 54, 242, 61, 101, 245, 78, 0} + + var bobSecretKey = SIDHSecretKeyBob{Scalar: m_B} + var fastPubKey = bobSecretKey.PublicKey() + var slowPubKey = bobKeyGenSlow(&bobSecretKey) + + if !fastPubKey.affine_xP.VartimeEq(&slowPubKey.affine_xP) { + t.Error("Expected affine_xP = ", fastPubKey.affine_xP, "found", slowPubKey.affine_xP) + } + if !fastPubKey.affine_xQ.VartimeEq(&slowPubKey.affine_xQ) { + t.Error("Expected affine_xQ = ", fastPubKey.affine_xQ, "found", slowPubKey.affine_xQ) + } + if !fastPubKey.affine_xQmP.VartimeEq(&slowPubKey.affine_xQmP) { + t.Error("Expected affine_xQmP = ", fastPubKey.affine_xQmP, "found", slowPubKey.affine_xQmP) + } +} + +func TestAliceKeyGenFastVsSlow(t *testing.T) { + // m_A = 2*randint(0,2^371) + var m_A = [48]uint8{248, 31, 9, 39, 165, 125, 79, 135, 70, 97, 87, 231, 221, 204, 245, 38, 150, 198, 187, 184, 199, 148, 156, 18, 137, 71, 248, 83, 111, 170, 138, 
61, 112, 25, 188, 197, 132, 151, 1, 0, 207, 178, 24, 72, 171, 22, 11, 0} + + var aliceSecretKey = SIDHSecretKeyAlice{Scalar: m_A} + var fastPubKey = aliceSecretKey.PublicKey() + var slowPubKey = aliceKeyGenSlow(&aliceSecretKey) + + if !fastPubKey.affine_xP.VartimeEq(&slowPubKey.affine_xP) { + t.Error("Expected affine_xP = ", fastPubKey.affine_xP, "found", slowPubKey.affine_xP) + } + if !fastPubKey.affine_xQ.VartimeEq(&slowPubKey.affine_xQ) { + t.Error("Expected affine_xQ = ", fastPubKey.affine_xQ, "found", slowPubKey.affine_xQ) + } + if !fastPubKey.affine_xQmP.VartimeEq(&slowPubKey.affine_xQmP) { + t.Error("Expected affine_xQmP = ", fastPubKey.affine_xQmP, "found", slowPubKey.affine_xQmP) + } +} + +func TestSharedSecret(t *testing.T) { + // m_A = 2*randint(0,2^371) + var m_A = [48]uint8{248, 31, 9, 39, 165, 125, 79, 135, 70, 97, 87, 231, 221, 204, 245, 38, 150, 198, 187, 184, 199, 148, 156, 18, 137, 71, 248, 83, 111, 170, 138, 61, 112, 25, 188, 197, 132, 151, 1, 0, 207, 178, 24, 72, 171, 22, 11, 0} + // m_B = 3*randint(0,3^238) + var m_B = [48]uint8{246, 217, 158, 190, 100, 227, 224, 181, 171, 32, 120, 72, 92, 115, 113, 62, 103, 57, 71, 252, 166, 121, 126, 201, 55, 99, 213, 234, 243, 228, 171, 68, 9, 239, 214, 37, 255, 242, 217, 180, 25, 54, 242, 61, 101, 245, 78, 0} + + var aliceSecret = SIDHSecretKeyAlice{Scalar: m_A} + var bobSecret = SIDHSecretKeyBob{Scalar: m_B} + + var alicePublic = aliceSecret.PublicKey() + var bobPublic = bobSecret.PublicKey() + + var aliceSharedSecretSlow = aliceSharedSecretSlow(&bobPublic, &aliceSecret) + var aliceSharedSecretFast = aliceSecret.SharedSecret(&bobPublic) + var bobSharedSecretSlow = bobSharedSecretSlow(&alicePublic, &bobSecret) + var bobSharedSecretFast = bobSecret.SharedSecret(&alicePublic) + + if !bytes.Equal(aliceSharedSecretFast[:], aliceSharedSecretSlow[:]) { + t.Error("Shared secret (fast) mismatch: Alice has ", aliceSharedSecretFast, " Bob has ", bobSharedSecretFast) + } + if !bytes.Equal(aliceSharedSecretSlow[:], 
bobSharedSecretSlow[:]) { + t.Error("Shared secret (slow) mismatch: Alice has ", aliceSharedSecretSlow, " Bob has ", bobSharedSecretSlow) + } + if !bytes.Equal(aliceSharedSecretSlow[:], bobSharedSecretFast[:]) { + t.Error("Shared secret mismatch: Alice (slow) has ", aliceSharedSecretSlow, " Bob (fast) has ", bobSharedSecretFast) + } +} + +func TestSecretPoint(t *testing.T) { + // m_A = 2*randint(0,2^371) + var m_A = [48]uint8{248, 31, 9, 39, 165, 125, 79, 135, 70, 97, 87, 231, 221, 204, 245, 38, 150, 198, 187, 184, 199, 148, 156, 18, 137, 71, 248, 83, 111, 170, 138, 61, 112, 25, 188, 197, 132, 151, 1, 0, 207, 178, 24, 72, 171, 22, 11, 0} + // m_B = 3*randint(0,3^238) + var m_B = [48]uint8{246, 217, 158, 190, 100, 227, 224, 181, 171, 32, 120, 72, 92, 115, 113, 62, 103, 57, 71, 252, 166, 121, 126, 201, 55, 99, 213, 234, 243, 228, 171, 68, 9, 239, 214, 37, 255, 242, 217, 180, 25, 54, 242, 61, 101, 245, 78, 0} + + var xR_A = SecretPoint(&Affine_xPA, &Affine_yPA, m_A[:]) + var xR_B = SecretPoint(&Affine_xPB, &Affine_yPB, m_B[:]) + + var sageAffine_xR_A = ExtensionFieldElement{A: Fp751Element{0x29f1dff12103d089, 0x7409b9bf955e0d87, 0xe812441c1cca7288, 0xc32b8b13efba55f9, 0xc3b76a80696d83da, 0x185dd4f93a3dc373, 0xfc07c1a9115b6717, 0x39bfcdd63b5c4254, 0xc4d097d51d41efd8, 0x4f893494389b21c7, 0x373433211d3d0446, 0x53c35ccc3d22}, B: Fp751Element{0x722e718f33e40815, 0x8c5fc0fdf715667, 0x850fd292bbe8c74c, 0x212938a60fcbf5d3, 0xfdb2a099d58dc6e7, 0x232f83ab63c9c205, 0x23eda62fa5543f5e, 0x49b5758855d9d04f, 0x6b455e6642ef25d1, 0x9651162537470202, 0xfeced582f2e96ff0, 0x33a9e0c0dea8}} + var sageAffine_xR_B = ExtensionFieldElement{A: Fp751Element{0xdd4e66076e8499f5, 0xe7efddc6907519da, 0xe31f9955b337108c, 0x8e558c5479ffc5e1, 0xfee963ead776bfc2, 0x33aa04c35846bf15, 0xab77d91b23617a0d, 0xbdd70948746070e2, 0x66f71291c277e942, 0x187c39db2f901fce, 0x69262987d5d32aa2, 0xe1db40057dc}, B: Fp751Element{0xd1b766abcfd5c167, 0x4591059dc8a382fa, 0x1ddf9490736c223d, 0xc96db091bdf2b3dd, 
0x7b8b9c3dc292f502, 0xe5b18ad85e4d3e33, 0xc3f3479b6664b931, 0xa4f17865299e21e6, 0x3f7ef5b332fa1c6e, 0x875bedb5dab06119, 0x9b5a06ea2e23b93, 0x43d48296fb26}} + + var affine_xR_A = xR_A.ToAffine() + if !sageAffine_xR_A.VartimeEq(affine_xR_A) { + t.Error("Expected \n", sageAffine_xR_A, "\nfound\n", affine_xR_A) + } + + var affine_xR_B = xR_B.ToAffine() + if !sageAffine_xR_B.VartimeEq(affine_xR_B) { + t.Error("Expected \n", sageAffine_xR_B, "\nfound\n", affine_xR_B) + } +} + +var keygenBenchPubKeyAlice SIDHPublicKeyAlice +var keygenBenchPubKeyBob SIDHPublicKeyBob + +func BenchmarkAliceKeyGen(b *testing.B) { + for n := 0; n < b.N; n++ { + GenerateAliceKeypair(rand.Reader) + } +} + +func BenchmarkAliceKeyGenSlow(b *testing.B) { + // m_A = 2*randint(0,2^371) + var m_A = [48]uint8{248, 31, 9, 39, 165, 125, 79, 135, 70, 97, 87, 231, 221, 204, 245, 38, 150, 198, 187, 184, 199, 148, 156, 18, 137, 71, 248, 83, 111, 170, 138, 61, 112, 25, 188, 197, 132, 151, 1, 0, 207, 178, 24, 72, 171, 22, 11, 0} + + var aliceSecretKey = SIDHSecretKeyAlice{Scalar: m_A} + + for n := 0; n < b.N; n++ { + keygenBenchPubKeyAlice = aliceKeyGenSlow(&aliceSecretKey) + } +} + +func BenchmarkBobKeyGen(b *testing.B) { + for n := 0; n < b.N; n++ { + GenerateBobKeypair(rand.Reader) + } +} + +func BenchmarkBobKeyGenSlow(b *testing.B) { + // m_B = 3*randint(0,3^238) + var m_B = [48]uint8{246, 217, 158, 190, 100, 227, 224, 181, 171, 32, 120, 72, 92, 115, 113, 62, 103, 57, 71, 252, 166, 121, 126, 201, 55, 99, 213, 234, 243, 228, 171, 68, 9, 239, 214, 37, 255, 242, 217, 180, 25, 54, 242, 61, 101, 245, 78, 0} + + var bobSecretKey = SIDHSecretKeyBob{Scalar: m_B} + + for n := 0; n < b.N; n++ { + keygenBenchPubKeyBob = bobKeyGenSlow(&bobSecretKey) + } +} + +var benchSharedSecretAlicePublic = SIDHPublicKeyAlice{affine_xP: ExtensionFieldElement{A: Fp751Element{0xea6b2d1e2aebb250, 0x35d0b205dc4f6386, 0xb198e93cb1830b8d, 0x3b5b456b496ddcc6, 0x5be3f0d41132c260, 0xce5f188807516a00, 0x54f3e7469ea8866d, 0x33809ef47f36286, 
0x6fa45f83eabe1edb, 0x1b3391ae5d19fd86, 0x1e66daf48584af3f, 0xb430c14aaa87}, B: Fp751Element{0x97b41ebc61dcb2ad, 0x80ead31cb932f641, 0x40a940099948b642, 0x2a22fd16cdc7fe84, 0xaabf35b17579667f, 0x76c1d0139feb4032, 0x71467e1e7b1949be, 0x678ca8dadd0d6d81, 0x14445daea9064c66, 0x92d161eab4fa4691, 0x8dfbb01b6b238d36, 0x2e3718434e4e}}, affine_xQ: ExtensionFieldElement{A: Fp751Element{0xb055cf0ca1943439, 0xa9ff5de2fa6c69ed, 0x4f2761f934e5730a, 0x61a1dcaa1f94aa4b, 0xce3c8fadfd058543, 0xeac432aaa6701b8e, 0x8491d523093aea8b, 0xba273f9bd92b9b7f, 0xd8f59fd34439bb5a, 0xdc0350261c1fe600, 0x99375ab1eb151311, 0x14d175bbdbc5}, B: Fp751Element{0xffb0ef8c2111a107, 0x55ceca3825991829, 0xdbf8a1ccc075d34b, 0xb8e9187bd85d8494, 0x670aa2d5c34a03b0, 0xef9fe2ed2b064953, 0xc911f5311d645aee, 0xf4411f409e410507, 0x934a0a852d03e1a8, 0xe6274e67ae1ad544, 0x9f4bc563c69a87bc, 0x6f316019681e}}, affine_xQmP: ExtensionFieldElement{A: Fp751Element{0x6ffb44306a153779, 0xc0ffef21f2f918f3, 0x196c46d35d77f778, 0x4a73f80452edcfe6, 0x9b00836bce61c67f, 0x387879418d84219e, 0x20700cf9fc1ec5d1, 0x1dfe2356ec64155e, 0xf8b9e33038256b1c, 0xd2aaf2e14bada0f0, 0xb33b226e79a4e313, 0x6be576fad4e5}, B: Fp751Element{0x7db5dbc88e00de34, 0x75cc8cb9f8b6e11e, 0x8c8001c04ebc52ac, 0x67ef6c981a0b5a94, 0xc3654fbe73230738, 0xc6a46ee82983ceca, 0xed1aa61a27ef49f0, 0x17fe5a13b0858fe0, 0x9ae0ca945a4c6b3c, 0x234104a218ad8878, 0xa619627166104394, 0x556a01ff2e7e}}} + +var benchSharedSecretBobPublic = SIDHPublicKeyBob{affine_xP: ExtensionFieldElement{A: Fp751Element{0x6e1b8b250595b5fb, 0x800787f5197d963b, 0x6f4a4e314162a8a4, 0xe75cba4d37c02128, 0x2212e7579817a216, 0xd8a5fdb0ab2f843c, 0x44230c9f998cfd6c, 0x311ff789b26aa292, 0x73d05c379ff53e40, 0xddd8f5a223bad56c, 0x94b611e6e931c8b5, 0x4d6b9bfe3555}, B: Fp751Element{0x1a3686cfc8381294, 0x57f089b14f639cc4, 0xdb6a1565f2f5cabe, 0x83d67e8f6a02f215, 0x1946272593815e87, 0x2d839631785ca74c, 0xf149dcb2dee2bee, 0x705acd79efe405bf, 0xae3769b67687fbed, 0xacd5e29f2c203cb0, 0xdd91f08fa3153e08, 
0x5a9ad8cb7400}}, affine_xQ: ExtensionFieldElement{A: Fp751Element{0xd30ed48b8c0d0c4a, 0x949cad95959ec462, 0x188675581e9d1f2a, 0xf57ed3233d33031c, 0x564c6532f7283ce7, 0x80cbef8ee3b66ecb, 0x5c687359315f22ce, 0x1da950f8671fac50, 0x6fa6c045f513ef6, 0x25ffc65a8da12d4a, 0x8b0f4ac0f5244f23, 0xadcb0e07fd92}, B: Fp751Element{0x37a43cd933ebfec4, 0x2a2806ef28dacf84, 0xd671fe718611b71e, 0xef7d73f01a676326, 0x99db1524e5799cf2, 0x860271dfbf67ff62, 0xedc2a0a14114bcf, 0x6c7b9b14b1264e5a, 0xf52de61707dc38b4, 0xccddb13fcc691f5a, 0x80f37a1220163920, 0x6a9175b9d5a1}}, affine_xQmP: ExtensionFieldElement{A: Fp751Element{0xf08af9e695c626da, 0x7a4b4d52b54e1b38, 0x980272cd4c8b8c10, 0x1afcb6151d113176, 0xaef7dbd877c00f0c, 0xe8a5ea89078700c3, 0x520c1901aa8323fa, 0xfba049c947f3383a, 0x1c38abcab48be9af, 0x9f1212b923481ea, 0x1522da3457a7c293, 0xb746f78e3a61}, B: Fp751Element{0x48010d0b48491128, 0x6d1c5c509f99f450, 0xaa3522330e3a8a62, 0x872aaf46193b2bb2, 0xc89260a2d8508973, 0x98bbbebf5524be83, 0x35711d01d895c217, 0x5e44e09ec506ed7, 0xac653a760ef6fd58, 0x5837954e30ad688d, 0xcbd3e9a1b5661da8, 0x15547f5d091a}}} + +func BenchmarkSharedSecretAlice(b *testing.B) { + // m_A = 2*randint(0,2^371) + var m_A = [48]uint8{248, 31, 9, 39, 165, 125, 79, 135, 70, 97, 87, 231, 221, 204, 245, 38, 150, 198, 187, 184, 199, 148, 156, 18, 137, 71, 248, 83, 111, 170, 138, 61, 112, 25, 188, 197, 132, 151, 1, 0, 207, 178, 24, 72, 171, 22, 11, 0} + + var aliceSecret = SIDHSecretKeyAlice{Scalar: m_A} + + for n := 0; n < b.N; n++ { + aliceSecret.SharedSecret(&benchSharedSecretBobPublic) + } +} + +func BenchmarkSharedSecretAliceSlow(b *testing.B) { + // m_A = 2*randint(0,2^371) + var m_A = [48]uint8{248, 31, 9, 39, 165, 125, 79, 135, 70, 97, 87, 231, 221, 204, 245, 38, 150, 198, 187, 184, 199, 148, 156, 18, 137, 71, 248, 83, 111, 170, 138, 61, 112, 25, 188, 197, 132, 151, 1, 0, 207, 178, 24, 72, 171, 22, 11, 0} + + var aliceSecret = SIDHSecretKeyAlice{Scalar: m_A} + + for n := 0; n < b.N; n++ { + 
aliceSharedSecretSlow(&benchSharedSecretBobPublic, &aliceSecret) + } +} + +func BenchmarkSharedSecretBob(b *testing.B) { + // m_B = 3*randint(0,3^238) + var m_B = [48]uint8{246, 217, 158, 190, 100, 227, 224, 181, 171, 32, 120, 72, 92, 115, 113, 62, 103, 57, 71, 252, 166, 121, 126, 201, 55, 99, 213, 234, 243, 228, 171, 68, 9, 239, 214, 37, 255, 242, 217, 180, 25, 54, 242, 61, 101, 245, 78, 0} + + var bobSecret = SIDHSecretKeyBob{Scalar: m_B} + + for n := 0; n < b.N; n++ { + bobSecret.SharedSecret(&benchSharedSecretAlicePublic) + } +} + +func BenchmarkSharedSecretBobSlow(b *testing.B) { + // m_B = 3*randint(0,3^238) + var m_B = [48]uint8{246, 217, 158, 190, 100, 227, 224, 181, 171, 32, 120, 72, 92, 115, 113, 62, 103, 57, 71, 252, 166, 121, 126, 201, 55, 99, 213, 234, 243, 228, 171, 68, 9, 239, 214, 37, 255, 242, 217, 180, 25, 54, 242, 61, 101, 245, 78, 0} + + var bobSecret = SIDHSecretKeyBob{Scalar: m_B} + + for n := 0; n < b.N; n++ { + bobSharedSecretSlow(&benchSharedSecretAlicePublic, &bobSecret) + } +} diff --git a/vendor/github.com/kardianos/service/service_upstart_linux.go b/vendor/github.com/kardianos/service/service_upstart_linux.go index 61c601af..6e73e822 100644 --- a/vendor/github.com/kardianos/service/service_upstart_linux.go +++ b/vendor/github.com/kardianos/service/service_upstart_linux.go @@ -8,7 +8,9 @@ import ( "errors" "fmt" "os" + "os/exec" "os/signal" + "strings" "text/template" "time" ) @@ -17,6 +19,13 @@ func isUpstart() bool { if _, err := os.Stat("/sbin/upstart-udev-bridge"); err == nil { return true } + if _, err := os.Stat("/sbin/init"); err == nil { + if out, err := exec.Command("/sbin/init", "--version").Output(); err == nil { + if strings.Contains(string(out), "init (upstart") { + return true + } + } + } return false }