Improve FIPS RSA keygen performance.

FIPS 186-4 has 5 different algorithms for key generation, and all of them rely on testing GCD(a,n) == 1 many times. Cachegrind was showing that during a RSA keygen operation, the function BN_gcd() was taking a considerable percentage of the total cycles. The default provider uses multiprime keygen, which seemed to be much faster. This is because it uses BN_mod_inverse() instead. For a 4096 bit key, the entropy of a key that was taking a long time to generate was recorded and fed back into subsequent runs. Roughly 40% of the cycle time was BN_gcd() with most of the remainder in the prime testing. Changing to use the inverse resulted in the cycle count being 96% in the prime testing. Reviewed-by: Paul Dale <pauli@openssl.org> Reviewed-by: Tomas Mraz <tomas@openssl.org> (Merged from https://github.com/openssl/openssl/pull/19578)
author: slontis <shane.lontis@oracle.com> 2022-11-02 03:01:34 +0100
committer: Tomas Mraz <tomas@openssl.org> 2022-11-21 11:17:59 +0100
commit: dd1d7bcb69994d81662e709b0ad838880b943870 (patch)
tree: f24c3ce03aa4d0bd374ce4cba03d0968cd886b9c
parent: Design document for the QUIC-TLS integration (diff)
download: openssl-dd1d7bcb69994d81662e709b0ad838880b943870.tar.xz
openssl-dd1d7bcb69994d81662e709b0ad838880b943870.zip
6 files changed, 85 insertions, 12 deletions
diff --git a/crypto/bn/bn_gcd.c b/crypto/bn/bn_gcd.c
index 91ad76a161..519bb4e951 100644
--- a/crypto/bn/bn_gcd.c
+++ b/crypto/bn/bn_gcd.c
@@ -534,6 +534,37 @@ BIGNUM *BN_mod_inverse(BIGNUM *in,
     return rv;
 }
 
+/*
+ * The numbers a and b are coprime if the only positive integer that is a
+ * divisor of both of them is 1.
+ * i.e. gcd(a,b) = 1.
+ *
+ * Coprimes have the property: b has a multiplicative inverse modulo a
+ * i.e there is some value x such that bx = 1 (mod a).
+ *
+ * Testing the modulo inverse is currently much faster than the constant
+ * time version of BN_gcd().
+ */
+int BN_are_coprime(BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
+{
+    int ret = 0;
+    BIGNUM *tmp;
+
+    BN_CTX_start(ctx);
+    tmp = BN_CTX_get(ctx);
+    if (tmp == NULL)
+        goto end;
+
+    ERR_set_mark();
+    BN_set_flags(a, BN_FLG_CONSTTIME);
+    ret = (BN_mod_inverse(tmp, a, b, ctx) != NULL);
+    /* Clear any errors (an error is returned if there is no inverse) */
+    ERR_pop_to_mark();
+end:
+    BN_CTX_end(ctx);
+    return ret;
+}
+
 /*-
  * This function is based on the constant-time GCD work by Bernstein and Yang:
  * https://eprint.iacr.org/2019/266
diff --git a/crypto/bn/bn_rsa_fips186_4.c b/crypto/bn/bn_rsa_fips186_4.c
index 770ae4d1fa..e3a2ad76af 100644
--- a/crypto/bn/bn_rsa_fips186_4.c
+++ b/crypto/bn/bn_rsa_fips186_4.c
@@ -286,14 +286,20 @@ int ossl_bn_rsa_fips186_4_derive_prime(BIGNUM *Y, BIGNUM *X, const BIGNUM *Xin,
             goto err;
     }
 
+    /*
+     * (Step 1) GCD(2r1, r2) = 1.
+     *    Note: This algorithm was doing a gcd(2r1, r2)=1 test before doing an
+     *    mod_inverse(2r1, r2) which are effectively the same operation.
+     *    (The algorithm assumed that the gcd test would be faster). Since the
+     *    mod_inverse is currently faster than calling the constant time
+     *    BN_gcd(), the call to BN_gcd() has been omitted. The inverse result
+     *    is used further down.
+     */
     if (!(BN_lshift1(r1x2, r1)
-            /* (Step 1) GCD(2r1, r2) = 1 */
-            && BN_gcd(tmp, r1x2, r2, ctx)
-            && BN_is_one(tmp)
+            && (BN_mod_inverse(tmp, r1x2, r2, ctx) != NULL)
             /* (Step 2) R = ((r2^-1 mod 2r1) * r2) - ((2r1^-1 mod r2)*2r1) */
-            && BN_mod_inverse(R, r2, r1x2, ctx)
+            && (BN_mod_inverse(R, r2, r1x2, ctx) != NULL)
             && BN_mul(R, R, r2, ctx) /* R = (r2^-1 mod 2r1) * r2 */
-            && BN_mod_inverse(tmp, r1x2, r2, ctx)
             && BN_mul(tmp, tmp, r1x2, ctx) /* tmp = (2r1^-1 mod r2)*2r1 */
             && BN_sub(R, R, tmp)
             /* Calculate 2r1r2 */
@@ -305,7 +311,8 @@ int ossl_bn_rsa_fips186_4_derive_prime(BIGNUM *Y, BIGNUM *X, const BIGNUM *Xin,
 
     /*
      * In FIPS 186-4 imax was set to 5 * nlen/2.
-     * Analysis by Allen Roginsky (See https://csrc.nist.gov/CSRC/media/Publications/fips/186/4/final/documents/comments-received-fips186-4-december-2015.pdf
+     * Analysis by Allen Roginsky
+     * (See https://csrc.nist.gov/CSRC/media/Publications/fips/186/4/final/documents/comments-received-fips186-4-december-2015.pdf
      * page 68) indicates this has a 1 in 2 million chance of failure.
      * The number has been updated to 20 * nlen/2 as used in
      * FIPS186-5 Appendix B.9 Step 9.
@@ -337,10 +344,9 @@ int ossl_bn_rsa_fips186_4_derive_prime(BIGNUM *Y, BIGNUM *X, const BIGNUM *Xin,
 
             /* (Step 7) If GCD(Y-1) == 1 & Y is probably prime then return Y */
             if (BN_copy(y1, Y) == NULL
-                    || !BN_sub_word(y1, 1)
-                    || !BN_gcd(tmp, y1, e, ctx))
+                    || !BN_sub_word(y1, 1))
                 goto err;
-            if (BN_is_one(tmp)) {
+            if (BN_are_coprime(y1, e, ctx)) {
                 int rv = BN_check_prime(Y, ctx, cb);
 
                 if (rv > 0)
diff --git a/doc/man3/BN_cmp.pod b/doc/man3/BN_cmp.pod
index f302818f21..e9ddf8fa2d 100644
--- a/doc/man3/BN_cmp.pod
+++ b/doc/man3/BN_cmp.pod
@@ -2,7 +2,8 @@
 
 =head1 NAME
 
-BN_cmp, BN_ucmp, BN_is_zero, BN_is_one, BN_is_word, BN_abs_is_word, BN_is_odd - BIGNUM comparison and test functions
+BN_cmp, BN_ucmp, BN_is_zero, BN_is_one, BN_is_word, BN_abs_is_word, BN_is_odd, BN_are_coprime
+- BIGNUM comparison and test functions
 
 =head1 SYNOPSIS
 
@@ -17,6 +18,8 @@ BN_cmp, BN_ucmp, BN_is_zero, BN_is_one, BN_is_word, BN_abs_is_word, BN_is_odd -
  int BN_abs_is_word(const BIGNUM *a, const BN_ULONG w);
  int BN_is_odd(const BIGNUM *a);
 
+ int BN_are_coprime(BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
+
 =head1 DESCRIPTION
 
 BN_cmp() compares the numbers I<a> and I<b>. BN_ucmp() compares their
@@ -26,6 +29,10 @@ BN_is_zero(), BN_is_one(), BN_is_word() and BN_abs_is_word() test if
 I<a> equals 0, 1, I<w>, or E<verbar>I<w>E<verbar> respectively.
 BN_is_odd() tests if I<a> is odd.
 
+BN_are_coprime() determines if B<a> and B<b> are coprime.
+B<ctx> is used internally for storing temporary variables.
+The values of B<a> and B<b> and B<ctx> must not be NULL.
+
 =head1 RETURN VALUES
 
 BN_cmp() returns -1 if I<a> E<lt> I<b>, 0 if I<a> == I<b> and 1 if
@@ -35,11 +42,16 @@ of I<a> and I<b>.
 BN_is_zero(), BN_is_one() BN_is_word(), BN_abs_is_word() and
 BN_is_odd() return 1 if the condition is true, 0 otherwise.
 
+BN_are_coprime() returns 1 if the B<BIGNUM>'s are coprime, otherwise it
+returns 0.
+
 =head1 HISTORY
 
 Prior to OpenSSL 1.1.0, BN_is_zero(), BN_is_one(), BN_is_word(),
 BN_abs_is_word() and BN_is_odd() were macros.
 
+The function BN_are_coprime() was added in OpenSSL 3.1.
+
 =head1 COPYRIGHT
 
 Copyright 2000-2021 The OpenSSL Project Authors. All Rights Reserved.
diff --git a/include/openssl/bn.h b/include/openssl/bn.h
index 333e201eae..ea706dca7f 100644
--- a/include/openssl/bn.h
+++ b/include/openssl/bn.h
@@ -350,6 +350,7 @@ int BN_gcd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
 int BN_kronecker(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx); /* returns
                                                                   * -2 for
                                                                   * error */
+int BN_are_coprime(BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
 BIGNUM *BN_mod_inverse(BIGNUM *ret,
                        const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx);
 BIGNUM *BN_mod_sqrt(BIGNUM *ret,
diff --git a/test/bntest.c b/test/bntest.c
index 85445b701b..b35b53df7e 100644
--- a/test/bntest.c
+++ b/test/bntest.c
@@ -41,6 +41,7 @@ typedef struct mpitest_st {
 
 static const int NUM0 = 100;           /* number of tests */
 static const int NUM1 = 50;            /* additional tests for some functions */
+static const int NUM_PRIME_TESTS = 20;
 static BN_CTX *ctx;
 
 /*
@@ -2722,6 +2723,25 @@ static int test_ctx_consttime_flag(void)
     return st;
 }
 
+static int test_coprime(void)
+{
+    BIGNUM *a = NULL, *b = NULL;
+    int ret = 0;
+
+    ret = TEST_ptr(a = BN_new())
+          && TEST_ptr(b = BN_new())
+          && TEST_true(BN_set_word(a, 66))
+          && TEST_true(BN_set_word(b, 99))
+          && TEST_int_eq(BN_are_coprime(a, b, ctx), 0)
+          && TEST_int_eq(BN_are_coprime(b, a, ctx), 0)
+          && TEST_true(BN_set_word(a, 67))
+          && TEST_int_eq(BN_are_coprime(a, b, ctx), 1)
+          && TEST_int_eq(BN_are_coprime(b, a, ctx), 1);
+    BN_free(a);
+    BN_free(b);
+    return ret;
+}
+
 static int test_gcd_prime(void)
 {
     BIGNUM *a = NULL, *b = NULL, *gcd = NULL;
@@ -2734,11 +2754,12 @@ static int test_gcd_prime(void)
 
     if (!TEST_true(BN_generate_prime_ex(a, 1024, 0, NULL, NULL, NULL)))
             goto err;
-    for (i = 0; i < NUM0; i++) {
+    for (i = 0; i < NUM_PRIME_TESTS; i++) {
         if (!TEST_true(BN_generate_prime_ex(b, 1024, 0,
                                             NULL, NULL, NULL))
                 || !TEST_true(BN_gcd(gcd, a, b, ctx))
-                || !TEST_true(BN_is_one(gcd)))
+                || !TEST_true(BN_is_one(gcd))
+                || !TEST_true(BN_are_coprime(a, b, ctx)))
             goto err;
     }
 
@@ -3216,6 +3237,7 @@ int setup_tests(void)
         ADD_ALL_TESTS(test_is_prime, (int)OSSL_NELEM(primes));
         ADD_ALL_TESTS(test_not_prime, (int)OSSL_NELEM(not_primes));
         ADD_TEST(test_gcd_prime);
+        ADD_TEST(test_coprime);
         ADD_ALL_TESTS(test_mod_exp, (int)OSSL_NELEM(ModExpTests));
         ADD_ALL_TESTS(test_mod_exp_consttime, (int)OSSL_NELEM(ModExpTests));
         ADD_TEST(test_mod_exp2_mont);
diff --git a/util/libcrypto.num b/util/libcrypto.num
index 1db533d4a2..4d46195c8c 100644
--- a/util/libcrypto.num
+++ b/util/libcrypto.num
@@ -5430,6 +5430,7 @@ OPENSSL_strncasecmp                     5557	3_0_3	EXIST::FUNCTION:
 EVP_RAND_CTX_up_ref                     ?	3_1_0	EXIST::FUNCTION:
 RAND_set0_public                        ?	3_1_0	EXIST::FUNCTION:
 RAND_set0_private                       ?	3_1_0	EXIST::FUNCTION:
+BN_are_coprime                          ?	3_1_0	EXIST::FUNCTION:
 X509_PUBKEY_set0_public_key             ?	3_2_0	EXIST::FUNCTION:
 OSSL_STACK_OF_X509_free                 ?	3_2_0	EXIST::FUNCTION:
 EVP_MD_CTX_dup                          ?	3_2_0	EXIST::FUNCTION:
author	slontis <shane.lontis@oracle.com>	2022-11-02 03:01:34 +0100
committer	Tomas Mraz <tomas@openssl.org>	2022-11-21 11:17:59 +0100
commit	dd1d7bcb69994d81662e709b0ad838880b943870 (patch)
tree	f24c3ce03aa4d0bd374ce4cba03d0968cd886b9c
parent	Design document for the QUIC-TLS integration (diff)
download	openssl-dd1d7bcb69994d81662e709b0ad838880b943870.tar.xz openssl-dd1d7bcb69994d81662e709b0ad838880b943870.zip