X-Git-Url: http://git.cascardo.eti.br/?a=blobdiff_plain;f=tests%2Ftest-hash.c;h=67a1f6c8903e61d4b18be6f8ae5b89884443877a;hb=d0a46cb4608e632f5028034762f0adde2ce947a0;hp=abec33d4280bbffb59daf5f8edee029c7b632722;hpb=06af0bbfb931ae5fabdb2d5da0035cd27c755433;p=cascardo%2Fovs.git

diff --git a/tests/test-hash.c b/tests/test-hash.c
index abec33d42..67a1f6c89 100644
--- a/tests/test-hash.c
+++ b/tests/test-hash.c
@@ -162,7 +162,7 @@ check_hash_bytes128(void (*hash)(const void *, size_t, uint32_t, ovs_u128 *),
         set_bit128(&in1, i, n_bits);
         hash(in0, sizeof(ovs_u128), 0, &out0);
         hash(&in1, sizeof(ovs_u128), 0, &out1);
-        if (!ovs_u128_equal(&out0, &out1)) {
+        if (!ovs_u128_equals(&out0, &out1)) {
             printf("%s hash not the same for non-64 aligned data "
                    "%016"PRIx64"%016"PRIx64" != %016"PRIx64"%016"PRIx64"\n",
                    name, out0.u64.lo, out0.u64.hi, out1.u64.lo, out1.u64.hi);
@@ -214,7 +214,7 @@ check_256byte_hash(void (*hash)(const void *, size_t, uint32_t, ovs_u128 *),
         set_bit128(in1, i, n_bits);
         hash(in0, sizeof(ovs_u128) * 16, 0, &out0);
         hash(in1, sizeof(ovs_u128) * 16, 0, &out1);
-        if (!ovs_u128_equal(&out0, &out1)) {
+        if (!ovs_u128_equals(&out0, &out1)) {
             printf("%s hash not the same for non-64 aligned data "
                    "%016"PRIx64"%016"PRIx64" != %016"PRIx64"%016"PRIx64"\n",
                    name, out0.u64.lo, out0.u64.hi, out1.u64.lo, out1.u64.hi);
@@ -242,94 +242,77 @@ check_256byte_hash(void (*hash)(const void *, size_t, uint32_t, ovs_u128 *),
 static void
 test_hash_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
 {
-    /* Check that all hashes computed with hash_words with one 1-bit (or no
-     * 1-bits) set within a single 32-bit word have different values in all
-     * 11-bit consecutive runs.
+    /*
+     * The following tests check that all hashes computed with hash_function
+     * with one 1-bit (or no 1-bits) set within a X-bit word have different
+     * values in all N-bit consecutive comparisons.
+     *
+     *    test_function(hash_function, test_name, N)
      *
      * Given a random distribution, the probability of at least one collision
-     * in any set of 11 bits is approximately
+     * in any set of N bits is approximately
      *
-     *                      1 - (proportion of same_bits)
-     *                          **(binomial_coefficient(n_bits_in_data + 1, 2))
-     *                   == 1 - ((2**11 - 1)/2**11)**C(33,2)
-     *                   == 1 - (2047/2048)**528
-     *                   =~ 0.22
+     *                      1 - (prob of no collisions)
+     *                          **(combination of all possible comparisons)
+     *                   == 1 - ((2**N - 1)/2**N)**C(X+1,2)
+     *                   == p
      *
-     * There are 21 ways to pick 11 consecutive bits in a 32-bit word, so if we
+     * There are (X-N) ways to pick N consecutive bits in a X-bit word, so if we
      * assumed independence then the chance of having no collisions in any of
-     * those 11-bit runs would be (1-0.22)**21 =~ .0044.  Obviously
-     * independence must be a bad assumption :-)
-     */
-    check_word_hash(hash_words_cb, "hash_words", 11);
-    check_word_hash(jhash_words_cb, "jhash_words", 11);
-
-    /* Check that all hash functions of with one 1-bit (or no 1-bits) set
-     * within three 32-bit words have different values in their lowest 12
-     * bits.
-     *
-     * Given a random distribution, the probability of at least one collision
-     * in 12 bits is approximately
+     * those X-bit runs would be (1-p)**(X-N) == q.  If this q is very small
+     * and we can also find a relatively small 'magic number' N such that there
+     * is no collision in any comparison, then it means we have a pretty good
+     * hash function.
      *
-     *                      1 - ((2**12 - 1)/2**12)**C(97,2)
-     *                   == 1 - (4095/4096)**4656
-     *                   =~ 0.68
+     * The values of each parameters mentioned above for the tested hash
+     * functions are summarized as follow:
      *
-     * so we are doing pretty well to not have any collisions in 12 bits.
-     */
-    check_3word_hash(hash_words, "hash_words");
-    check_3word_hash(jhash_words, "jhash_words");
-
-    /* Check that all hashes computed with hash_int with one 1-bit (or no
-     * 1-bits) set within a single 32-bit word have different values in all
-     * 12-bit consecutive runs.
-     *
-     * Given a random distribution, the probability of at least one collision
-     * in any set of 12 bits is approximately
+     * hash_function       X      N        p             q
+     * -------------      ---    ---    -------       -------
      *
-     *                      1 - ((2**12 - 1)/2**12)**C(33,2)
-     *                   == 1 - (4,095/4,096)**528
-     *                   =~ 0.12
+     * hash_words_cb       32     11     0.22          0.0044
+     * jhash_words_cb      32     11     0.22          0.0044
+     * hash_int_cb         32     12     0.12          0.0078
+     * hash_bytes128      128     19     0.0156        0.174
      *
-     * There are 20 ways to pick 12 consecutive bits in a 32-bit word, so if we
-     * assumed independence then the chance of having no collisions in any of
-     * those 12-bit runs would be (1-0.12)**20 =~ 0.078.  This refutes our
-     * assumption of independence, which makes it seem like a good hash
-     * function.
      */
+    check_word_hash(hash_words_cb, "hash_words", 11);
+    check_word_hash(jhash_words_cb, "jhash_words", 11);
     check_word_hash(hash_int_cb, "hash_int", 12);
+    check_hash_bytes128(hash_bytes128, "hash_bytes128", 19);
 
-    /* Check that all hashes computed with hash_bytes128 with one 1-bit (or no
-     * 1-bits) set within a single 128-bit word have different values in all
-     * 19-bit consecutive runs.
+    /*
+     * The following tests check that all hashes computed with hash_function
+     * with one 1-bit (or no 1-bits) set within Y X-bit word have different
+     * values in their lowest N bits.
+     *
+     *    test_function(hash_function, test_name, N)
      *
      * Given a random distribution, the probability of at least one collision
-     * in any set of 19 bits is approximately
+     * in any set of N bits is approximately
      *
-     *                      1 - ((2**19 - 1)/2**19)**C(129,2)
-     *                   == 1 - (524,287/524,288)**8256
-     *                   =~ 0.0156
+     *                      1 - (prob of no collisions)
+     *                          **(combination of all possible comparisons)
+     *                   == 1 - ((2**N - 1)/2**N)**C(Y*X+1,2)
+     *                   == p
      *
-     * There are 111 ways to pick 19 consecutive bits in a 128-bit word, so if
-     * we assumed independence then the chance of having no collisions in any of
-     * those 19-bit runs would be (1-0.0156)**111 =~ 0.174.  This refutes our
-     * assumption of independence, which makes it seem like a good hash
-     * function.
-     */
-    check_hash_bytes128(hash_bytes128, "hash_bytes128", 19);
-
-    /* Check that all hashes computed with hash_bytes128 with 1-bit (or no
-     * 1-bits) set within 16 128-bit words have different values in their
-     * lowest 23 bits.
+     * If this p is not very small and we can also find a relatively small
+     * 'magic number' N such that there is no collision in any comparison,
+     * then it means we have a pretty good hash function.
      *
-     * Given a random distribution, the probability of at least one collision
-     * in any set of 23 bits is approximately
+     * The values of each parameters mentioned above for the tested hash
+     * functions are summarized as follow:
      *
-     *                      1 - ((2**23 - 1)/2**23)**C(2049,2)
-     *                   == 1 - (8,388,607/8,388,608)**2,098,176
-     *                   =~ 0.22
+     * hash_function       Y      X      N        p
+     * -------------      ---    ---    ---    -------
+     *
+     * hash_words          3      32     12     0.68
+     * jhash_words         3      32     12     0.68
+     * hash_bytes128      16     128     23     0.22
      *
-     * so we are doing pretty well to not have any collisions in 23 bits.
      */
+    check_3word_hash(hash_words, "hash_words");
+    check_3word_hash(jhash_words, "jhash_words");
     check_256byte_hash(hash_bytes128, "hash_bytes128", 23);
 }