HighLightFanYa 发表于 2023-3-9 18:28

Rand 初探 - The Next Random Num

# Rand 初探 - The Next Random Num

## 源码分析

### `glibc2.27/stdlib/stdlib.h`

~~~c
/* The largest number rand will return (same as INT_MAX).*/
#define        RAND_MAX        2147483647
~~~

~~~python
>>> import math
>>> math.log(2147483648,2)
31.000000000000004
>>> 2**31-1
2147483647
~~~

### `glibc2.27/stdlib/rand.c`

~~~c
/* Return a random integer between 0 and RAND_MAX.*/
int rand (void)
{
return (int) __random ();
}
~~~

### `glibc2.27/stdlib/random.c`

~~~c
#include <libc-lock.h>
#include <limits.h>
#include <stddef.h>
#include <stdlib.h>


/* An improved random number generation package.In addition to the standard
   rand()/srand() like interface, this package also has a special state info
   interface.The initstate() routine is called with a seed, an array of
   bytes, and a count of how many bytes are being passed in; this array is
   then initialized to contain information for random number generation with
   that much state information.Good sizes for the amount of state
   information are 32, 64, 128, and 256 bytes.The state can be switched by
   calling the setstate() function with the same array as was initialized
   with initstate().By default, the package runs with 128 bytes of state
   information and generates far better random numbers than a linear
   congruential generator.If the amount of state information is less than
   32 bytes, a simple linear congruential R.N.G. is used.Internally, the
   state information is treated as an array of longs; the zeroth element of
   the array is the type of R.N.G. being used (small integer); the remainder
   of the array is the state information for the R.N.G.Thus, 32 bytes of
   state information will give 7 longs worth of state information, which will
   allow a degree seven polynomial.(Note: The zeroth word of state
   information also has some other information stored in it; see setstate
   for details).The random number generation technique is a linear feedback
   shift register approach, employing trinomials (since there are fewer terms
   to sum up that way).In this approach, the least significant bit of all
   the numbers in the state table will act as a linear feedback shift register,
   and will have period 2^deg - 1 (where deg is the degree of the polynomial
   being used, assuming that the polynomial is irreducible and primitive).
   The higher order bits will have longer periods, since their values are
   also influenced by pseudo-random carries out of the lower bits.The
   total period of the generator is approximately deg*(2**deg - 1); thus
   doubling the amount of state information has a vast influence on the
   period of the generator.Note: The deg*(2**deg - 1) is an approximation
   only good for large deg, when the period of the shift register is the
   dominant factor.With deg equal to seven, the period is actually much
   longer than the 7*(2**7 - 1) predicted by this formula.*/



/* For each of the currently supported random number generators, we have a
   break value on the amount of state information (you need at least this many
   bytes of state info to support this random number generator), a degree for
   the polynomial (actually a trinomial) that the R.N.G. is based on, and
   separation between the two lower order coefficients of the trinomial.*/

/* Linear congruential.*/
#define        TYPE_0                0
#define        BREAK_0                8
#define        DEG_0                0
#define        SEP_0                0

/* x**7 + x**3 + 1.*/
#define        TYPE_1                1
#define        BREAK_1                32
#define        DEG_1                7
#define        SEP_1                3

/* x**15 + x + 1.*/
#define        TYPE_2                2
#define        BREAK_2                64
#define        DEG_2                15
#define        SEP_2                1

/* x**31 + x**3 + 1.*/
#define        TYPE_3                3
#define        BREAK_3                128
#define        DEG_3                31
#define        SEP_3                3

/* x**63 + x + 1.*/
#define        TYPE_4                4
#define        BREAK_4                256
#define        DEG_4                63
#define        SEP_4                1


/* Array versions of the above information to make code run faster.
   Relies on fact that TYPE_i == i.*/

#define        MAX_TYPES        5        /* Max number of types above.*/


/* Initially, everything is set up as if from:
        initstate(1, randtbl, 128);
   Note that this initialization takes advantage of the fact that srandom
   advances the front and rear pointers 10*rand_deg times, and hence the
   rear pointer which starts at 0 will also end up at zero; thus the zeroth
   element of the state information, which contains info about the current
   position of the rear pointer is just
        (MAX_TYPES * (rptr - state)) + TYPE_3 == TYPE_3.*/

static int32_t randtbl =   // randtbl
{
    TYPE_3,

    -1726662223, 379960547, 1735697613, 1040273694, 1313901226,
    1627687941, -179304937, -2073333483, 1780058412, -1989503057,
    -615974602, 344556628, 939512070, -1249116260, 1507946756,
    -812545463, 154635395, 1388815473, -1926676823, 525320961,
    -1009028674, 968117788, -123449607, 1284210865, 435012392,
    -2017506339, -911064859, -370259173, 1132637927, 1398500161,
    -205601318,
};

// stdlib.h
struct random_data
{
    int32_t *fptr;                /* Front pointer.*/
    int32_t *rptr;                /* Rear pointer.*/
    int32_t *state;                /* Array of state values.*/
    int rand_type;                /* Type of random number generator.*/
    int rand_deg;                /* Degree of random number generator.*/
    int rand_sep;                /* Distance between front and rear.*/
    int32_t *end_ptr;                /* Pointer behind state table.*/
};

static struct random_data unsafe_state =
{
/* FPTR and RPTR are two pointers into the state info, a front and a rear
   pointer.These two pointers are always rand_sep places apart, as they
   cycle through the state information.(Yes, this does mean we could get
   away with just one pointer, but the code for random is more efficient
   this way).The pointers are left positioned as they would be from the call:
        initstate(1, randtbl, 128);
   (The position of the rear pointer, rptr, is really 0 (as explained above
   in the initialization of randtbl) because the state table pointer is set
   to point to randtbl (as explained below).)*/

    .fptr = &randtbl, // 1040273694
    .rptr = &randtbl, // -1726662223

/* The following things are the pointer to the state information table,
   the type of the current generator, the degree of the current polynomial
   being used, and the separation between the two pointers.
   Note that for efficiency of random, we remember the first location of
   the state information, not the zeroth.Hence it is valid to access
   state[-1], which is used to store the type of the R.N.G.
   Also, we remember the last location, since this is more efficient than
   indexing every time to find the address of the last element to see if
   the front and rear pointers have wrapped.*/

    .state = &randtbl, // -1726662223 ->

    .rand_type = TYPE_3, // 3
    .rand_deg = DEG_3,   // 31
    .rand_sep = SEP_3,   // 3

    .end_ptr = &randtbl)] // 32
};

/* POSIX.1c requires that there is mutual exclusion for the `rand' and
   `srand' functions to prevent concurrent calls from modifying common
   data.*/

/* Initialize the random number generator based on the given seed.If the
   type is the trivial no-state-information type, just remember the seed.
   Otherwise, initializes state[] based on the given "seed" via a linear
   congruential generator.Then, the pointers are set to known locations
   that are exactly rand_sep places apart.Lastly, it cycles the state
   information a given number of times to get rid of any initial dependencies
   introduced by the L.C.R.N.G.Note that the initialization of randtbl[]
   for default usage relies on values produced by this routine.*/

/* If we are using the trivial TYPE_0 R.N.G., just do the old linear
   congruential bit.Otherwise, we do our fancy trinomial stuff, which is the
   same in all the other cases due to all the global variables that have been
   set up.The basic operation is to add the number at the rear pointer into
   the one at the front pointer.Then both pointers are advanced to the next
   location cyclically in the table.The value returned is the sum generated,
   reduced to 31 bits by throwing away the "least random" low bit.
   Note: The code takes advantage of the fact that both the front and
   rear pointers can't wrap on the same call by not testing the rear
   pointer if the front one has wrapped.Returns a 31-bit random number.*/

long int
__random (void)
{
int32_t retval;

__libc_lock_lock (lock);

// 其中 retval 为返回值,unsafe_state 为一静态全局变量
(void) __random_r (&unsafe_state, &retval);

__libc_lock_unlock (lock);

return retval;
}
~~~

`glibc2.27/stdlib/random_r.c`

~~~C
struct random_poly_info
{
int seps;
int degrees;
};

static const struct random_poly_info random_poly_info =
{
{ SEP_0, SEP_1, SEP_2, SEP_3, SEP_4 },
{ DEG_0, DEG_1, DEG_2, DEG_3, DEG_4 }
};

int __random_r (struct random_data *buf, int32_t *result)
{
int32_t *state;

if (buf == NULL || result == NULL)
    goto fail;

state = buf->state;

if (buf->rand_type == TYPE_0)
    {
      int32_t val = state;
      val = ((state * 1103515245) + 12345) & 0x7fffffff;
      state = val;
      *result = val;
    }
else
    {
      int32_t *fptr = buf->fptr;
      int32_t *rptr = buf->rptr;
      int32_t *end_ptr = buf->end_ptr;
      int32_t val;
       
      // 生成随机数后会同时修改`*fptr = *fptr + *rptr`
      val = *fptr += *rptr;
      /* Chucking least random bit.*/
      // typedef signed int int32_t
      
      // 生成的随机数为 (uint32_t)(*fptr + *rptr) >> 1
      *result = (val >> 1) & 0x7fffffff;
      ++fptr;
      
      // 高版本
      //uint32_t val;
      //val = *fptr += (uint32_t) *rptr;
      //*result = val >> 1;
      //++fptr;
      
      //
      // 若 `fptr`指向了`randtbl`的末尾, 则会使其指向`randtbl`的第一个伪随机数。
      if (fptr >= end_ptr)
        {
          fptr = state;
          ++rptr;
        }
      else
        {
          ++rptr;
      // // 若 `rptr`指向了`randtbl`的末尾, 则会使其指向`randtbl`的第一个伪随机数。
          if (rptr >= end_ptr)
          rptr = state;
        }
      // 迭代
      buf->fptr = fptr;
      buf->rptr = rptr;
    }
return 0;

fail:
__set_errno (EINVAL);
return -1;
}
~~~

~~~c
#include<stdio.h>

int main()
{
    int a = -1;
    printf("%x\n",a);
    printf("%x\n",a >> 1);

    int b = 1;
    printf("%x\n",b);
    printf("%x\n",b >> 1);

    int c = -103;
    printf("%x\n",c + 10);
    printf("%x\n",(unsigned int)c + 10);
    printf("%x\n",((unsigned int)c + 10) >> 1);
}


➜random_demo ./a.out
// int a = -1;
ffffffff // printf("%x\n",a);
ffffffff // printf("%x\n",a >> 1);

// int b = 1;
1      // printf("%x\n",b);
0      // printf("%x\n",b >> 1);
   
// int c = -103;
ffffffa3 // printf("%x\n",c + 10);
ffffffa3 // printf("%x\n",(unsigned int)c + 10);
7fffffd1 // printf("%x\n",((unsigned int)c + 10) >> 1);
~~~

~~~txt
也就是说 在 c 语言中,>> 运算 是逻辑右移,是补符号位的
正数补 0
复数补 1
unsigned 补 0
~~~

我们可以看到:生成的随机数为`(uint32_t)(*fptr + *rptr) >> 1 `, 且生成随机数后会同时修改`*fptr = *fptr + *rptr`, 若 `fptr`或`rptr`指向了`randtbl`的末尾, 则会使其指向`randtbl`的第一个伪随机数。如果用通俗的c代码重写一遍:

~~~c
// glibc-2.23/stdlib/random_r.c
#include <stdio.h>
#include <stdlib.h>

int32_t randtbl =
{
3,
-1726662223, 379960547, 1735697613, 1040273694, 1313901226,
1627687941, -179304937, -2073333483, 1780058412, -1989503057,
-615974602, 344556628, 939512070, -1249116260, 1507946756,
-812545463, 154635395, 1388815473, -1926676823, 525320961,
-1009028674, 968117788, -123449607, 1284210865, 435012392,
-2017506339, -911064859, -370259173, 1132637927, 1398500161,
-205601318,
};

int main()
{
    int count = 0;
    int font = 4;
    int rear = 1;
    int ret = 0;
    int tmp;

    srand(0);

    for(int i = 0;; i++)
    {
      int32_t fptr = randtbl;
      int32_t rptr = randtbl;
      ret = ((fptr + rptr) >> 1) & 0x7fffffff;
      randtbl = fptr + rptr;
      font ++;
      if(font >= 32){
            font = 1;
            rear ++;
      }else{
            rear ++;
            if(rear >= 32){
                rear = 1;
            }
      }
      tmp = rand();
      printf("%10ld == %10ld -> %c \n", tmp, ret, tmp ==ret ? 'Y' : 'N');
      if(i>=100)
            break;
    }
    return 0;
}
~~~

我们可以看到运行结果:

~~~bash
1804289383 == 1804289383 -> Y
846930886 ==846930886 -> Y
1681692777 == 1681692777 -> Y
1714636915 == 1714636915 -> Y
1957747793 == 1957747793 -> Y
424238335 ==424238335 -> Y
719885386 ==719885386 -> Y
1649760492 == 1649760492 -> Y
596516649 ==596516649 -> Y
1189641421 == 1189641421 -> Y
1025202362 == 1025202362 -> Y
1350490027 == 1350490027 -> Y
783368690 ==783368690 -> Y
1102520059 == 1102520059 -> Y
2044897763 == 2044897763 -> Y
1967513926 == 1967513926 -> Y
1365180540 == 1365180540 -> Y
1540383426 == 1540383426 -> Y
304089172 ==304089172 -> Y
1303455736 == 1303455736 -> Y
35005211 ==   35005211 -> Y
521595368 ==521595368 -> Y
294702567 ==294702567 -> Y
1726956429 == 1726956429 -> Y
336465782 ==336465782 -> Y
861021530 ==861021530 -> Y
278722862 ==278722862 -> Y
233665123 ==233665123 -> Y
2145174067 == 2145174067 -> Y
468703135 ==468703135 -> Y
1101513929 == 1101513929 -> Y
1801979802 == 1801979802 -> Y
1315634022 == 1315634022 -> Y
635723058 ==635723058 -> Y
1369133069 == 1369133069 -> Y
1125898167 == 1125898167 -> Y
1059961393 == 1059961393 -> Y
2089018456 == 2089018456 -> Y
628175011 ==628175011 -> Y
1656478042 == 1656478042 -> Y
1131176229 == 1131176229 -> Y
1653377373 == 1653377373 -> Y
859484421 ==859484421 -> Y
1914544919 == 1914544919 -> Y
608413784 ==608413784 -> Y
756898537 ==756898537 -> Y
1734575198 == 1734575198 -> Y
1973594324 == 1973594324 -> Y
149798315 ==149798315 -> Y
2038664370 == 2038664370 -> Y
1129566413 == 1129566413 -> Y
184803526 ==184803526 -> Y
412776091 ==412776091 -> Y
1424268980 == 1424268980 -> Y
1911759956 == 1911759956 -> Y
749241873 ==749241873 -> Y
137806862 ==137806862 -> Y
42999170 ==   42999170 -> Y
982906996 ==982906996 -> Y
135497281 ==135497281 -> Y
511702305 ==511702305 -> Y
2084420925 == 2084420925 -> Y
1937477084 == 1937477084 -> Y
1827336327 == 1827336327 -> Y
572660336 ==572660336 -> Y
1159126505 == 1159126505 -> Y
805750846 ==805750846 -> Y
1632621729 == 1632621729 -> Y
1100661313 == 1100661313 -> Y
1433925857 == 1433925857 -> Y
1141616124 == 1141616124 -> Y
84353895 ==   84353895 -> Y
939819582 ==939819582 -> Y
2001100545 == 2001100545 -> Y
1998898814 == 1998898814 -> Y
1548233367 == 1548233367 -> Y
610515434 ==610515434 -> Y
1585990364 == 1585990364 -> Y
1374344043 == 1374344043 -> Y
760313750 ==760313750 -> Y
1477171087 == 1477171087 -> Y
356426808 ==356426808 -> Y
945117276 ==945117276 -> Y
1889947178 == 1889947178 -> Y
1780695788 == 1780695788 -> Y
709393584 ==709393584 -> Y
491705403 ==491705403 -> Y
1918502651 == 1918502651 -> Y
752392754 ==752392754 -> Y
1474612399 == 1474612399 -> Y
2053999932 == 2053999932 -> Y
1264095060 == 1264095060 -> Y
1411549676 == 1411549676 -> Y
1843993368 == 1843993368 -> Y
943947739 ==943947739 -> Y
1984210012 == 1984210012 -> Y
855636226 ==855636226 -> Y
1749698586 == 1749698586 -> Y
1469348094 == 1469348094 -> Y
1956297539 == 1956297539 -> Y
1036140795 == 1036140795 -> Y
~~~

此时我们也会发现`randtbl`是按照~~`srand(0)`~~`srand(1)`而生成的伪随机数表。

~~~c
int
__srandom_r (unsigned int seed, struct random_data *buf)
{
int type;
int32_t *state;
long int i;
int32_t word;
int32_t *dst;
int kc;

if (buf == NULL)
    goto fail;
type = buf->rand_type;
if ((unsigned int) type >= MAX_TYPES)
    goto fail;

state = buf->state;
/* We must make sure the seed is not 0.Take arbitrarily 1 in this case.*/
if (seed == 0)
    seed = 1;
state = seed;
if (type == TYPE_0)
    goto done;

dst = state;
word = seed;
kc = buf->rand_deg;
for (i = 1; i < kc; ++i)
    {
      /* This does:
           state = (16807 * state) % 2147483647;
       but avoids overflowing 31 bits.*/
      long int hi = word / 127773;
      long int lo = word % 127773;
      word = 16807 * lo - 2836 * hi;
      if (word < 0)
        word += 2147483647;
      *++dst = word;
    }

buf->fptr = &state;
buf->rptr = &state;
kc *= 10;
while (--kc >= 0)
    {
      int32_t discard;
      (void) __random_r (buf, &discard);
    }

done:
return 0;

fail:
return -1;
}
~~~



## 预测随机数

到此,我们便可以通过此方法来预测随机数,但是我们发现我们上述所述都是指随机种子固定且为0的情况,那要是随机种子不固定呢? 我们又该如何处理:

设初始随机数列表为 `s`, 生成的随机数列表`o`在有限域 `0x80000000` 下 (`0x7fffffff + 1`)(`INT_MAX`)(`RAND_MAX`) 有

~~~
o = (s + s) >> 1-> s = s + s
o = (s + s) >> 1-> s = s + s
o = (s + s) >> 1-> s = s + s
o = (s + s) >> 1-> s = s + s
       ······                     ······
o = (s + s) >> 1 -> s = s + s
o = (s + s) >> 1 -> s = s + s
o = (s + s) >> 1 -> s = s + s
~~~

也就是说在 31 个随机数生成之后,原来的随机数列表`s`会焕然一新,此时我用`s'`来代表新的初始随机数表。

~~~
o = (s' + s') >> 1   =>   o = (s + s + s + s) >> 1
o = (s' + s') >> 1   =>   o = (s + s + s + s) >> 1
o = (s' + s') >> 1   =>   o = (s + s + s + s) >> 1
          ······                                 ······
~~~

此时会有:`o = o + o`或`o = o + o + 1`两种情况,为什么会有如此情况呢

~~~
123 >> 1 = 61
bin(123) = 0b1111011

124 >> 1 = 62
bin(124) = 0b1111100

(123 + 124) >> 1 = 123
bin(123+124) = 0b11110111

100 >> 1 = 50
bin(100) = 0b1100100

150 >> 1 = 75
bin(150) = 0b10010110

(100 + 150) >> 1 = 125
bin(100 + 150) = 0b11111010
~~~

也就是说 当一个数为奇数时,它的二进制位末位为 1, 此时我们右移一位时,会将末尾的 1 移去;当一个数为偶数时,它的二进制位末位为 0, 此时我们右移一位时,会将末尾的 0 移去。

在此基础上会有如下四种情况:

1. 当 `s + s`的二进制位末位为 0, `s + s`的二进制位末位为 0, 对应的`o`右移之前的二进制位末位为 0:

   此时 `o = o + o`

2. 当 `s + s`的二进制位末位为 0, `s + s`的二进制位末位为 1, 对应的`o`右移之前的二进制位末位为 1:

   此时 `o = o + o`

3. 当 `s + s`的二进制位末位为 1, `s + s`的二进制位末位为 0, 对应的`o`右移之前的二进制位末位为 1:

   此时 `o = o + o`

4. 当 `s + s`的二进制位末位为 1, `s + s`的二进制位末位为 1, 对应的`o`右移之前的二进制位末位为 0:

   此时 `o = o + o + 1`

也就是说,当我们有了`o`即`o`的值之后,我们有大于`50%`的几率预测`o`,进一步,我们可以发现 **`o = o + o`**或**`o = o + o + 1`**

## Demo

~~~c
#include<stdio.h>
#include<stdlib.h>
#include <time.h>

void init()
{
    setbuf(stdin,0);
    setbuf(stdout,0);
    setbuf(stderr,0);
}

int main()
{
    init();
    int num = 0;
    srand((unsigned int)time(NULL));
    for(int i = 0; i < 32; i++){
      printf("The %2d Random Num is %ld \n", i, rand());
    }
    printf("Can You Guess The Next Num? \n");
    scanf("%d", &num);
    int tmp = rand();
    if(num == tmp){
      puts("ohhhhhhhhh! You are right !!!!");
    }else{
      puts("It looks like something is wrong :(");
    }
    return 0;
}
~~~

~~~python
from pwn import *
file = './demo4'
elf = context.binary = ELF(file)
io = process(file)
random_list = []

for i in range(32):
    io.recvuntil(b'is ');
    num = io.recvuntil(b' \n', drop=True)
    random_list.append(int(num))

io.recvuntil(b'Next Num? \n')
num = (random_list + random_list) & 0x7fffffff

io.sendline(bytes(str(num),encoding='utf8'))

print(io.recv())
~~~

~~~sh
┌┌──(fanya㉿ferity)-[~/Desktop/show/random_demo]
└─$ /bin/python /home/fanya/Desktop/show/random_demo/demo4_exp.py
[*] '/home/fanya/Desktop/show/random_demo/demo4'
    Arch:   amd64-64-little
    RELRO:    Partial RELRO
    Stack:    No canary found
    NX:       NX enabled
    PIE:      PIE enabled
[+] Starting local process './demo4': pid 119509
b'ohhhhhhhhh! You are right !!!!\n'
[*] Process './demo4' stopped with exit code 0 (pid 119509)
~~~

## Refer

- (https://mp.weixin.qq.com/s/VmanBTtEtSZzMYAe4BLXcA)
- (https://www.mathstat.dal.ca/~selinger/random/)

bailiang123 发表于 2023-3-15 12:36

6666啊666

Spacecraft 发表于 2023-3-15 12:38

不明觉厉, 感谢分享{:1_893:}
页: [1]
查看完整版本: Rand 初探 - The Next Random Num