BrightXu 发表于 2022-7-25 00:05

C语言实现 Base64 和 Base32 编解码

# C语言实现 Base64 和 Base32 编解码

### Base64 编解码

#### 定义头文件 `base64.h`

```c
/*
 * base64.h - Base64 alphabet tables and the encode/decode entry points.
 *
 * The include guard is spelled BASE64_H: identifiers that begin with an
 * underscore followed by an uppercase letter are reserved for the
 * implementation.
 */
#ifndef BASE64_H
#define BASE64_H

#include <stdint.h>

/* Base64 alphabet: indices 0..63 are the symbols, index 64 is the padding character '='. */
static const char BASE64_MAP[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";

/*
 * Reverse lookup: ASCII code -> 6-bit symbol value.
 * The table ends at 'z', so larger character codes must not be used as an
 * index. Characters that are not part of the alphabet map to 0, which is also
 * the value of 'A'; callers that need validation must tell the two apart.
 */
static const uint8_t BASE64_REVERSE_MAP[] = {
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 62, 0, 0, 0, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4,
      5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 0, 0, 26, 27, 28, 29,
      30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
};

/*
 * Encodes the first len bytes of str as a NUL-terminated Base64 string.
 * The accompanying implementation allocates the result with malloc();
 * releasing it is presumably the caller's job.
 */
char *base64encode(const char *str, uint64_t len);

/*
 * Decodes the first len characters of base64Str and returns the bytes as a
 * NUL-terminated buffer. The accompanying implementation allocates the result
 * with malloc(); releasing it is presumably the caller's job.
 */
char *base64parse(const char *base64Str, uint64_t len);

#endif // BASE64_H

```

#### 思路

`Base64`编码就是用64个可打印字符表示二进制数据。$$64 = 2^6$$,所以需要`6 Bit`来表示一个base64字符。一个字节`8 Bit`,6和8的最小公倍数是`24`。编码的过程中,以`3`个字节(24 Bit)为一组转为`4`个base64字符;末尾不足`3`个字节时,缺少的位以`0`代替,并在输出末尾用`=`填充,使编码结果的长度始终是`4`的倍数。为方便转换,以一个无符号32位整数(`uint32_t`)为中间载体。先由高位到低位将这三个字节填充到这个整数中,然后由高位到低位依次读取6位,获取对应数值的字母,共读取4次。如下图所示。解码的过程是上述的逆过程。



#### 实现

```c
#include "base64.h"
#include <stdlib.h>

#ifndef CEIL_POS
// Round a non-negative value up to the next integer: CEIL_POS(2.345) => 3.
// Only meaningful for floating-point arguments; an integer argument is returned unchanged.
// The argument is parenthesized so that compound expressions expand correctly.
// Note that it is still evaluated more than once, so avoid side effects in X.
#define CEIL_POS(X) ((X) > (uint64_t)(X) ? (uint64_t)((X) + 1) : (uint64_t)(X))
#endif

/**
 * Encode a byte buffer as a NUL-terminated Base64 string.
 *
 * Every group of up to 3 input bytes becomes 4 output characters; a short
 * final group is completed with the padding character stored at
 * BASE64_MAP[64] (no padding is written if that slot is '\0').
 *
 * @param str  bytes to encode; embedded NUL bytes are allowed
 * @param len  number of bytes to read from str
 * @return     buffer obtained from malloc() that the caller must free(), or
 *             NULL if str is NULL while len > 0, if the output size does not
 *             fit in size_t, or if the allocation fails
 */
char *base64encode(const char *str, uint64_t len) {
    if (str == NULL && len > 0) {
        return NULL;
    }

    // 4 output characters per started 3-byte group, plus the terminator.
    // (len * 4 / 3 + 1 is too small whenever padding is needed.)
    const uint64_t groups = len / 3 + (len % 3 != 0);
    if (groups > (SIZE_MAX - 1) / 4) {
        return NULL;
    }

    char *base64Chars = malloc((size_t) (groups * 4 + 1));
    if (base64Chars == NULL) {
        return NULL;
    }

    // Read input as unsigned bytes so values >= 0x80 are not sign-extended.
    const unsigned char *in = (const unsigned char *) str;
    const char paddingChar = BASE64_MAP[64];
    size_t idx = 0;

    for (uint64_t i = 0; i < len; i += 3) {
        const uint64_t chunk = (len - i < 3) ? len - i : 3;

        // Pack the bytes high-to-low into 24 bits; missing bytes stay zero.
        uint32_t triplet = 0;
        for (uint64_t k = 0; k < 3; ++k) {
            triplet <<= 8;
            if (k < chunk) {
                triplet |= in[i + k];
            }
        }

        // n input bytes carry data for n + 1 symbols; the rest is padding.
        const uint64_t symbols = chunk + 1;
        for (uint64_t j = 0; j < 4; ++j) {
            if (j < symbols) {
                base64Chars[idx++] = BASE64_MAP[(triplet >> (6 * (3 - j))) & 0x3f];
            } else if (paddingChar != '\0') {
                base64Chars[idx++] = paddingChar;
            }
        }
    }

    base64Chars[idx] = '\0';
    return base64Chars;
}

/**
 * Decode Base64 text into a NUL-terminated byte buffer.
 *
 * Trailing padding characters (BASE64_MAP[64]) are ignored, and input whose
 * padding was omitted is accepted. The decoded length is not reported: the
 * result is only terminated with '\0', so payloads containing NUL bytes
 * cannot be measured with strlen().
 *
 * @param base64Str  Base64 text; need not be NUL-terminated
 * @param len        number of characters to read from base64Str
 * @return           buffer obtained from malloc() that the caller must free(),
 *                   or NULL if base64Str is NULL while len > 0, if the input
 *                   contains a character outside the alphabet, if the symbol
 *                   count is impossible (one symbol left over), or if the
 *                   allocation fails
 */
char *base64parse(const char *base64Str, uint64_t len) {
    if (base64Str == NULL && len > 0) {
        return NULL;
    }

    // Strip padding; the len > 0 guard keeps empty or all-padding input
    // from reading before the start of the buffer.
    while (len > 0 && base64Str[len - 1] == BASE64_MAP[64]) {
        len--;
    }

    // A single leftover symbol holds only 6 bits and cannot form a byte.
    const uint64_t tail = len % 4;
    if (tail == 1) {
        return NULL;
    }
    if (len / 4 > (SIZE_MAX - 3) / 3) {
        return NULL;
    }

    // n symbols in the final group yield n - 1 bytes.
    const uint64_t decodedLen = (len / 4) * 3 + (tail ? tail - 1 : 0);
    char *str = malloc((size_t) decodedLen + 1);
    if (str == NULL) {
        return NULL;
    }

    size_t idx = 0;
    for (uint64_t i = 0; i < len; i += 4) {
        const uint64_t symbols = (len - i < 4) ? len - i : 4;

        uint32_t triplet = 0;
        for (uint64_t j = 0; j < 4; ++j) {
            triplet <<= 6;
            if (j >= symbols) {
                continue;
            }
            const unsigned char c = (unsigned char) base64Str[i + j];
            // The reverse table stops at 'z'; never index past its end.
            const uint8_t value = (c < sizeof BASE64_REVERSE_MAP) ? BASE64_REVERSE_MAP[c] : 0;
            // The table stores 0 both for 'A' and for every invalid character.
            if (value == 0 && c != (unsigned char) BASE64_MAP[0]) {
                free(str);
                return NULL;
            }
            triplet |= value;
        }

        // Emit only the bytes that are fully covered by real symbols.
        for (uint64_t j = 0; j + 1 < symbols; ++j) {
            str[idx++] = (char) ((triplet >> (8 * (2 - j))) & 0xff);
        }
    }

    str[idx] = '\0';
    return str;
}

```

#### 使用

```c
// test_b64.c
// Demo: Base64-encode a sample sentence, then decode its known Base64 form, printing both.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "base64.h"

int main(void) {
    char str1[] = "Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.";
    char *encoded = base64encode(str1, strlen(str1));
    if (encoded == NULL) {
        // puts(NULL) is undefined behavior, so report the failure instead.
        fputs("base64encode failed\n", stderr);
        return 1;
    }
    puts(encoded);
    free(encoded);  // the codec returns malloc'd memory

    char str2[] = "TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlzIHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2YgdGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGludWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRoZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4=";
    char *decoded = base64parse(str2, strlen(str2));
    if (decoded == NULL) {
        fputs("base64parse failed\n", stderr);
        return 1;
    }
    puts(decoded);
    free(decoded);
    return 0;
}
```



### Base32 编解码

#### 定义头文件 `base32.h`

```c
/*
 * base32.h - Base32 alphabet tables and the encode/decode entry points.
 *
 * The include guard is spelled BASE32_H: identifiers that begin with an
 * underscore followed by an uppercase letter are reserved for the
 * implementation.
 */
#ifndef BASE32_H
#define BASE32_H

#include <stdint.h>

/* Base32 alphabet: indices 0..31 are the symbols, index 32 is the padding character '='. */
static const char BASE32_MAP[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567=";

/*
 * Reverse lookup: ASCII code -> 5-bit symbol value.
 * The table ends at 'Z', so larger character codes (including lowercase
 * letters) must not be used as an index. Characters that are not part of the
 * alphabet map to 0, which is also the value of 'A'; callers that need
 * validation must tell the two apart.
 */
static const uint8_t BASE32_REVERSE_MAP[] = {
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26, 27, 28, 29, 30, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6,
      7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
};

/*
 * Encodes the first len bytes of str as a NUL-terminated Base32 string.
 * The accompanying implementation allocates the result with malloc();
 * releasing it is presumably the caller's job.
 */
char *base32encode(const char *str, uint64_t len);

/*
 * Decodes the first len characters of base32Str and returns the bytes as a
 * NUL-terminated buffer. The accompanying implementation allocates the result
 * with malloc(); releasing it is presumably the caller's job.
 */
char *base32parse(const char *base32Str, uint64_t len);

#endif // BASE32_H

```

#### 思路

`Base32`编码就是用32个可打印字符表示二进制数据。$$32 = 2^5$$,所以需要`5 Bit`来表示一个base32字符。一个字节`8 Bit`,5和8的最小公倍数是`40`。编码的过程中,以`5`个字节(40 Bit)为一组转为`8`个base32字符;末尾不足`5`个字节时,缺少的位以`0`代替,并在输出末尾用`=`填充,使编码结果的长度始终是`8`的倍数。为方便转换,以一个无符号64位整数(`uint64_t`)为中间载体。先由高位到低位将这5个字节填充到这个整数中,然后由高位到低位依次读取5位,获取对应数值的字母,共读取8次。如下图所示。解码的过程是上述的逆过程。



#### 实现

```c
#include "base32.h"
#include <stdlib.h>

#ifndef CEIL_POS
// Round a non-negative value up to the next integer: CEIL_POS(2.345) => 3.
// Only meaningful for floating-point arguments; an integer argument is returned unchanged.
// The argument is parenthesized so that compound expressions expand correctly.
// Note that it is still evaluated more than once, so avoid side effects in X.
#define CEIL_POS(X) ((X) > (uint64_t)(X) ? (uint64_t)((X) + 1) : (uint64_t)(X))
#endif

/**
 * Encode a byte buffer as a NUL-terminated Base32 string.
 *
 * Every group of up to 5 input bytes becomes 8 output characters; a short
 * final group is completed with the padding character stored at
 * BASE32_MAP[32] (no padding is written if that slot is '\0').
 *
 * @param str  bytes to encode; embedded NUL bytes are allowed
 * @param len  number of bytes to read from str
 * @return     buffer obtained from malloc() that the caller must free(), or
 *             NULL if str is NULL while len > 0, if the output size does not
 *             fit in size_t, or if the allocation fails
 */
char *base32encode(const char *str, uint64_t len) {
    if (str == NULL && len > 0) {
        return NULL;
    }

    // 8 output characters per started 5-byte group, plus the terminator.
    // (len * 8 / 5 + 1 is too small whenever padding is needed.)
    const uint64_t groups = len / 5 + (len % 5 != 0);
    if (groups > (SIZE_MAX - 1) / 8) {
        return NULL;
    }

    char *base32Chars = malloc((size_t) (groups * 8 + 1));
    if (base32Chars == NULL) {
        return NULL;
    }

    // Read input as unsigned bytes so values >= 0x80 are not sign-extended.
    const unsigned char *in = (const unsigned char *) str;
    const char paddingChar = BASE32_MAP[32];
    size_t idx = 0;

    for (uint64_t i = 0; i < len; i += 5) {
        const uint64_t chunk = (len - i < 5) ? len - i : 5;

        // Pack the bytes high-to-low into 40 bits; missing bytes stay zero.
        uint64_t quintuple = 0;
        for (uint64_t k = 0; k < 5; ++k) {
            quintuple <<= 8;
            if (k < chunk) {
                quintuple |= in[i + k];
            }
        }

        // ceil(chunk * 8 / 5) symbols carry data: 1->2, 2->4, 3->5, 4->7, 5->8.
        const uint64_t symbols = (chunk * 8 + 4) / 5;
        for (uint64_t j = 0; j < 8; ++j) {
            if (j < symbols) {
                base32Chars[idx++] = BASE32_MAP[(quintuple >> (5 * (7 - j))) & 0x1f];
            } else if (paddingChar != '\0') {
                base32Chars[idx++] = paddingChar;
            }
        }
    }

    base32Chars[idx] = '\0';
    return base32Chars;
}

/**
 * Decode Base32 text into a NUL-terminated byte buffer.
 *
 * Trailing padding characters (BASE32_MAP[32]) are ignored, and input whose
 * padding was omitted is accepted. Only the uppercase alphabet in BASE32_MAP
 * is recognized. The decoded length is not reported: the result is only
 * terminated with '\0', so payloads containing NUL bytes cannot be measured
 * with strlen().
 *
 * @param base32Str  Base32 text; need not be NUL-terminated
 * @param len        number of characters to read from base32Str
 * @return           buffer obtained from malloc() that the caller must free(),
 *                   or NULL if base32Str is NULL while len > 0, if the input
 *                   contains a character outside the alphabet, if the symbol
 *                   count is impossible (1, 3 or 6 symbols left over), or if
 *                   the allocation fails
 */
char *base32parse(const char *base32Str, uint64_t len) {
    if (base32Str == NULL && len > 0) {
        return NULL;
    }

    // Strip padding; the len > 0 guard keeps empty or all-padding input
    // from reading before the start of the buffer.
    while (len > 0 && base32Str[len - 1] == BASE32_MAP[32]) {
        len--;
    }

    // Whole bytes end only after 2, 4, 5 or 7 symbols of a group.
    const uint64_t tail = len % 8;
    if (tail == 1 || tail == 3 || tail == 6) {
        return NULL;
    }
    if (len / 8 > (SIZE_MAX - 5) / 5) {
        return NULL;
    }

    // n symbols in the final group yield floor(n * 5 / 8) bytes.
    const uint64_t decodedLen = (len / 8) * 5 + tail * 5 / 8;
    char *str = malloc((size_t) decodedLen + 1);
    if (str == NULL) {
        return NULL;
    }

    size_t idx = 0;
    for (uint64_t i = 0; i < len; i += 8) {
        const uint64_t symbols = (len - i < 8) ? len - i : 8;

        uint64_t quintuple = 0;
        for (uint64_t j = 0; j < 8; ++j) {
            quintuple <<= 5;
            if (j >= symbols) {
                continue;
            }
            const unsigned char c = (unsigned char) base32Str[i + j];
            // The reverse table stops at 'Z'; never index past its end.
            const uint8_t value = (c < sizeof BASE32_REVERSE_MAP) ? BASE32_REVERSE_MAP[c] : 0;
            // The table stores 0 both for 'A' and for every invalid character.
            if (value == 0 && c != (unsigned char) BASE32_MAP[0]) {
                free(str);
                return NULL;
            }
            quintuple |= value;
        }

        // Emit only the bytes that are fully covered by real symbols.
        const uint64_t bytes = symbols * 5 / 8;
        for (uint64_t j = 0; j < bytes; ++j) {
            str[idx++] = (char) ((quintuple >> (8 * (4 - j))) & 0xff);
        }
    }

    str[idx] = '\0';
    return str;
}

```

#### 使用

```c
// test_b32.c
// Demo: Base32-encode a sample sentence, then decode its known Base32 form, printing both.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "base32.h"

int main(void) {
    char str1[] = "Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.";
    char *encoded = base32encode(str1, strlen(str1));
    if (encoded == NULL) {
        // puts(NULL) is undefined behavior, so report the failure instead.
        fputs("base32encode failed\n", stderr);
        return 1;
    }
    puts(encoded);
    free(encoded);  // the codec returns malloc'd memory

    char str2[] = "JVQW4IDJOMQGI2LTORUW4Z3VNFZWQZLEFQQG433UEBXW43DZEBRHSIDINFZSA4TFMFZW63RMEBRHK5BAMJ4SA5DINFZSA43JNZTXK3DBOIQHAYLTONUW63RAMZZG63JAN52GQZLSEBQW42LNMFWHGLBAO5UGSY3IEBUXGIDBEBWHK43UEBXWMIDUNBSSA3LJNZSCYIDUNBQXIIDCPEQGCIDQMVZHGZLWMVZGC3TDMUQG6ZRAMRSWY2LHNB2CA2LOEB2GQZJAMNXW45DJNZ2WKZBAMFXGIIDJNZSGKZTBORUWOYLCNRSSAZ3FNZSXEYLUNFXW4IDPMYQGW3TPO5WGKZDHMUWCAZLYMNSWKZDTEB2GQZJAONUG64TUEB3GK2DFNVSW4Y3FEBXWMIDBNZ4SAY3BOJXGC3BAOBWGKYLTOVZGKLQ=";
    char *decoded = base32parse(str2, strlen(str2));
    if (decoded == NULL) {
        fputs("base32parse failed\n", stderr);
        return 1;
    }
    puts(decoded);
    free(decoded);
    return 0;
}
```

BrightXu 发表于 2022-11-21 01:20

zhukoov 发表于 2022-11-20 13:47
楼主的代码给了我很大的启发.得知Base64的译码表即破解.那么在传输过程中如何准确识别Base64呢?

这跟破解有什么关系?base64只是一种数据编码方式,不存在加密解密之说,又何来破解可言?就像十六进制和十进制之间的关系,只是一种数据表示形式而已,base64编码后的内容只是对人类来说不可直接看懂罢了。而且Base64码表也有统一的规范和标准,大多数情况用的都是通用的码表。

zhukoov 发表于 2022-12-8 12:42

BrightXu 发表于 2022-11-21 01:20
这跟破解有什么关系?base64只是一种数据编码方式,不存在加密解密之说,又何来破解可言?就像十六进制和 ...

原来如此,谢谢楼主纠正,我还以为它本身是一种加密呢。现在我了解了,非常感谢。

sknbs 发表于 2022-7-25 01:32

学习了,这就去试一下

tl;dr 发表于 2022-7-25 06:31

QS69 发表于 2022-7-25 07:28

这都能行?

blindcat 发表于 2022-7-25 07:54

膜拜大佬

l441669899 发表于 2022-7-25 07:55

感谢楼主!

MI20220721 发表于 2022-7-25 08:26

有些看不懂,先学习学习!

lxfw2000 发表于 2022-7-25 08:41

学习了。下来找机会练一下手!

怜渠客 发表于 2022-7-25 08:46

自实现函数,学习学习

metoo2 发表于 2022-7-25 10:02

感谢楼主~~
页: [1] 2 3
查看完整版本: C语言实现 Base64 和 Base32 编解码