判断文件编码(UTF8,8BOM,16LE,16BE,ANSI)附string互转wstring、String互转Wchar_...
本帖最后由 2370177068 于 2023-12-24 18:41 编辑。
至于是不是原创,我自己也不知道怎么算:很多都是 Ctrl+C / Ctrl+V 拼凑来的,能用就行。
前面是字符转换,split是str分割成vector,get_last_error是获取错误文本
代码写得比较屎,你们用之前写个控制台测试一下吧
#pragma once
#ifndef MYTOOL_H
#define MYTOOL_H
#include <stdio.h>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <atlconv.h>
// MYTOOL groups stateless helper routines as static member functions:
// narrow/wide string conversion, string splitting, Win32 error-text lookup,
// and file-encoding detection. All member functions are defined out of line
// further down in this header.
class MYTOOL {
public:
MYTOOL() = default;
~MYTOOL() = default;
// Converts a narrow string to a wide string (the definition uses CP_ACP).
static std::wstring string2wstring(std::string str);
// Converts a wide string to a narrow string (the definition uses CP_ACP).
static std::string wstring2string(std::wstring wstr);
// Splits `str` into the pieces separated by `pattern`.
static std::vector<std::string> split(std::string str, std::string pattern);
// Converts the NUL-terminated wide string `wchar` and stores the result in `szDst`.
static void Wchar_tToString(std::string& szDst, wchar_t* wchar);
// Returns a heap-allocated wide copy of `str`; the definition allocates it
// with `new`, so the caller owns the returned pointer.
static wchar_t* StringToWchar_t(std::string& str);
// Returns the system message text for `errCode`. The default argument calls
// GetLastError() at the call site.
static std::string get_last_error(DWORD errCode = GetLastError());
// Returns 1 when the `size` bytes at `pBuffer` pass the UTF-8 check, 0 otherwise.
static int IsUTF8(const void* pBuffer, long size);
// Reads the file at `filePath` into `str` and returns an encoding code:
// 0 = read failed, 1 = UTF-8, 2 = UTF-16LE, 3 = UTF-16BE, 4 = UTF-8 with BOM,
// 5 = unknown (possibly ANSI).
static int CalculateFileEncoding(LPCSTR filePath, std::string& str);
// Same as CalculateFileEncoding, taking a wide-character path.
static int CalculateFileEncodingW(LPCWSTR filePath, std::string& str);
// Like CalculateFileEncoding, but returns the encoding as a name:
// "READ_FAIL", "UTF-8", "UTF-16LE_BOM", "UTF16_BE_BOM", "UTF8_BOM", "ANSI",
// or "ERROR" for an unexpected code.
static std::string 判断文件编码(LPCSTR filePath, std::string& 读到的文本);
// Wide-character-path variant of 判断文件编码.
static std::string 判断文件编码W(LPCWSTR filePath, std::string& 读到的文本);
// Reads a file opened with the CRT mode "r,ccs=UTF-8" and returns its text as
// a narrow string; `file` is a narrow path.
static std::string 读取文件UTF8(std::string file);
// Same as 读取文件UTF8, taking a wide-character path.
static std::string 读取文件UTF8W(const wchar_t* file);
private:
};
// Reads a whole text file through the CRT's "ccs=UTF-8" translated text mode
// and returns its content as a single narrow string.
//
// file: path of the file as a narrow string; it is widened with
//       string2wstring before the file is opened.
// Returns the lines of the file concatenated in order (each converted to a
// narrow string by Wchar_tToString), or an empty string when the file cannot
// be opened.
//
// Defined `inline` because this definition lives in a header.
inline std::string MYTOOL::读取文件UTF8(std::string file) {
    FILE* fp = nullptr;
    const auto err = _wfopen_s(&fp, string2wstring(file).c_str(), L"r,ccs=UTF-8");
    if (err != 0 || fp == nullptr) {
        if (fp != nullptr) {
            fclose(fp);  // do not leak a stream if one was handed back anyway
        }
        return "";
    }

    std::string sum;
    // fgetws needs a real array at least as large as the count passed to it.
    wchar_t line[1024] = { 0 };
    const int capacity = static_cast<int>(sizeof(line) / sizeof(line[0]));
    while (fgetws(line, capacity, fp) != nullptr) {
        std::string converted;
        Wchar_tToString(converted, line);
        sum += converted;
    }
    fclose(fp);
    return sum;
}
// Reads a whole text file through the CRT's "ccs=UTF-8" translated text mode
// and returns its content as a single narrow string.
//
// file: wide-character path of the file to open.
// Returns the lines of the file concatenated in order (each converted to a
// narrow string by Wchar_tToString), or an empty string when `file` is null
// or the file cannot be opened.
//
// Defined `inline` because this definition lives in a header.
inline std::string MYTOOL::读取文件UTF8W(const wchar_t* file) {
    if (file == nullptr) {
        return "";
    }
    FILE* fp = nullptr;
    const auto err = _wfopen_s(&fp, file, L"r,ccs=UTF-8");
    if (err != 0 || fp == nullptr) {
        if (fp != nullptr) {
            fclose(fp);  // do not leak a stream if one was handed back anyway
        }
        return "";
    }

    std::string sum;
    // fgetws needs a real array at least as large as the count passed to it.
    wchar_t line[1024] = { 0 };
    const int capacity = static_cast<int>(sizeof(line) / sizeof(line[0]));
    while (fgetws(line, capacity, fp) != nullptr) {
        std::string converted;
        Wchar_tToString(converted, line);
        sum += converted;
    }
    fclose(fp);
    return sum;
}
// Converts a narrow string in the system ANSI code page (CP_ACP) to UTF-16.
//
// str: the bytes to convert; embedded NUL bytes are converted like any other
//      character.
// Returns the converted wide string, or an empty string when `str` is empty
// or the conversion fails.
//
// The result is written straight into the std::wstring's own storage, so no
// manual allocation (and no new/delete[] pairing) is involved.
// Defined `inline` because this definition lives in a header.
inline std::wstring MYTOOL::string2wstring(std::string str) {
    if (str.empty()) {
        return std::wstring();  // MultiByteToWideChar rejects a zero-length input
    }
    const int srcLen = static_cast<int>(str.size());
    const int needed = MultiByteToWideChar(CP_ACP, 0, str.data(), srcLen, nullptr, 0);
    if (needed <= 0) {
        return std::wstring();
    }
    std::wstring result(static_cast<std::wstring::size_type>(needed), L'\0');
    const int written = MultiByteToWideChar(CP_ACP, 0, str.data(), srcLen, &result[0], needed);
    if (written <= 0) {
        return std::wstring();
    }
    result.resize(static_cast<std::wstring::size_type>(written));
    return result;
}
// Converts a UTF-16 string to a narrow string in the system ANSI code page
// (CP_ACP).
//
// wstr: the wide characters to convert; embedded NULs are converted like any
//       other character.
// Returns the converted narrow string, or an empty string when `wstr` is
// empty or the conversion fails.
//
// The result is written straight into the std::string's own storage, so no
// manual allocation (and no new/delete[] pairing) is involved.
// Defined `inline` because this definition lives in a header.
inline std::string MYTOOL::wstring2string(std::wstring wstr) {
    if (wstr.empty()) {
        return std::string();  // WideCharToMultiByte rejects a zero-length input
    }
    const int srcLen = static_cast<int>(wstr.size());
    const int needed = WideCharToMultiByte(CP_ACP, 0, wstr.data(), srcLen, nullptr, 0, nullptr, nullptr);
    if (needed <= 0) {
        return std::string();
    }
    std::string result(static_cast<std::string::size_type>(needed), '\0');
    const int written = WideCharToMultiByte(CP_ACP, 0, wstr.data(), srcLen, &result[0], needed, nullptr, nullptr);
    if (written <= 0) {
        return std::string();
    }
    result.resize(static_cast<std::string::size_type>(written));
    return result;
}
// Splits `str` at every non-overlapping occurrence of `pattern`, scanning
// left to right, and returns the pieces in order.
//
// Empty pieces are kept: split("a,,b", ",") yields {"a", "", "b"},
// split("a,", ",") yields {"a", ""} and split("", ",") yields {""}.
// An empty `pattern` cannot delimit anything: the result is then {str}, or an
// empty vector when `str` is empty as well.
//
// The search runs on the caller's text only and uses size_type positions, so
// a delimiter can never be matched across the end of the input and a failed
// find (npos) is never mistaken for a valid position.
// Defined `inline` because this definition lives in a header.
inline std::vector<std::string> MYTOOL::split(std::string str, std::string pattern) {
    std::vector<std::string> result;
    if (pattern.empty()) {
        if (!str.empty()) {
            result.push_back(str);
        }
        return result;
    }

    std::string::size_type begin = 0;
    for (;;) {
        const std::string::size_type hit = str.find(pattern, begin);
        if (hit == std::string::npos) {
            break;
        }
        result.push_back(str.substr(begin, hit - begin));
        begin = hit + pattern.size();
    }
    // Whatever follows the last delimiter (possibly nothing) is the final piece.
    result.push_back(str.substr(begin));
    return result;
}
// Converts a NUL-terminated UTF-16 string to a narrow string in the system
// ANSI code page (CP_ACP).
//
// szDst: receives the converted text; left empty when `wchar` is null, empty,
//        or the conversion fails.
// wchar: NUL-terminated wide string to convert (not modified).
//
// The required size is measured with the same code page that performs the
// conversion, so the destination is always large enough.
// Defined `inline` because this definition lives in a header.
inline void MYTOOL::Wchar_tToString(std::string& szDst, wchar_t* wchar) {
    szDst.clear();
    if (wchar == nullptr) {
        return;
    }
    // With a source length of -1 the reported size includes the terminator.
    const int needed = WideCharToMultiByte(CP_ACP, 0, wchar, -1, nullptr, 0, nullptr, nullptr);
    if (needed <= 1) {
        return;  // 0 = failure, 1 = only the terminator
    }
    std::string converted(static_cast<std::string::size_type>(needed), '\0');
    const int written = WideCharToMultiByte(CP_ACP, 0, wchar, -1, &converted[0], needed, nullptr, nullptr);
    if (written <= 0) {
        return;
    }
    converted.resize(static_cast<std::string::size_type>(written) - 1);  // drop the terminator
    szDst.swap(converted);
}
wchar_t* MYTOOL::StringToWchar_t(std::string& str) {
wchar_t* text1 = new wchar_t;
swprintf(text1, str.size() + 1, L"%S ", str.c_str());
return text1;
}
// Returns the system-provided description of a Win32 error code as a narrow
// (ANSI code page) string.
//
// errCode: the error code to describe; 0 means "use GetLastError()".
// Returns the message text exactly as FormatMessage produced it, or the
// placeholder "{未定义错误描述(<code>)}" when the system has no text for the
// code.
//
// FormatMessageA is called explicitly so the function compiles and behaves
// the same in both UNICODE and ANSI builds.
// Defined `inline` because this definition lives in a header.
inline std::string MYTOOL::get_last_error(DWORD errCode) {
    if (errCode == 0) {
        errCode = GetLastError();
    }

    LPSTR message = nullptr;
    // With FORMAT_MESSAGE_ALLOCATE_BUFFER the lpBuffer argument is really the
    // address of the pointer that receives a LocalAlloc'ed buffer.
    const DWORD length = FormatMessageA(
        FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
        nullptr, errCode, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
        reinterpret_cast<LPSTR>(&message), 0, nullptr);

    if (length == 0 || message == nullptr) {
        if (message != nullptr) {
            LocalFree(message);
        }
        // No description available: report the numeric code instead.
        return "{未定义错误描述(" + std::to_string(errCode) + ")}";
    }

    std::string err(message, length);
    LocalFree(message);
    return err;
}
// Checks whether a byte buffer is structurally valid UTF-8.
//
// pBuffer: start of the bytes to inspect.
// size:    number of bytes to inspect.
// Returns 1 when every complete sequence in the buffer has a valid lead byte
// followed by the right number of continuation bytes (10xxxxxx), 0 otherwise.
//
// Sequences of 1 to 4 bytes are accepted (lead bytes 00-7F, C0-DF, E0-EF,
// F0-F4). A multi-byte sequence cut off by the end of the buffer is
// tolerated, so a prefix of a longer text can be checked. A null or empty
// buffer counts as valid. Pure-ASCII data is reported as UTF-8.
// Defined `inline` because this definition lives in a header.
inline int MYTOOL::IsUTF8(const void* pBuffer, long size) {
    if (pBuffer == nullptr || size <= 0) {
        return 1;
    }
    const unsigned char* cur = static_cast<const unsigned char*>(pBuffer);
    const unsigned char* const end = cur + size;

    while (cur < end) {
        const unsigned char lead = *cur;
        int trailing = 0;  // continuation bytes expected after the lead byte
        if (lead < 0x80) {
            trailing = 0;
        } else if (lead < 0xC0) {
            return 0;      // a continuation byte cannot start a sequence
        } else if (lead < 0xE0) {
            trailing = 1;
        } else if (lead < 0xF0) {
            trailing = 2;
        } else if (lead < 0xF5) {
            trailing = 3;
        } else {
            return 0;      // F5-FF never appear in UTF-8
        }

        if (end - cur <= trailing) {
            break;         // sequence truncated by the end of the buffer
        }
        for (int k = 1; k <= trailing; ++k) {
            if ((cur[k] & 0xC0) != 0x80) {
                return 0;
            }
        }
        cur += trailing + 1;
    }
    return 1;
}
// Reads a file into memory and guesses its text encoding from the leading
// byte-order mark or, failing that, from a UTF-8 validity check.
//
// filePath: narrow (ANSI) path of the file to inspect.
// str:      receives the raw bytes of the file (including any BOM and any
//           embedded NUL bytes); left untouched when 0 is returned.
// Returns:
//   0  the file could not be opened, sized, or read
//   1  UTF-8 without BOM (pure ASCII also lands here)
//   2  UTF-16LE (BOM FF FE)
//   3  UTF-16BE (BOM FE FF)
//   4  UTF-8 with BOM (EF BB BF)
//   5  none of the above, possibly ANSI
//
// NOTE: the size is taken from GetFileSize without the high part, so files
// of 4 GiB or more are not supported.
// Defined `inline` because this definition lives in a header.
inline int MYTOOL::CalculateFileEncoding(LPCSTR filePath, std::string& str) {
    const HANDLE hFile = CreateFileA(filePath, FILE_GENERIC_READ, FILE_SHARE_READ, nullptr,
                                     OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr);
    if (hFile == INVALID_HANDLE_VALUE) {
        return 0;  // nothing to close: the handle was never opened
    }

    const DWORD fileSize = GetFileSize(hFile, nullptr);
    if (fileSize == INVALID_FILE_SIZE) {
        CloseHandle(hFile);
        return 0;
    }

    // The std::string owns the buffer, so every return path releases it.
    std::string data(fileSize, '\0');
    DWORD total = 0;
    bool readOk = true;
    while (total < fileSize) {
        DWORD got = 0;
        if (!ReadFile(hFile, &data[total], fileSize - total, &got, nullptr)) {
            printf("ReadFile failed : %s", get_last_error().c_str());
            readOk = false;
            break;
        }
        if (got == 0) {
            break;  // end of file reached earlier than the reported size
        }
        total += got;
    }
    CloseHandle(hFile);
    if (!readOk) {
        return 0;
    }
    data.resize(total);

    // Compare as unsigned bytes: plain char may be signed, which would turn
    // 0xFF into -1 and make the BOM comparisons fail.
    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(data.data());
    const std::string::size_type count = data.size();
    int encoding = 5;
    if (count >= 2 && bytes[0] == 0xFF && bytes[1] == 0xFE) {
        encoding = 2;
    } else if (count >= 2 && bytes[0] == 0xFE && bytes[1] == 0xFF) {
        encoding = 3;
    } else if (count >= 3 && bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) {
        encoding = 4;
    } else if (IsUTF8(data.c_str(), static_cast<long>(count) + 1)) {
        // The terminating NUL is included on purpose: it makes a multi-byte
        // sequence cut off at the end of the file count as invalid.
        encoding = 1;
    }

    str.swap(data);
    return encoding;
}
// Reads a file into memory and guesses its text encoding from the leading
// byte-order mark or, failing that, from a UTF-8 validity check.
//
// filePath: wide-character path of the file to inspect.
// str:      receives the raw bytes of the file (including any BOM and any
//           embedded NUL bytes); left untouched when 0 is returned.
// Returns:
//   0  the file could not be opened, sized, or read
//   1  UTF-8 without BOM (pure ASCII also lands here)
//   2  UTF-16LE (BOM FF FE)
//   3  UTF-16BE (BOM FE FF)
//   4  UTF-8 with BOM (EF BB BF)
//   5  none of the above, possibly ANSI
//
// NOTE: the size is taken from GetFileSize without the high part, so files
// of 4 GiB or more are not supported.
// Defined `inline` because this definition lives in a header.
inline int MYTOOL::CalculateFileEncodingW(LPCWSTR filePath, std::string& str) {
    const HANDLE hFile = CreateFileW(filePath, FILE_GENERIC_READ, FILE_SHARE_READ, nullptr,
                                     OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr);
    if (hFile == INVALID_HANDLE_VALUE) {
        return 0;  // nothing to close: the handle was never opened
    }

    const DWORD fileSize = GetFileSize(hFile, nullptr);
    if (fileSize == INVALID_FILE_SIZE) {
        CloseHandle(hFile);
        return 0;
    }

    // The std::string owns the buffer, so every return path releases it.
    std::string data(fileSize, '\0');
    DWORD total = 0;
    bool readOk = true;
    while (total < fileSize) {
        DWORD got = 0;
        if (!ReadFile(hFile, &data[total], fileSize - total, &got, nullptr)) {
            printf("ReadFile failed : %s", get_last_error().c_str());
            readOk = false;
            break;
        }
        if (got == 0) {
            break;  // end of file reached earlier than the reported size
        }
        total += got;
    }
    CloseHandle(hFile);
    if (!readOk) {
        return 0;
    }
    data.resize(total);

    // Compare as unsigned bytes: plain char may be signed, which would turn
    // 0xFF into -1 and make the BOM comparisons fail.
    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(data.data());
    const std::string::size_type count = data.size();
    int encoding = 5;
    if (count >= 2 && bytes[0] == 0xFF && bytes[1] == 0xFE) {
        encoding = 2;
    } else if (count >= 2 && bytes[0] == 0xFE && bytes[1] == 0xFF) {
        encoding = 3;
    } else if (count >= 3 && bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) {
        encoding = 4;
    } else if (IsUTF8(data.c_str(), static_cast<long>(count) + 1)) {
        // The terminating NUL is included on purpose: it makes a multi-byte
        // sequence cut off at the end of the file count as invalid.
        encoding = 1;
    }

    str.swap(data);
    return encoding;
}
// Detects the encoding of a file and returns it as a name.
//
// filePath:   narrow (ANSI) path of the file to inspect.
// 读到的文本: receives the file content as delivered by CalculateFileEncoding.
// Returns the name for the code reported by CalculateFileEncoding:
//   0 -> "READ_FAIL", 1 -> "UTF-8", 2 -> "UTF-16LE_BOM", 3 -> "UTF16_BE_BOM",
//   4 -> "UTF8_BOM", 5 -> "ANSI" (none of the others matched),
//   any other value -> "ERROR".
//
// Defined `inline` because this definition lives in a header.
inline std::string MYTOOL::判断文件编码(LPCSTR filePath, std::string& 读到的文本) {
    // Indexed by the return code of CalculateFileEncoding.
    static const char* const kEncodingNames[] = {
        "READ_FAIL",
        "UTF-8",
        "UTF-16LE_BOM",
        "UTF16_BE_BOM",
        "UTF8_BOM",
        "ANSI",
    };
    const int nameCount = static_cast<int>(sizeof(kEncodingNames) / sizeof(kEncodingNames[0]));

    const int code = CalculateFileEncoding(filePath, 读到的文本);
    if (code < 0 || code >= nameCount) {
        return "ERROR";
    }
    return kEncodingNames[code];
}
// Detects the encoding of a file and returns it as a name.
//
// filePath:   wide-character path of the file to inspect.
// 读到的文本: receives the file content as delivered by CalculateFileEncodingW.
// Returns the name for the code reported by CalculateFileEncodingW:
//   0 -> "READ_FAIL", 1 -> "UTF-8", 2 -> "UTF-16LE_BOM", 3 -> "UTF16_BE_BOM",
//   4 -> "UTF8_BOM", 5 -> "ANSI" (none of the others matched),
//   any other value -> "ERROR".
//
// Defined `inline` because this definition lives in a header.
inline std::string MYTOOL::判断文件编码W(LPCWSTR filePath, std::string& 读到的文本) {
    // Indexed by the return code of CalculateFileEncodingW.
    static const char* const kEncodingNames[] = {
        "READ_FAIL",
        "UTF-8",
        "UTF-16LE_BOM",
        "UTF16_BE_BOM",
        "UTF8_BOM",
        "ANSI",
    };
    const int nameCount = static_cast<int>(sizeof(kEncodingNames) / sizeof(kEncodingNames[0]));

    const int code = CalculateFileEncodingW(filePath, 读到的文本);
    if (code < 0 || code >= nameCount) {
        return "ERROR";
    }
    return kEncodingNames[code];
}
#endif // MYTOOL_H
谢谢楼主分享。
受教了,感谢大佬。
收藏了,感谢。
先收藏,以后转化成其它语言,可能用得上。{:1_918:}
页:
[1]