本帖最后由 5omggx 于 2022-5-2 17:15 编辑
用途:在指定的文件、或指定目录下的各个文件中,搜索其内容是否包含特定字符串(分别按 utf-8、utf-16-le、utf-16-be、gbk 编码进行匹配)
用法:python.exe binSearch.py "需要搜索的字符串" 需要搜索的文件或目录路径
# coding="utf-8"
import sys
import os
import glob
import msvcrt
# binSearch: report every byte offset at which a text string occurs inside a
# file (or inside each regular file directly under a directory), trying the
# string in several text encodings.
#
# Command line: python.exe binSearch.py "Search String" searchpath

# Encodings the search string is converted to, in reporting order.
SEARCH_ENCODINGS = ('utf-8', 'utf-16-le', 'utf-16-be', 'gbk')

USAGE = 'usage: python.exe binSearch.py "Search String" [searchpath]'


def find_offsets(data, needle):
    """Yield the offset of each non-overlapping occurrence of needle in data.

    Args:
        data: bytes-like object to scan.
        needle: byte pattern to look for.

    Yields:
        Integer offsets into data, in ascending order. After a hit the scan
        resumes past the end of that hit, so occurrences never overlap.
    """
    # bytes.find(b'') succeeds at the start offset without advancing, so an
    # empty pattern would make the loop below spin forever.
    if not needle:
        return
    step = len(needle)
    offset = data.find(needle)
    while offset != -1:
        yield offset
        offset = data.find(needle, offset + step)


def encode_search_string(text, encodings=SEARCH_ENCODINGS):
    """Encode text once per encoding, skipping encodings that cannot hold it.

    Args:
        text: the string to search for.
        encodings: iterable of codec names to try.

    Returns:
        List of (encoding_name, encoded_bytes) pairs in the order given.
        Encodings that raise UnicodeEncodeError for this text, or that
        produce an empty byte string, are left out.
    """
    patterns = []
    for encoding in encodings:
        try:
            encoded = text.encode(encoding)
        except UnicodeEncodeError:
            # e.g. characters outside GBK: that encoding simply cannot match,
            # but the remaining encodings are still worth searching.
            continue
        if encoded:
            patterns.append((encoding, encoded))
    return patterns


def collect_files(search_path):
    """Return the list of files to scan for search_path.

    Args:
        search_path: path of a single file or of a directory.

    Returns:
        [search_path] when it is a regular file; otherwise the sorted paths
        of the regular files matched by '*' directly inside search_path
        (an empty list when nothing matches).
    """
    if os.path.isfile(search_path):
        return [search_path]
    # glob.escape keeps characters such as '[' in the directory name literal.
    # The '*' pattern also picks up names without a dot, and the isfile filter
    # drops sub-directories, which cannot be opened for reading.
    pattern = os.path.join(glob.escape(search_path), '*')
    return sorted(p for p in glob.glob(pattern) if os.path.isfile(p))


def main(argv):
    """Run the search described by argv and print every hit.

    Args:
        argv: argument list shaped like sys.argv
            (program name, search string, search path).

    Returns:
        Number of hits printed; 0 when only the usage text was shown.
    """
    # An empty search string can never produce a meaningful hit, so it is
    # treated like missing arguments.
    if len(argv) <= 2 or not argv[1]:
        print(USAGE)
        return 0

    search_str, search_path = argv[1], argv[2]
    if os.path.isfile(search_path):
        print('处理文件: ' + search_path)
    else:
        print('处理目录: ' + search_path + r'\*.*')

    # Encoding does not depend on the file, so do it once up front.
    patterns = encode_search_string(search_str)

    hits = 0
    for path in collect_files(search_path):
        try:
            with open(path, 'rb') as handle:
                data = handle.read()
        except OSError as err:
            # One locked or unreadable file should not end the whole search.
            print("skip unreadable file : %s (%s)" % (path, err))
            continue
        for encoding, needle in patterns:
            for offset in find_offsets(data, needle):
                print(
                    "found '%s' in file : %s offset : %d, encoding=%s"
                    % (search_str, path, offset, encoding))
                hits += 1
    return hits


if __name__ == "__main__":
    try:
        main(sys.argv)
    except Exception as e:
        trace_back = sys.exc_info()[2]
        # Report the deepest line that belongs to this script: the outermost
        # frame is always the main() call above, and the innermost frame may
        # be inside a library module.
        script_file = trace_back.tb_frame.f_code.co_filename
        line = trace_back.tb_lineno
        while trace_back is not None:
            if trace_back.tb_frame.f_code.co_filename == script_file:
                line = trace_back.tb_lineno
            trace_back = trace_back.tb_next
        print("Process Exception in line {} : ".format(line) + str(e))
    finally:
        print("search done, exit.")
        # Wait for a key press so the console window stays open.
        msvcrt.getch()
|