# Python source (copied from a forum "view as plain text / copy code" listing)
import os
import math
import pandas as pd
import datetime
# -*- coding: utf-8 -*-
# @Time : 2024/05/22
# @Author : fengjicheng
# @File : scan_disk_size.py
# @Software: Summarize folder sizes by directory depth
def get_drives(letters='F'):
    """Return the root paths of the requested Windows drives that exist.

    :param letters: iterable of drive letters to probe. Defaults to ``'F'``,
        the single drive this script has always scanned; pass a longer string
        such as ``'CDEF'`` to probe several drives.
    :return: list of root paths (for example ``F:\\``), one per letter whose
        root exists, in the order the letters were given
    """
    candidate_roots = (letter + ':\\' for letter in letters)
    return [root for root in candidate_roots if os.path.exists(root)]
def dfs_calculate_dir_size_and_export_to_excel(dir_path='.', current_depth=0, max_depth=4, excel_writer=None, sheet_name=None):
    """Walk ``dir_path`` depth-first and append one row per entry to an Excel sheet.

    Each entry of ``dir_path`` (apart from a few Windows system folders) yields
    a row holding the drive letter, whether the entry is a folder, its path,
    its size in bytes, a human-readable size and the depth it was found at.
    Directories are then descended into while ``current_depth < max_depth``.

    An entry whose size cannot be read is reported and skipped, so its
    siblings are still listed. Failures to list ``dir_path`` itself are
    reported and end the walk of that directory only.

    :param dir_path: directory to scan, defaults to the current directory
    :param current_depth: depth of ``dir_path`` within the walk, defaults to 0
    :param max_depth: deepest level that is still descended into, defaults to 4
    :param excel_writer: ``pandas.ExcelWriter`` receiving the rows; nothing is
        written when this or ``sheet_name`` is None. Appending reads
        ``max_row`` from the worksheet, as the openpyxl engine provides.
    :param sheet_name: name of the worksheet the rows are appended to
    :return: None
    """
    # Recycle bins and system folders are not worth reporting.
    skipped_names = ('System Volume Information', '$Recycle.Bin', '$RECYCLE.BIN', '$360Honeypot')
    drive_letter = dir_path.split(':')[0]
    try:
        # The context manager releases the directory handle even on errors.
        with os.scandir(dir_path) as entries:
            for entry in entries:
                if entry.name in skipped_names:
                    continue
                try:
                    is_file = entry.is_file()
                    size = entry.stat().st_size if is_file else getFileFolderSize(entry.path)
                except OSError as e:
                    # One unreadable entry must not hide the rest of the directory.
                    print(f"Cannot read {entry.path}: {e}. Skipping this entry.")
                    continue
                row_data = {
                    '盘符': drive_letter,
                    '是否为文件夹': '否' if is_file else '是',
                    '文件路径': entry.path,
                    '文件大小': size,
                    '友好的文件大小': convert_size(size),
                    '搜索层级': current_depth
                }
                df = pd.DataFrame([row_data])
                if excel_writer is not None and sheet_name is not None:
                    if sheet_name not in excel_writer.sheets:
                        # First row of the sheet: emit the header as well.
                        df.to_excel(excel_writer, sheet_name=sheet_name, index=False, header=True)
                    else:
                        # Append directly below the last row already written.
                        df.to_excel(excel_writer, sheet_name=sheet_name, index=False, header=False, startrow=excel_writer.sheets[sheet_name].max_row)
                if entry.is_dir() and current_depth < max_depth:
                    dfs_calculate_dir_size_and_export_to_excel(entry.path, current_depth + 1, max_depth, excel_writer, sheet_name)
    except PermissionError:
        print(f"Permission denied for accessing: {dir_path}. Skipping this directory.")
    except FileNotFoundError:
        print(f"Directory not found: {dir_path}. Continuing to the next one.")
    except Exception as e:
        print(f"An unexpected error occurred: {e}. Skipping this directory.")
def convert_size(size_bytes):
    """Format a byte count as a human-readable string such as ``"1.5 KB"``.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (YB) is reached, then rounded to two decimals.

    :param size_bytes: non-negative size in bytes
    :return: ``"0B"`` for zero, otherwise ``"<value> <unit>"``
    :raises ValueError: if ``size_bytes`` is negative
    """
    if size_bytes == 0:
        return "0B"
    if size_bytes < 0:
        raise ValueError("size_bytes must be non-negative, got %r" % (size_bytes,))
    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
    value = float(size_bytes)
    unit_index = 0
    # Repeated division avoids floating-point log rounding when choosing the
    # unit; the bound keeps huge values on the last unit instead of running
    # past the end of the tuple.
    while value >= 1024 and unit_index < len(size_name) - 1:
        value /= 1024
        unit_index += 1
    return f"{round(value, 2)} {size_name[unit_index]}"
def getFileFolderSize(fileOrFolderPath):
    """Return the size in bytes of a file, or the cumulative size of a folder.

    :param fileOrFolderPath: path of the file or directory to measure
    :return: 0 when the path does not exist or is neither a file nor a
        directory; the file size for a file; for a directory, the sum of the
        sizes of all files found by recursing through its sub-directories
    """
    if not os.path.exists(fileOrFolderPath):
        return 0
    if os.path.isfile(fileOrFolderPath):
        return os.path.getsize(fileOrFolderPath)
    if not os.path.isdir(fileOrFolderPath):
        return 0

    def child_size(child):
        # Directories are measured recursively, regular files directly;
        # anything else contributes nothing.
        child_path = os.path.join(fileOrFolderPath, child.name)
        if child.is_dir():
            return getFileFolderSize(child_path)
        if child.is_file():
            return os.path.getsize(child_path)
        return 0

    with os.scandir(fileOrFolderPath) as children:
        return sum(child_size(child) for child in children)
def main():
    """Scan every drive returned by ``get_drives`` and save the rows to one workbook.

    The workbook is named ``<YYYYmmddHHMMSS>_search_results.xlsx`` after the
    current local time and holds a single sheet, ``Search Results``, shared by
    all drives. Each drive is walked with a maximum depth of 3.

    :return: None
    """
    drives = get_drives()
    if not drives:
        # A workbook without any sheet cannot be saved by openpyxl, so there
        # is nothing useful to create when no drive is available.
        print("未找到可用的盘符,未生成结果文件。")
        return
    sheet_name = 'Search Results'
    # Timestamped name so repeated runs do not overwrite earlier results.
    output_file = datetime.datetime.now().strftime("%Y%m%d%H%M%S") + "_search_results.xlsx"
    # Requires openpyxl to be installed beforehand (pip install openpyxl).
    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        for drive in drives:
            dfs_calculate_dir_size_and_export_to_excel(drive, current_depth=0, max_depth=3, excel_writer=writer, sheet_name=sheet_name)
        if sheet_name not in writer.sheets:
            # No row was written (empty or unreadable drives): create the
            # sheet anyway so saving on exit from the block does not fail.
            pd.DataFrame().to_excel(writer, sheet_name=sheet_name, index=False)
    print(f"搜索完成,结果已输出到 {output_file}。")
# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()