Python 扫描目录下文件的一些信息
我已经记不清当初为什么写这个脚本了,大概是为了定时扫描文件的更新时间:如果两边的文件中有一边的更新时间较新,就从较新的那一边复制过来。脚本只有一个 py 文件,直接运行即可。
import os,time
import pandas as pd
from multiprocessing import Pool
def main_paths():
    """Prompt for a directory, record its top-level files, list its subdirectories.

    The prompt repeats until an existing directory is entered. Entering
    ``exit()`` (when no directory of that name exists) ends the program.

    Side effects:
        Sets the module-level ``pathd`` to the chosen directory and the
        module-level ``main_dirs`` to a DataFrame with one row per file
        located directly inside it: absolute path, size in MiB (rounded to
        four decimals), modification time and ``st_ctime`` (creation time on
        Windows, metadata-change time on most Unix systems).

    Returns:
        list[str]: paths of the immediate subdirectories of ``pathd``,
        excluding the recycle bin folder.

    Raises:
        SystemExit: if the user enters ``exit()``.
    """
    global pathd, main_dirs  # shared with save_paths()
    while True:
        pathd = input('请输入扫描文件夹:')
        if os.path.isdir(pathd):
            break
        if pathd == 'exit()':
            # The sentinel used to fall through to os.listdir('exit()') and
            # crash; terminate cleanly instead.
            raise SystemExit(0)
        print('目录不存在,请重新输入,如果想终止请输入:exit()')
    # Normalise separators only on platforms that have an alternate one
    # (Windows); on POSIX replacing '/' would corrupt the path.
    if os.altsep:
        pathd = pathd.replace(os.altsep, os.sep)

    time_format = '%Y-%m-%d %H:%M:%S'
    # normcase makes the comparison case-insensitive where the file system is.
    recycle_bin = os.path.normcase('$Recycle.Bin')
    listdirs = []
    files = []
    files_size = []
    files_updated = []
    files_created = []
    for name in os.listdir(pathd):
        full_path = os.path.join(pathd, name)
        if os.path.isdir(full_path):
            # Subdirectories are returned for later scanning; skip the recycle bin.
            if os.path.normcase(name) != recycle_bin:
                listdirs.append(full_path)
            continue
        try:
            # One stat call supplies size and both timestamps.
            info = os.stat(full_path)
        except OSError:
            # Broken links, locked or vanished files must not abort the scan.
            print('跳过无法读取的文件:{}'.format(full_path))
            continue
        files.append(full_path)
        files_size.append(round(info.st_size / float(1024 * 1024), 4))
        files_updated.append(time.strftime(time_format, time.localtime(info.st_mtime)))
        files_created.append(time.strftime(time_format, time.localtime(info.st_ctime)))

    main_dirs = pd.DataFrame({
        '绝对路径': files,
        '文件大小(m)': files_size,
        '更新时间': files_updated,
        '创建时间': files_created,
    })
    return listdirs
def path_list(paths):
    """Walk one directory tree and collect size and timestamps of every file.

    Args:
        paths: root directory to walk (a single path, despite the plural name).

    Returns:
        pandas.DataFrame: one row per readable file, with the columns
        '绝对路径' (path joined onto ``paths``), '文件大小(m)' (size in MiB,
        rounded to four decimals), '更新时间' (modification time) and
        '创建时间' (``st_ctime``), both formatted as local
        ``YYYY-MM-DD HH:MM:SS`` strings.

    Files that cannot be stat-ed (broken symlinks, permission errors, files
    removed during the walk) are reported and skipped, so a single bad entry
    no longer aborts the whole scan. The elapsed time is printed at the end.
    """
    time_format = '%Y-%m-%d %H:%M:%S'
    files = []
    files_size = []
    files_updated = []
    files_created = []
    start = time.perf_counter()
    for root, _subdirs, names in os.walk(paths):
        for name in names:
            filed = os.path.join(root, name)
            try:
                # One stat call supplies size and both timestamps.
                info = os.stat(filed)
            except OSError:
                print('跳过无法读取的文件:{}'.format(filed))
                continue
            files.append(filed)
            files_size.append(round(info.st_size / float(1024 * 1024), 4))
            files_updated.append(time.strftime(time_format, time.localtime(info.st_mtime)))
            files_created.append(time.strftime(time_format, time.localtime(info.st_ctime)))
    dirs = {
        '绝对路径': files,
        '文件大小(m)': files_size,
        '更新时间': files_updated,
        '创建时间': files_created,
    }
    end = time.perf_counter()
    print('{}完成,耗时:'.format(paths), end - start)
    return pd.DataFrame(dirs)
def save_paths(dirs):
    """Merge all scan results and write them to an Excel file.

    Args:
        dirs: list of DataFrames, one per scanned subdirectory (the result
            of mapping ``path_list`` over the subdirectories).

    Reads the module-level ``main_dirs`` (DataFrame of the files directly in
    the scanned folder) and ``pathd`` (the scanned folder), both set by
    ``main_paths``. The report is written to ``目录扫描文件.xlsx`` inside
    ``pathd`` without the index column.
    """
    frames = list(dirs)
    # pd.concat rejects an empty list, so main_dirs is always included when
    # there are no subdirectory frames; otherwise it is appended only when it
    # actually holds rows. Subdirectory rows come first, top-level files last.
    if not frames or len(main_dirs) > 0:
        frames.append(main_dirs)
    # The original called pd.concat() with no arguments here, which raised
    # TypeError whenever both kinds of data were present.
    df = pd.concat(frames)
    pathname = os.path.join(pathd, '目录扫描文件.xlsx')
    df.to_excel(pathname, index=False)
    print('文件保存成功,地址在:{}'.format(pathname))
if __name__ == "__main__":
    # Wall-clock timer for the whole run.
    t0 = time.perf_counter()
    # Ask the user for the folder; returns its immediate subdirectories.
    subdirs = main_paths()
    # Walk each subdirectory in a pool of ten worker processes.
    workers = Pool(processes=10)
    frames = workers.map(path_list, subdirs)
    workers.close()
    workers.join()
    # Combine the per-subdirectory results and write the Excel report.
    save_paths(frames)
    print('总耗时:', time.perf_counter() - t0)
页:
[1]