手机微信收到的图片data格式转码为jpg等图片文件

JaySun520 发表于 2023-7-28 09:34

本帖最后由 JaySun520 于 2023-7-28 09:49 编辑

手机微信中经常会收到各种图片，想要批量处理，但微信把图片保存为 .dat 格式的文件，无法直接预览；图片较多时，全部删除又怕误删有用的图片。使用以下代码可以直接读取手机中微信的 .dat 图片文件，并转换成可以预览的图片，便于进行批量操作，或者用于清理手机中的垃圾图片，非常好用。除图片格式外，还可以识别其它文件格式（这类文件不做转换，只会被加上对应的后缀名）。
使用的是进程+线程的方式加快转换速度。

附件是效果视频

# 系统库
import sys
import os
import stat
# 数据类型库
from enum import Enum
import math
from time import perf_counter

#线程库
import threading
from multiprocessing import Pool,Process,cpu_count
import multiprocessing

# Module-level semaphore acquired by WechatConvert.convert() for the whole
# duration of each file; it caps the number of conversions running at the
# same time in one interpreter process at 1000.
sem = threading.Semaphore(1000)

class ImgType(Enum):
    """Two-byte file signatures used to recognise obfuscated files.

    Each value packs the first two bytes of a format's magic number into a
    single 16-bit integer: the high byte is the first byte of the file and
    the low byte is the second.  The member name doubles as the file
    extension that is appended to a recognised file.

    Note: ``mpg`` and ``mpeg`` share the value ``0x0000``, so ``mpeg`` is an
    Enum alias of ``mpg`` and is skipped when iterating over ``ImgType``.
    """

    gif = 0x4749  # GIF (gif), header: 47494638
    jpg = 0xFFD8  # JPEG (jpg), header: FFD8FF
    png = 0x8950  # PNG (png), header: 89504E47
    tif = 0x4949  # TIFF (tif), header: 49492A00
    bmp = 0x424D  # Windows Bitmap (bmp), header: 424D
    dwg = 0x4143  # CAD (dwg), header: 41433130
    psd = 0x3842  # Adobe Photoshop (psd), header: 38425053
    rtf = 0x7B5C  # Rich Text Format (rtf), header: 7B5C727466
    xml = 0x3C3F  # XML (xml), header: 3C3F786D6C
    html = 0x6874  # HTML (html), header: 68746D6C3E
    eml = 0x4465  # Email (eml), header: 44656C69766572792D646174653A
    dbx = 0xCFAD  # Outlook Express (dbx), header: CFAD12FEC5FD746F
    pst = 0x2142  # Outlook (pst), header: 2142444E
    xlsOrdoc = 0xD0CF  # MS Word/Excel (xls or doc), header: D0CF11E0
    mdb = 0x5374  # MS Access (mdb), header: 5374616E64617264204A
    wpd = 0xFF57  # WordPerfect (wpd), header: FF575043
    pdf = 0x2550  # Adobe Acrobat (pdf), header: 255044462D312E
    qdf = 0xAC9E  # Quicken (qdf), header: AC9EBD8F
    pwl = 0xE382  # Windows Password (pwl), header: E3828596
    zip = 0x504B  # ZIP Archive (zip), header: 504B0304
    rar = 0x5261  # RAR Archive (rar), header: 52617221
    wav = 0x5741  # Wave (wav), header: 57415645
    avi = 0x4156  # AVI (avi), header: 41564920
    ram = 0x2E72  # Real Audio (ram), header: 2E7261FD
    rm = 0x2E52  # Real Media (rm), header: 2E524D46
    mpg = 0x0000  # MPEG (mpg), header: 000001BA
    mpeg = 0x0000  # MPEG (mpg), header: 000001B3 -- alias of mpg (same value)
    mov = 0x6D6F  # Quicktime (mov), header: 6D6F6F76
    asf = 0x3026  # Windows Media (asf), header: 3026B2758E66CF11
    mid = 0x4D54  # MIDI (mid), header: 4D546864

class WechatConvert(object):
    """Decode single-byte-XOR obfuscated ``.dat`` files back to real files."""

    @staticmethod
    def find_img_type(file_path):
        """Guess the real type of an obfuscated file and its XOR key.

        The first two bytes of the file are compared with every two-byte
        signature in ``ImgType``.  A signature matches when XOR-ing its high
        byte with the first file byte gives the same value as XOR-ing its low
        byte with the second file byte; that common value is the key.

        Args:
            file_path: Path of the file to inspect.  Its read-only attribute
                is cleared as a side effect.

        Returns:
            ``(type_name, xor_key)`` with ``xor_key`` an int in 0..255 on a
            match; otherwise ``('none', header_hex)`` where ``header_hex`` is
            a string such as ``'0x1a2b'`` describing the bytes that were read.
        """
        # Clear the read-only attribute.  S_IREAD is included so that on POSIX
        # the owner keeps read permission (S_IWRITE alone would drop it).
        os.chmod(file_path, stat.S_IREAD | stat.S_IWRITE)
        with open(file_path, 'rb') as f:
            header = f.read(2)

        header_hex = '0x' + ''.join('{:02x}'.format(b) for b in header)
        if len(header) < 2:
            # Too short to carry a two-byte signature.
            return 'none', header_hex

        byte1, byte2 = header
        for img_enum in ImgType:
            high, low = WechatConvert.hex_to_tuple(img_enum.value)
            if high ^ byte1 == low ^ byte2:
                return img_enum.name, high ^ byte1
        return 'none', header_hex  # no known signature matched

    @staticmethod
    def hex_to_tuple(img_type):
        """Split a 16-bit signature into ``(high_byte, low_byte)``."""
        return img_type >> 8, img_type & 0b11111111

    def convert(self, file_path, output_path=".", number='1', total='1'):
        """Decode one obfuscated file, or tag it with its detected extension.

        Image types (gif/jpg/png/tif/bmp) are XOR-decoded into
        ``<output_path>/<file name>.<type>``.  Any other recognised type is
        not decoded; the source file is renamed to that same target path.
        Files whose type cannot be determined are left untouched.

        Args:
            file_path: Path of the source file.
            output_path: Directory that receives the result.
            number: Position of this file in the batch (progress output only).
            total: Batch size (progress output only).
        """
        with sem:  # bound the number of conversions running concurrently
            # Accept both "\\" and "/" as separators when extracting the name.
            file_name = os.path.basename(file_path.replace("\\", os.sep))
            img_type, img_xor = WechatConvert.find_img_type(file_path)
            if img_type == 'none':  # unsupported type: nothing to do
                return

            target = os.path.join(output_path, file_name + "." + img_type)
            if img_type in ('gif', 'jpg', 'png', 'tif', 'bmp'):
                print('正在转换图片' + str(number) + '/' + str(total) + '：' + file_name)
                # Precompute byte -> byte ^ key once so each chunk is decoded
                # by a single bytes.translate() call instead of per byte.
                table = bytes(value ^ img_xor for value in range(256))
                with open(file_path, 'rb') as fd, open(target, 'wb') as w:
                    while True:
                        chunk = fd.read(1024 * 1024)
                        if not chunk:
                            break
                        w.write(chunk.translate(table))
                print('图片转换完成：' + str(number) + '/' + str(total) + '：' + file_name)
            else:
                # Not decodable here: just give the file its detected suffix.
                os.rename(file_path, target)

def thread_it(func, *args):
    """Run ``func`` once per file, each call on its own thread.

    Args:
        func: Callable invoked as
            ``func(file_name, output_path, number, total)``.
        *args: Exactly four positional values, in this order:
            ``file_path_list`` (iterable of file names), ``output_path``,
            ``number`` (offset added to each file's 1-based position in
            ``file_path_list``) and ``total`` (passed through unchanged).

    The function returns only after every started thread has finished.
    """
    file_path_list, output_path, number, total = args
    # enumerate() gives each file its own position, even for duplicate names,
    # without the repeated linear scans of list.index().
    threads = [
        threading.Thread(target=func,
                         args=(file_name, output_path, number + position, total))
        for position, file_name in enumerate(file_path_list, start=1)
    ]
    for t in threads:
        t.start()
    # Wait for completion so the caller knows the batch is really done.
    for t in threads:
        t.join()

if __name__ == '__main__':
    # Script entry point: convert every ".dat" file in the current working
    # directory, spreading the files over one worker process per CPU; each
    # worker then handles its share of files with threads (see thread_it).
    multiprocessing.freeze_support()
    start = perf_counter()  # start time

    worker_count = cpu_count()  # separate name: do not shadow cpu_count()
    p = Pool(worker_count)

    convert_file_list = [
        file_name for file_name in os.listdir(os.getcwd())
        if os.path.splitext(file_name)[-1] == '.dat'
    ]
    total = len(convert_file_list)

    # Files per process, rounded up so that no file is left over.
    convert_file_list_qty = math.ceil(total / worker_count)
    for n in range(worker_count):
        first = convert_file_list_qty * n
        if first < total:
            # Hand each process only its own slice; passing the whole list
            # would make every process convert every file.
            p.apply_async(
                func=thread_it,
                args=(WechatConvert().convert,
                      convert_file_list[first:first + convert_file_list_qty],
                      ".", first, total))

    p.close()
    p.join()
    end = perf_counter()  # end time
    process_time = end - start
    # Test the larger unit first, otherwise the hours branch is unreachable.
    if process_time > 60 * 60:
        process_time = process_time / 60 / 60
        process_time_str = str(process_time) + '小时'
    elif process_time > 60:
        process_time = process_time / 60
        process_time_str = str(process_time) + '分'
    else:
        process_time_str = str(process_time) + '秒'
    print('=================图片全部转换完成===========')
    print('一共转换图片数：' + str(total))
    print('一共用时：' + process_time_str)

kingyun 发表于 2024-2-20 14:47

=================图片全部转换完成===========
一共转换图片数：34
一共用时：13.155442299999999秒
请问楼主怎么定义file_path和output_path的路径

梁茵发表于 2023-7-28 14:10

感谢分享，这样就可以省下很多时间转换图片了，一步到位不错

13586175423 发表于 2023-8-2 10:02

7694166 发表于 2023-8-2 19:14

本帖最后由 7694166 于 2023-8-2 19:16 编辑

为什么附件是视频呢？

提拉米苏子冉 发表于 2023-8-2 21:12

大佬能不能打包成成品啊

ouwen827 发表于 2023-8-17 16:00

大佬不能直接用吗？没有成品呀？

吖力锅 发表于 2023-8-18 17:03

为什么我会无法运行的呢

JaySun520 发表于 2023-8-21 14:06

提拉米苏子冉发表于 2023-8-2 21:12
大佬能不能打包成成品啊

打包后文件太大，无法上传，直接将代码编译运行即可

hy125800 发表于 2023-8-26 15:22

差评，代码执行不出来，你倒是给举个例子呀，视频看了也没啥用，只是看你成功了，我们还是不会

liuwankai 发表于 2023-9-2 02:12

除了54行和101行缩进有问题直接拷贝就可以运行

页: [1] 2

吾爱破解 - 52pojie.cn's Archiver

手机微信收到的图片data格式转码为jpg等图片文件