包体积优化-Python查找项目中重复图片

随着 App 版本不断迭代,项目中的图片资源也越来越多。除了清理无用的图片资源之外,还可以对项目中重复的图片进行统一整理:计算每张图片文件内容的 MD5 哈希值,哈希值相同的图片即为重复图片,据此筛选出重复图片列表。

完整脚本如下:

#coding:utf-8
import glob, os
import hashlib

# Root directory that the script walks with os.walk to collect .png files.
# Alternative roots (commented out):
# rootPath = '/Users/jinying.zhou.o/Desktop/TestIconOC'
#rootPath = '/Users/jinying.zhou.o/Documents/NIO/FDAppKit'
rootPath = '/Users/jinying.zhou.o/Documents/NIO/NIO/NextevCar'

def get_FileSize(filePath):
    """Return the size of the file at ``filePath`` in kilobytes.

    The byte count reported by ``os.path.getsize`` is divided by 1024 and
    rounded to two decimal places.

    Args:
        filePath: Path of the file to measure.

    Returns:
        float: File size in KB (1 KB = 1024 bytes), rounded to 2 decimals.

    Raises:
        OSError: Propagated from ``os.path.getsize`` when the file does not
            exist or cannot be accessed.
    """
    size_in_bytes = os.path.getsize(filePath)
    # float() keeps the division fractional under Python 2 as well.
    return round(size_in_bytes / float(1024), 2)

# Paths of every PNG image found under rootPath.
filesList = []
# List intended for oversized images (never filled in by this script).
bigSizeList = []
# Running size total in KB; the report loop adds each duplicated image to it.
totalSize = 0.0
# Size total intended for oversized images (never updated by this script).
bigTotalSize = 0.0
for root, dirs, files in os.walk(rootPath):
    for name in files:
        # Compare in lower case so that "icon.PNG" is collected as well.
        if name.lower().endswith(".png"):
            # os.path.join inserts the platform separator between the parts.
            filesList.append(os.path.join(root, name))

# Find duplicated images: group the collected paths by the MD5 digest of
# their content, so that files with identical bytes end up in the same set.
md5list = {}
for imagePath in filesList:
    if os.path.isdir(imagePath):
        continue

    md5obj = hashlib.md5()
    # The context manager closes the handle even when a read fails part-way.
    with open(imagePath, 'rb') as fd:
        # Hash in 2048-byte chunks; iter() stops at the empty read (EOF).
        for buff in iter(lambda: fd.read(2048), b''):
            md5obj.update(buff)

    filemd5 = md5obj.hexdigest().lower()
    # Create the set on first sight of a digest, then record this path in it.
    md5list.setdefault(filemd5, set()).add(imagePath)

# Report each group of images sharing one MD5 digest and add the size of
# every file in such a group to totalSize.
for values in md5list.values():
    if len(values) <= 1:
        # Only one file has this digest, so there is nothing to report.
        continue

    valueList = list(values)
    # Every path in the group hashed to the same digest; the size of the
    # first one is printed once for the whole group.
    firstSize = get_FileSize(valueList[0])
    print(values)
    print(str(firstSize) + "KB")
    for path in valueList:
        totalSize += get_FileSize(path)

print("总计重复图片size为:" + str(totalSize))