-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDuplicateFileChecker.py
executable file
·39 lines (36 loc) · 1.4 KB
/
DuplicateFileChecker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/usr/bin/env python
'''
Checks for duplicate files in a given directory and all of its subdirectories, based on SHA256 hashes of the file contents.
'''
__author__ = "Aniruddha Biyani"
__version__ = "1.0"
__maintainer__ = "Aniruddha Biyani"
__email__ = "[email protected]"
__status__ = "Production"
__date__ = "20150312"
import hashlib, os, pprint, thread
def _sha256_of_file(path, chunk_size=1024 * 1024):
    '''
    Return the SHA256 hex digest of the contents of the file at `path`.

    The file is read in pieces of at most `chunk_size` bytes so that a large
    file is never held in memory all at once.

    Raises IOError/OSError if the file cannot be opened or read.
    '''
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        # iter(callable, sentinel) keeps calling f.read() until it returns
        # the empty byte string, i.e. until end of file.
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def _collect_hashes(dirname):
    '''
    Walk `dirname` recursively and hash every file found.

    Returns a list of (fullname, sha256 hex digest) tuples in walk order.
    Directories and file names are visited in sorted order so that the result
    is reproducible from run to run. Files that cannot be read are reported
    on stderr and skipped instead of aborting the whole scan.
    '''
    import sys

    entries = []
    for (thisDir, subsHere, filesHere) in os.walk(dirname):
        # Sorting the list in place steers the order in which os.walk
        # descends into the sub-directories.
        subsHere.sort()
        for filename in sorted(filesHere):
            # To check only a particular type of file, filter here,
            # e.g. skip the file unless filename.endswith('.py').
            fullname = os.path.join(thisDir, filename)
            try:
                sha2hash = _sha256_of_file(fullname)
            except (IOError, OSError) as err:
                sys.stderr.write("Skipping unreadable file %s: %s\n" % (fullname, err))
                continue
            entries.append((fullname, sha2hash))
    return entries


def _print_duplicates(entries):
    '''
    Print one line for every pair of files in `entries` that share a hash.

    `entries` is a list of (fullname, sha256 hex digest) tuples. For each
    file, in list order, a line "<file> is a duplicate of file <other>" is
    printed for every later file with the same hash. Grouping by hash first
    avoids comparing every file against every other file.
    '''
    by_hash = {}
    for fullname, sha2hash in entries:
        by_hash.setdefault(sha2hash, []).append(fullname)

    # Number of files already reported per hash; because each group was built
    # in list order, this is also the current file's index inside its group.
    reported = {}
    for fullname, sha2hash in entries:
        position = reported.get(sha2hash, 0)
        reported[sha2hash] = position + 1
        for other in by_hash[sha2hash][position + 1:]:
            print(fullname + " is a duplicate of file " + other)


def main(dirname=None):
    '''
    Find and print the duplicate files below a directory.

    dirname -- directory to scan. When it is None (the default) the user is
               prompted for it on standard input.

    Surrounding whitespace is stripped from the directory name. If the result
    is not an existing directory, a message is written to stderr and nothing
    is scanned. Duplicates are printed to stdout; the function returns None.
    '''
    import sys

    if dirname is None:
        print('Please enter the absolute path in the input.')
        try:
            read_line = raw_input
        except NameError:
            # raw_input does not exist on Python 3, where input() does the same job.
            read_line = input
        dirname = read_line('Enter the directory in which you want to find the duplicates: ')
    dirname = dirname.strip()

    # os.walk silently yields nothing for a missing path, so tell the user
    # explicitly rather than exiting without any output.
    if not os.path.isdir(dirname):
        sys.stderr.write("Not a directory: %s\n" % dirname)
        return

    _print_duplicates(_collect_hashes(dirname))
# Run the duplicate check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()