1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
|
#!/usr/bin/env python3
import os
import hashlib
import sys
import stat
from optparse import OptionParser
# dupekill - deletes duplicates of existing data
# version 1.2
# written by zlg <zlg@zlg.space>
# and NF <radicalmori@gmail.com>
#
# licensed under the...
#
# DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
# Version 2, December 2004
#
# Copyright (C) 2004 Sam Hocevar
# 14 rue de Plaisance, 75014 Paris, France
# Everyone is permitted to copy and distribute verbatim or modified
# copies of this license document, and changing it is allowed as long
# as the name is changed.
#
# DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
# TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
#
# 0. You just DO WHAT THE FUCK YOU WANT TO.
#
# This program is free software. It comes without any warranty, to
# the extent permitted by applicable law.
#
# You have been warned. >:3
def _sha256_of_file(file_path, chunk_size=1024 * 1024):
    """Return the hex SHA-256 digest of the file at *file_path*.

    The file is read in pieces of *chunk_size* bytes so a large file is
    never held in memory all at once, and the handle is always closed.
    """
    digest = hashlib.sha256()
    with open(file_path, "rb") as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def dupekill(dry_run=False, recursive=False, verbose=False, path=None):
    """Delete files whose contents duplicate an earlier file under *path*.

    Files are compared by the SHA-256 digest of their contents.  The first
    file seen with a given digest is kept; every later file with the same
    digest is removed (or only counted when *dry_run* is true).  "First"
    means first in the order ``os.walk`` yields the files.

    Parameters:
        dry_run:   when true, nothing is removed; duplicates are still
                   counted and reported in the summary.
        recursive: when true, descend into sub-directories, skipping any
                   named ``.git`` or ``.config``.  When false, only the
                   files directly inside *path* are examined.
        verbose:   when true, print one line per file processed.
        path:      directory to scan.  ``None`` (the default) means the
                   current working directory at the time of the call.

    A summary line is always printed.  If *path* is not a directory an
    error message is printed and the process exits with status 1.
    Errors raised while reading or removing a file are not caught here.
    """
    # Resolve the default at call time; a default of os.getcwd() in the
    # signature would be frozen when the function is defined.
    if path is None:
        path = os.getcwd()

    if not os.path.isdir(path):
        print("Error: Unable to fetch directory to work with.")
        sys.exit(1)

    ignore_dirs = ('.git', '.config')
    seen_hashes = set()  # set gives O(1) membership tests
    processed_files = 0
    deleted_files = 0

    for root, dirs, files in os.walk(path):
        # os.walk honours in-place edits of `dirs` when walking top-down,
        # so pruning the list here controls which directories are visited.
        if recursive:
            dirs[:] = [name for name in dirs if name not in ignore_dirs]
        else:
            del dirs[:]

        for item in files:
            full_path = os.path.join(root, item)
            file_hash = _sha256_of_file(full_path)
            processed_files += 1

            if file_hash in seen_hashes:
                # We want to count these, even if it's a dry run.
                deleted_files += 1
                if not dry_run:
                    os.remove(full_path)
                if verbose:
                    print("Dupe", full_path, "found.")
            else:
                seen_hashes.add(file_hash)
                if verbose:
                    print("New file", full_path)

    # Print a summary
    print()
    if dry_run:
        print("THIS IS A DRY RUN! NO FILES WILL BE ALTERED!")
    print(processed_files, "files processed,", deleted_files, "deleted.\n")
# Command-line entry point: parse the options, pick the directory to scan,
# and run dupekill on it.
if __name__ == '__main__':
    try:
        usage = "Usage: %prog [options] {path}"
        parser = OptionParser(usage=usage)
        parser.add_option("-d", "--dry", dest='dry_run', action='store_true', default=False, help="displays a list of files dupekill will delete if you run it again without this flag")
        parser.add_option("-r", "--recursive", dest='recursive', action='store_true', default=False, help="Recurses into all directories below the starting point")
        parser.add_option("-v", "--verbose", dest='verbose', action='store_true', default=False, help="Provide more detailed output")
        options, args = parser.parse_args()

        if not args:
            # No path given: work on the current directory.
            path = os.getcwd()
        elif os.path.isdir(args[0]):
            path = os.path.abspath(args[0])
        else:
            # A path was given but is not a directory.  Abort instead of
            # silently falling back to the current directory: this tool
            # deletes files, so a typo must not redirect it elsewhere.
            # parser.error() prints the usage and exits with status 2.
            parser.error("%r is not a directory" % args[0])

        dupekill(options.dry_run, options.recursive, options.verbose, path)
    except KeyboardInterrupt:
        # Ctrl-C: stop quietly without a traceback.
        print("Aborted")
|