#!/usr/bin/env python3
import os
import hashlib
import sys
from optparse import OptionParser

# dupekill - deletes duplicates of existing data
# version 1.2
# written by zlg <zlg@zlg.space>
#        and NF <radicalmori@gmail.com>
#
#                     licensed under the...
#
#            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
#                    Version 2, December 2004
#
# Copyright (C) 2004 Sam Hocevar
#  14 rue de Plaisance, 75014 Paris, France
# Everyone is permitted to copy and distribute verbatim or modified
# copies of this license document, and changing it is allowed as long
# as the name is changed.
#
#            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
#   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
#
#  0. You just DO WHAT THE FUCK YOU WANT TO.
# 
# This program is free software. It comes without any warranty, to
# the extent permitted by applicable law.
#
# You have been warned. >:3
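#
# Example invocations (using the options defined in the parser below;
# the paths are only illustrative):
#   ./dupekill              deduplicate the current directory, top level only
#   ./dupekill -r ~/pics    recurse into every directory below ~/pics
#   ./dupekill -d -v .      dry run with per-file output; nothing is deleted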

def dupekill(dry_run=False, recursive=False, verbose=False, path=None):
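    """Delete duplicate files under `path`.

    Files are considered duplicates when their SHA-256 digests match.
    The first file seen with a given digest is kept; later copies are
    removed, or only counted when dry_run is True.
    """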

    # Default to the current working directory, resolved at call time.
    if path is None:
        path = os.getcwd()

    if not os.path.isdir(path):
        print("Error:", path, "is not a directory.")
        sys.exit(1)
    else:
        # Walk the tree, tracking the hash of every unique file seen so far.
        file_list = os.walk(path)
        seen_hashes = set()
        processed_files = 0
        deleted_files = 0

        for root, dirs, files in file_list:
            if recursive:
                # Recursion still needs to skip certain directories.
                for ignored in ('.git', '.config'):
                    if ignored in dirs:
                        dirs.remove(ignored)
            else:
                # Without recursion we prune every subdirectory in place,
                # so os.walk() stops at the top level.
                dirs.clear()

            for item in files:
                file_path = os.path.join(root, item)

                # Hash the file contents; identical bytes give identical digests.
                with open(file_path, "rb") as checked_file:
                    digest = hashlib.sha256(checked_file.read()).hexdigest()

                processed_files += 1

                if digest in seen_hashes:
                    # Count duplicates even on a dry run so the summary is accurate.
                    deleted_files += 1

                    if not dry_run:
                        os.remove(file_path)

                    if verbose:
                        print("Dupe", file_path, "found.")

                else:
                    seen_hashes.add(digest)

                    if verbose:
                        print("New file", file_path)

        # Print a summary
        print()

        if dry_run:
            print("THIS IS A DRY RUN! NO FILES WILL BE ALTERED!")

        print(processed_files, "files processed,", deleted_files, "deleted.\n")

if __name__ == '__main__':
    try:
        usage = "Usage: %prog [options] {path}"
        parser = OptionParser(usage=usage)
        parser.add_option("-d", "--dry", dest='dry_run', action='store_true', default=False, help="displays a list of files dupekill will delete if you run it again without this flag")
        parser.add_option("-r", "--recursive", dest='recursive', action='store_true', default=False, help="Recurses into all directories below the starting point")
        parser.add_option("-v", "--verbose", dest='verbose', action='store_true', default=False, help="Provide more detailed output")
        (options, args) = parser.parse_args()
        if args:
            path = os.path.abspath(args[0])
        else:
            path = os.getcwd()
        dupekill(options.dry_run, options.recursive, options.verbose, path)
    except KeyboardInterrupt:
        print("Aborted")