aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: zlg <zlg@zlg.space>, 2011-10-19 04:33:36 -0500
committer: zlg <zlg@zlg.space>, 2018-06-25 21:40:13 -0700
commit: 857343790af8290803deb5b1edd46633e8fa347a (patch)
tree: d7cab9590eaf0a11471ff34491b064a214e318ac
download: dupekill-857343790af8290803deb5b1edd46633e8fa347a.tar.gz
dupekill-857343790af8290803deb5b1edd46633e8fa347a.tar.bz2
dupekill-857343790af8290803deb5b1edd46633e8fa347a.tar.xz
dupekill-857343790af8290803deb5b1edd46633e8fa347a.zip
Initial commit
-rw-r--r-- README.mdown 14
-rwxr-xr-x dupekill 102
2 files changed, 116 insertions, 0 deletions
diff --git a/README.mdown b/README.mdown
new file mode 100644
index 0000000..88c2356
--- /dev/null
+++ b/README.mdown
@@ -0,0 +1,14 @@
+# dupekill -- deletes duplicates of existing data
+
+dupekill is a simple dupe finder (and deleter). It comes with a few nifty flags:
+
+* `-d | --dry`: Previews which files dupekill would delete, without deleting
+ anything. Run again without `-d` to actually remove them.
+* `-r | --recursive`: Descends into all subdirectories below the starting
+ point.
+* `-v | --verbose`: Provides more detailed output.
+
+# Roadmap
+
+* Create options for file preference, e.g. "if two files are the same, prefer
+ the older one".
diff --git a/dupekill b/dupekill
new file mode 100755
index 0000000..91a059b
--- /dev/null
+++ b/dupekill
@@ -0,0 +1,102 @@
+#!/usr/bin/env python3
+import os
+import hashlib
+import sys
+import stat
+from optparse import OptionParser
+
+# dupekill - deletes duplicates of existing data
+# version 1.2
+# written by zlg <zlg@zlg.space>
+# and NF <radicalmori@gmail.com>
+#
+# licensed under the...
+#
+# DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
+# Version 2, December 2004
+#
+# Copyright (C) 2004 Sam Hocevar
+# 14 rue de Plaisance, 75014 Paris, France
+# Everyone is permitted to copy and distribute verbatim or modified
+# copies of this license document, and changing it is allowed as long
+# as the name is changed.
+#
+# DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
+# TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+#
+# 0. You just DO WHAT THE FUCK YOU WANT TO.
+#
+# This program is free software. It comes without any warranty, to
+# the extent permitted by applicable law.
+#
+# You have been warned. >:3
+
def _hash_file(path, chunk_size=1 << 20):
    """Return the SHA-256 hex digest of the file at *path*.

    The file is read in chunks of *chunk_size* bytes so that large files
    never have to fit in memory, and the handle is closed even on error.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def dupekill():
    """Delete files whose content duplicates an earlier file (CLI entry point).

    Command-line options and the optional starting directory are read from
    ``sys.argv``. When no directory is given, the current working directory
    is used. Files are compared by SHA-256 digest; the first file seen with
    a given digest is kept and every later one is removed (or only counted
    when ``--dry`` is given). A summary is printed at the end.

    Raises:
        SystemExit: with status 1 when the starting path is not a directory
            (optparse may also exit on invalid options).
    """
    usage = "Usage: %prog [options] {path}"
    parser = OptionParser(usage=usage)
    parser.add_option("-d", "--dry", dest='dry_run', action='store_true', default=False, help="displays a list of files dupekill will delete if you run it again without this flag")
    parser.add_option("-r", "--recursive", dest='recursive', action='store_true', default=False, help="Recurses into all directories below the starting point")
    parser.add_option("-v", "--verbose", dest='verbose', action='store_true', default=False, help="Provide more detailed output")
    (options, args) = parser.parse_args()

    # An explicitly given path must be honoured or rejected. Silently
    # falling back to the current directory on a typo would make a
    # *deleting* tool operate somewhere the user never asked for.
    path = os.path.abspath(args[0]) if args else os.getcwd()

    if not os.path.isdir(path):
        print("Error: Unable to fetch directory to work with.")
        sys.exit(1)

    # Directories that are never descended into, even with --recursive.
    ignore_dirs = ('.git', '.config')
    # A set gives O(1) membership tests however many files are scanned.
    seen_hashes = set()
    processed_files = 0
    deleted_files = 0

    for root, dirs, files in os.walk(path):
        # os.walk (top-down) only descends into what is left in ``dirs``,
        # so the list has to be edited in place rather than rebound.
        if options.recursive:
            # Recursion still needs to ignore certain dirs
            dirs[:] = [name for name in dirs if name not in ignore_dirs]
        else:
            # While no recursion doesn't need _any_ dirs!
            del dirs[:]

        for item in files:
            full_path = os.path.join(root, item)

            try:
                file_hash = _hash_file(full_path)
            except OSError as err:
                # Unreadable entries (dangling symlinks, permission
                # problems) are skipped instead of aborting the whole run.
                print("Skipping", full_path, "(%s)" % err, file=sys.stderr)
                continue

            processed_files += 1

            if file_hash in seen_hashes:
                # We want to count these, even if it's a dry run.
                deleted_files += 1

                if not options.dry_run:
                    os.remove(full_path)

                if options.verbose:
                    print("Dupe", full_path, "found.")

            else:
                seen_hashes.add(file_hash)

                if options.verbose:
                    print("New file", full_path)

    # Print a summary
    print()

    if options.dry_run:
        print("THIS IS A DRY RUN! NO FILES WILL BE ALTERED!")

    print(processed_files, "files processed,", deleted_files, "deleted.\n")
+
if __name__ == "__main__":
    # Script entry point: run the tool, and turn Ctrl-C into a short
    # message instead of a traceback.
    try:
        dupekill()
    except KeyboardInterrupt:
        print("Aborted")