From 857343790af8290803deb5b1edd46633e8fa347a Mon Sep 17 00:00:00 2001 From: zlg Date: Wed, 19 Oct 2011 04:33:36 -0500 Subject: Initial commit --- README.mdown | 14 ++++++++ dupekill | 102 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+) create mode 100644 README.mdown create mode 100755 dupekill diff --git a/README.mdown b/README.mdown new file mode 100644 index 0000000..88c2356 --- /dev/null +++ b/README.mdown @@ -0,0 +1,14 @@ +# dupekill -- deletes duplicates of existing data + +dupekill is a simple dupe finder (and deleter). It comes with a few nifty flags: + +* `-d | --dry`: Previews which files dupekill would delete if it were run + without the `-d` flag; nothing is deleted. +* `-r | --recursive`: Descends into all subdirectories below the starting + point. +* `-v | --verbose`: Provides more detailed output. + +# Roadmap + +* Create options for file preference, e.g. "if two files are the same, prefer + the older one". diff --git a/dupekill b/dupekill new file mode 100755 index 0000000..91a059b --- /dev/null +++ b/dupekill @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 +import os +import hashlib +import sys +import stat +from optparse import OptionParser + +# dupekill - deletes duplicates of existing data +# version 1.2 +# written by zlg +# and NF +# +# licensed under the... +# +# DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE +# Version 2, December 2004 +# +# Copyright (C) 2004 Sam Hocevar +# 14 rue de Plaisance, 75014 Paris, France +# Everyone is permitted to copy and distribute verbatim or modified +# copies of this license document, and changing it is allowed as long +# as the name is changed. +# +# DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE +# TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION +# +# 0. You just DO WHAT THE FUCK YOU WANT TO. +# +# This program is free software. It comes without any warranty, to +# the extent permitted by applicable law. +# +# You have been warned. 
# >:3


def _hash_file(file_path, chunk_size=65536):
    """Return the SHA-256 hex digest of the file at *file_path*.

    The file is read in *chunk_size*-byte pieces so large files are never
    held in memory as a whole, and the handle is closed even on error.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    digest = hashlib.sha256()
    with open(file_path, "rb") as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def dupekill(argv=None):
    """Delete files whose content duplicates a file seen earlier in the walk.

    Two files count as duplicates when their SHA-256 digests match. The
    first file encountered with a given digest is kept; later ones are
    removed (or only reported, with ``--dry``). Directories and files are
    visited in sorted name order so the surviving copy is predictable.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) makes the option parser read ``sys.argv[1:]``.
            Recognised flags are ``-d/--dry``, ``-r/--recursive`` and
            ``-v/--verbose``; the first positional argument is the
            directory to scan, defaulting to the current directory.

    Raises:
        SystemExit: with status 1 when the requested path is not a
            directory (and from the option parser on bad options).
    """
    usage = "Usage: %prog [options] {path}"
    parser = OptionParser(usage=usage)
    parser.add_option("-d", "--dry", dest='dry_run', action='store_true', default=False, help="displays a list of files dupekill will delete if you run it again without this flag")
    parser.add_option("-r", "--recursive", dest='recursive', action='store_true', default=False, help="Recurses into all directories below the starting point")
    parser.add_option("-v", "--verbose", dest='verbose', action='store_true', default=False, help="Provide more detailed output")
    (options, args) = parser.parse_args(argv)

    # Only fall back to the current directory when NO path was given. An
    # explicit but invalid path must be an error: silently switching to the
    # working directory would delete files the user never pointed at.
    path = os.path.abspath(args[0]) if args else os.getcwd()

    if not os.path.isdir(path):
        print("Error: Unable to fetch directory to work with.")
        sys.exit(1)

    ignore_dirs = ('.git', '.config')
    seen_hashes = set()
    processed_files = 0
    deleted_files = 0

    for root, dirs, files in os.walk(path):
        # os.walk honours in-place edits of ``dirs`` when walking top-down,
        # so pruning here controls which subdirectories are visited.
        if options.recursive:
            # Recursion still needs to ignore certain dirs
            dirs[:] = sorted(name for name in dirs if name not in ignore_dirs)
        else:
            # While no recursion doesn't need _any_ dirs!
            del dirs[:]

        for item in sorted(files):
            file_path = os.path.join(root, item)

            try:
                digest = _hash_file(file_path)
            except OSError as err:
                # One unreadable file should not abort the whole run.
                print("Skipping", file_path, "-", err, file=sys.stderr)
                continue

            processed_files += 1

            if digest not in seen_hashes:
                seen_hashes.add(digest)

                if options.verbose:
                    print("New file", file_path)

                continue

            if not options.dry_run:
                try:
                    os.remove(file_path)
                except OSError as err:
                    print("Could not delete", file_path, "-", err, file=sys.stderr)
                    continue

            # We want to count these, even if it's a dry run.
            deleted_files += 1

            if options.verbose:
                print("Dupe", file_path, "found.")

    # Print a summary
    print()

    if options.dry_run:
        print("THIS IS A DRY RUN! NO FILES WILL BE ALTERED!")

    print(processed_files, "files processed,", deleted_files, "deleted.\n")


if __name__ == '__main__':
    try:
        dupekill()
    except KeyboardInterrupt:
        print("Aborted")