about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- LICENSE 7
-rw-r--r-- TODO 2
-rwxr-xr-x dupekill 77
3 files changed, 41 insertions, 45 deletions
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..3afa8d5
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,7 @@
+ DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. You just DO WHAT THE FUCK YOU WANT TO.
+
+This program is free software. It comes without any warranty, to the extent
+permitted by applicable law.
diff --git a/TODO b/TODO
index 81cd368..cbbeb7c 100644
--- a/TODO
+++ b/TODO
@@ -1,3 +1 @@
-dupekill
---------
* Find a way to indicate which file a dupe is a copy of.
diff --git a/dupekill b/dupekill
index 19d9123..e8521ac 100755
--- a/dupekill
+++ b/dupekill
@@ -6,33 +6,14 @@ import stat
from optparse import OptionParser
# dupekill - deletes duplicates of existing data
-# version 1.2
-# written by zlg <zlg@zlg.space>
-# and NF <radicalmori@gmail.com>
-#
-# licensed under the...
-#
-# DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
-# Version 2, December 2004
-#
-# Copyright (C) 2004 Sam Hocevar
-# 14 rue de Plaisance, 75014 Paris, France
-# Everyone is permitted to copy and distribute verbatim or modified
-# copies of this license document, and changing it is allowed as long
-# as the name is changed.
-#
-# DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
-# TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-#
-# 0. You just DO WHAT THE FUCK YOU WANT TO.
-#
-# This program is free software. It comes without any warranty, to
-# the extent permitted by applicable law.
-#
-# You have been warned. >:3
+# Version 1.4 (2012-04-02)
+# Written by zlg <zlg@zlg.space>
+# Original idea and code by NF <radicalmori@gmail.com>
+# License: WTFPL <http://sam.zoy.org/wtfpl>
# This function determines whether or not the file needs to be added
# to the list of files in the data list.
+
def need_to_add(filepath, datalist):
found = 0
if len(datalist) == 0:
@@ -44,7 +25,11 @@ def need_to_add(filepath, datalist):
return found != 1
-def dupekill(dry_run=False, recursive=False, verbose=False, all_files=False, path=os.getcwd()):
+def dupekill(dry_run=False, recursive=False, verbose=False, all_files=False, ignore_links=False, path=os.getcwd()):
+
+ if all_files and verbose:
+ print("Error: All operations (-a) or only important ones (-v), not both.")
+ sys.exit(1)
if not os.path.isdir(path):
print("Error: Unable to fetch directory to work with.")
@@ -76,31 +61,33 @@ def dupekill(dry_run=False, recursive=False, verbose=False, all_files=False, pat
# Check for information. If we can't fetch any, there's likely
# a dead symlink or something else wrong.
try:
- filesize = os.stat(filepath).st_size
+ filesize = os.lstat(filepath).st_size
# This occurs when the data points to something that can't be found or
# resolved.
except IOError:
- if verbose:
+ if verbose or all_files:
print("NOT FOUND:", filepath)
- raise
- break
+ continue
# This occurs mostly with symlinks.
except OSError:
- if verbose:
+ if verbose or all_files:
print("DEAD LINK:", filepath)
- raise
- break
+ continue
+
+ if ignore_links:
+ if os.lstat(filepath).st_mode == 41471:
+ continue
deleted = False # We need this flag to determine state before adding to the list
if not os.access(filepath, os.R_OK):
- if verbose:
+ if verbose or all_files:
print("NOTICE: Cannot read from", filepath)
- break
+ continue
if not os.access(filepath, os.W_OK):
- if verbose:
+ if verbose or all_files:
print("NOTICE: Cannot write to", filepath)
# Funny, processed_files will always equal the index of the file
@@ -138,7 +125,7 @@ def dupekill(dry_run=False, recursive=False, verbose=False, all_files=False, pat
if not dry_run:
os.remove(filepath)
- if verbose:
+ if verbose or all_files:
print("DUPE:", filepath)
break
else:
@@ -161,7 +148,7 @@ def dupekill(dry_run=False, recursive=False, verbose=False, all_files=False, pat
if not dry_run:
os.remove(filepath)
- if verbose:
+ if verbose or all_files:
print("DUPE:", filepath)
break
@@ -182,16 +169,20 @@ def dupekill(dry_run=False, recursive=False, verbose=False, all_files=False, pat
if __name__ == '__main__':
try:
usage = "Usage: %prog [options] {path}"
- parser = OptionParser(usage=usage)
- parser.add_option("-d", "--dry", dest='dry_run', action='store_true', default=False, help="displays a list of files dupekill will delete if you run it again without this flag")
- parser.add_option("-r", "--recursive", dest='recursive', action='store_true', default=False, help="Recurses into all directories below the starting point")
- parser.add_option("-v", "--verbose", dest='verbose', action='store_true', default=False, help="Provide more detailed output")
- parser.add_option("-a", "--all-files", dest='all_files', action='store_true', default=False, help="Show all processed files, not just dupes and errors")
+ description = "Deletes files that have duplicate data in them"
+ epilog = "dupekill likes to munch on files. A lot. By default, symlinks and hardlinks that point to the same file will be deleted. Be careful!"
+ version = "%prog version 1.5 (2012-04-05)"
+ parser = OptionParser(usage=usage, description=description, epilog=epilog, version=version)
+ parser.add_option("-d", "--dry", dest='dry_run', action='store_true', default=False, help="don't delete any files")
+ parser.add_option("-r", "--recursive", dest='recursive', action='store_true', default=False, help="recurse into all directories below the current directory")
+ parser.add_option("-v", "--verbose", dest='verbose', action='store_true', default=False, help="provide more detailed output")
+ parser.add_option("-a", "--all-files", dest='all_files', action='store_true', default=False, help="show all processed files, not just dupes and errors")
+ parser.add_option("-i", "--ignore-links", dest="ignore_links", action='store_true', default=False, help="don't process symlinks")
(options, args) = parser.parse_args()
if args and os.path.isdir(args[0]):
path = os.path.abspath(args[0])
else:
path = os.getcwd()
- dupekill(options.dry_run, options.recursive, options.verbose, options.all_files, path)
+ dupekill(options.dry_run, options.recursive, options.verbose, options.all_files, options.ignore_links, path)
except KeyboardInterrupt:
print("Aborted")