about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: zlg <zlg@zlg.space> 2018-06-25 22:17:41 -0700
committer: zlg <zlg@zlg.space> 2018-06-25 22:19:51 -0700
commit: ee091bca816849b42866a387f0925b3a65578d89 (patch)
tree: 659e8b0cba44ec5fef5fc5525d2cb4ae02b82121
parent: Corrected dupekill 1.3 tag (diff)
download: dupekill-ee091bca816849b42866a387f0925b3a65578d89.tar.gz
dupekill-ee091bca816849b42866a387f0925b3a65578d89.tar.bz2
dupekill-ee091bca816849b42866a387f0925b3a65578d89.tar.xz
dupekill-ee091bca816849b42866a387f0925b3a65578d89.zip
dupekill: Skip files that can't be accessed
Also adds checks for dead symlinks and network links that won't resolve. Adds -a, which displays all processed files; the -v flag now outputs only dupes, errors, and notices.
-rw-r--r-- TODO 3
-rwxr-xr-x dupekill 37
2 files changed, 36 insertions, 4 deletions
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..81cd368
--- /dev/null
+++ b/TODO
@@ -0,0 +1,3 @@
+dupekill
+--------
+* Find a way to indicate which file a dupe is a copy of.
diff --git a/dupekill b/dupekill
index 79d5c04..19d9123 100755
--- a/dupekill
+++ b/dupekill
@@ -44,7 +44,7 @@ def need_to_add(filepath, datalist):
return found != 1
-def dupekill(dry_run=False, recursive=False, verbose=False, path=os.getcwd()):
+def dupekill(dry_run=False, recursive=False, verbose=False, all_files=False, path=os.getcwd()):
if not os.path.isdir(path):
print("Error: Unable to fetch directory to work with.")
@@ -72,8 +72,36 @@ def dupekill(dry_run=False, recursive=False, verbose=False, path=os.getcwd()):
for item in file:
# Set up a few variables we'll be needing.
filepath = os.path.join(root, item)
- filesize = os.stat(filepath).st_size
+
+ # Check for information. If we can't fetch any, there's likely
+ # a dead symlink or something else wrong.
+ try:
+ filesize = os.stat(filepath).st_size
+ # This occurs when the data points to something that can't be found or
+ # resolved.
+ except IOError:
+ if verbose:
+ print("NOT FOUND:", filepath)
+ raise
+ break
+
+ # This occurs mostly with symlinks.
+ except OSError:
+ if verbose:
+ print("DEAD LINK:", filepath)
+ raise
+ break
+
deleted = False # We need this flag to determine state before adding to the list
+ if not os.access(filepath, os.R_OK):
+ if verbose:
+ print("NOTICE: Cannot read from", filepath)
+
+ break
+
+ if not os.access(filepath, os.W_OK):
+ if verbose:
+ print("NOTICE: Cannot write to", filepath)
# Funny, processed_files will always equal the index of the file
# in our list. :D We might not need it, though
@@ -140,7 +168,7 @@ def dupekill(dry_run=False, recursive=False, verbose=False, path=os.getcwd()):
if need_to_add(filepath, hashList) and not deleted:
hashList.append([filepath, filesize])
- if verbose:
+ if all_files:
print("FILE:", filepath)
print()
@@ -158,11 +186,12 @@ if __name__ == '__main__':
parser.add_option("-d", "--dry", dest='dry_run', action='store_true', default=False, help="displays a list of files dupekill will delete if you run it again without this flag")
parser.add_option("-r", "--recursive", dest='recursive', action='store_true', default=False, help="Recurses into all directories below the starting point")
parser.add_option("-v", "--verbose", dest='verbose', action='store_true', default=False, help="Provide more detailed output")
+ parser.add_option("-a", "--all-files", dest='all_files', action='store_true', default=False, help="Show all processed files, not just dupes and errors")
(options, args) = parser.parse_args()
if args and os.path.isdir(args[0]):
path = os.path.abspath(args[0])
else:
path = os.getcwd()
- dupekill(options.dry_run, options.recursive, options.verbose, path)
+ dupekill(options.dry_run, options.recursive, options.verbose, options.all_files, path)
except KeyboardInterrupt:
print("Aborted")