From ee091bca816849b42866a387f0925b3a65578d89 Mon Sep 17 00:00:00 2001 From: zlg Date: Mon, 25 Jun 2018 22:17:41 -0700 Subject: dupekill: Skip files that can't be accessed Also included checks for dead symlinks and network links that won't resolve. Added -a, which will display all processed files. The -v flag will only output dupes, errors, and notices. --- TODO | 3 +++ dupekill | 37 +++++++++++++++++++++++++++++++++---- 2 files changed, 36 insertions(+), 4 deletions(-) create mode 100644 TODO diff --git a/TODO b/TODO new file mode 100644 index 0000000..81cd368 --- /dev/null +++ b/TODO @@ -0,0 +1,3 @@ +dupekill +-------- +* Find a way to indicate which file a dupe is a copy of. diff --git a/dupekill b/dupekill index 79d5c04..19d9123 100755 --- a/dupekill +++ b/dupekill @@ -44,7 +44,7 @@ def need_to_add(filepath, datalist): return found != 1 -def dupekill(dry_run=False, recursive=False, verbose=False, path=os.getcwd()): +def dupekill(dry_run=False, recursive=False, verbose=False, all_files=False, path=os.getcwd()): if not os.path.isdir(path): print("Error: Unable to fetch directory to work with.") @@ -72,8 +72,36 @@ def dupekill(dry_run=False, recursive=False, verbose=False, path=os.getcwd()): for item in file: # Set up a few variables we'll be needing. filepath = os.path.join(root, item) - filesize = os.stat(filepath).st_size + + # Check for information. If we can't fetch any, there's likely + # a dead symlink or something else wrong. + try: + filesize = os.stat(filepath).st_size + # This occurs when the data points to something that can't be found or + # resolved. + except IOError: + if verbose: + print("NOT FOUND:", filepath) + raise + break + + # This occurs mostly with symlinks. + except OSError: + if verbose: + print("DEAD LINK:", filepath) + raise + break + deleted = False # We need this flag to determine state before adding to the list + if not os.access(filepath, os.R_OK): + if verbose: + print("NOTICE: Cannot read from", filepath) + + break + + if not os.access(filepath, os.W_OK): + if verbose: + print("NOTICE: Cannot write to", filepath) # Funny, processed_files will always equal the index of the file # in our list. :D We might not need it, though @@ -140,7 +168,7 @@ def dupekill(dry_run=False, recursive=False, verbose=False, path=os.getcwd()): if need_to_add(filepath, hashList) and not deleted: hashList.append([filepath, filesize]) - if verbose: + if all_files: print("FILE:", filepath) print() @@ -158,11 +186,12 @@ if __name__ == '__main__': parser.add_option("-d", "--dry", dest='dry_run', action='store_true', default=False, help="displays a list of files dupekill will delete if you run it again without this flag") parser.add_option("-r", "--recursive", dest='recursive', action='store_true', default=False, help="Recurses into all directories below the starting point") parser.add_option("-v", "--verbose", dest='verbose', action='store_true', default=False, help="Provide more detailed output") + parser.add_option("-a", "--all-files", dest='all_files', action='store_true', default=False, help="Show all processed files, not just dupes and errors") (options, args) = parser.parse_args() if args and os.path.isdir(args[0]): path = os.path.abspath(args[0]) else: path = os.getcwd() - dupekill(options.dry_run, options.recursive, options.verbose, path) + dupekill(options.dry_run, options.recursive, options.verbose, options.all_files, path) except KeyboardInterrupt: print("Aborted") -- cgit v1.2.3-54-g00ecf