Source code for jfscripts.find_dupes_by_size

#! /usr/bin/env python3

from jfscripts import __version__
import argparse
import os


[docs]def check_for_duplicates(path): print(path) sizes = {} for dirpath, dirnames, filenames in os.walk(path): for filename in filenames: full_path = os.path.join(dirpath, filename) size = os.path.getsize(full_path) if size in sizes: sizes[size].append(full_path) else: sizes[size] = [full_path] count = 0 duplicate_paths = {} for size, full_paths in sizes.items(): if len(full_paths) > 1: count += 1 full_paths.sort() duplicate_paths[full_paths[0]] = full_paths for key, full_paths in sorted(duplicate_paths.items()): print('-----------------------------------------') for full_path in full_paths: print('rm -f "' + full_path + '"') print('Duplicates found: ' + str(count))
[docs]def get_parser(): """The argument parser for the command line interface. :return: A ArgumentParser object. :rtype: argparse.ArgumentParser """ parser = argparse.ArgumentParser( description='Find duplicate files by size.', ) parser.add_argument( 'path', help='A directory to recursively search for duplicate files.', ) parser.add_argument( '-V', '--version', action='version', version='%(prog)s {version}'.format(version=__version__), ) return parser
[docs]def main(): args = get_parser().parse_args() check_for_duplicates(args.path)
if __name__ == '__main__': main()