#! /usr/bin/env python3
from jfscripts import __version__
import argparse
import os
[docs]def check_for_duplicates(path):
print(path)
sizes = {}
for dirpath, dirnames, filenames in os.walk(path):
for filename in filenames:
full_path = os.path.join(dirpath, filename)
size = os.path.getsize(full_path)
if size in sizes:
sizes[size].append(full_path)
else:
sizes[size] = [full_path]
count = 0
duplicate_paths = {}
for size, full_paths in sizes.items():
if len(full_paths) > 1:
count += 1
full_paths.sort()
duplicate_paths[full_paths[0]] = full_paths
for key, full_paths in sorted(duplicate_paths.items()):
print('-----------------------------------------')
for full_path in full_paths:
print('rm -f "' + full_path + '"')
print('Duplicates found: ' + str(count))
[docs]def get_parser():
"""The argument parser for the command line interface.
:return: A ArgumentParser object.
:rtype: argparse.ArgumentParser
"""
parser = argparse.ArgumentParser(
description='Find duplicate files by size.',
)
parser.add_argument(
'path',
help='A directory to recursively search for duplicate files.',
)
parser.add_argument(
'-V',
'--version',
action='version',
version='%(prog)s {version}'.format(version=__version__),
)
return parser
[docs]def main():
args = get_parser().parse_args()
check_for_duplicates(args.path)
if __name__ == '__main__':
main()