# Source code for jfscripts.pdf_compress

#! /usr/bin/env python3

from jfscripts import __version__
from jfscripts import list_files
from jfscripts._utils import check_dependencies, Run, FilePath
import argparse
import multiprocessing
import os
import PyPDF2
import random
import re
import shutil
import time
import uuid


# Shared command runner; main() configures its verbosity and colorization
# through run.setup().
run = Run()
# Placeholder that main() replaces with a State instance.
state = None
"""The global :class:`State` object."""

identifier = 'magick'
"""To allow better assignment of the output files."""

# uuid1 makes the marker unique per invocation, so collect_images() and
# cleanup() only touch the temporary files of this run.
tmp_identifier = '{}_{}'.format(identifier, uuid.uuid1())
"""Used for the identification of temporary files."""

# Placeholder that main() replaces with the parsed command line namespace.
args = None
"""The argparse object."""

# External programs handed to check_dependencies() in main(). Entries are
# either a bare name or a pair -- presumably (executable, package name);
# verify against jfscripts._utils.check_dependencies.
dependencies = (
    ('convert', 'imagemagick'),
    ('identify', 'imagemagick'),
    ('pdfimages', 'poppler'),
    ('pdfinfo', 'poppler'),
    'pdftk',
    'tesseract',
)


def check_threshold(value):
    """
    Check if `value` is a valid threshold value.

    A single trailing percent sign is accepted and ignored, so `90`, `'90'`
    and `'90%'` are all normalized to `'90%'`.

    :param value: The threshold to validate.
    :type value: integer or string

    :return: A normalized threshold string (`90%`)
    :rtype: string

    :raises argparse.ArgumentTypeError: If the number is outside of 0-100.
    :raises ValueError: If `value` is not an integer (argparse reports this
      as an invalid argument when the function is used as a `type`).
    """
    number = int(re.sub(r'%$', '', str(value)))
    if not 0 <= number <= 100:
        message = '{} is an invalid int value. Should be 0-100'.format(number)
        raise argparse.ArgumentTypeError(message)
    return '{}%'.format(number)


def get_parser():
    """The argument parser for the command line interface.

    The parser has a few global options and the required subcommands
    `convert`, `extract`, `join`, `samples` and `unify`. The name (or alias)
    of the selected subcommand is stored in the attribute `subcommand`.

    :return: An ArgumentParser object.
    :rtype: argparse.ArgumentParser
    """
    parser = argparse.ArgumentParser(
        description='Convert and compress PDF scans. '
        'Make scans suitable for imslp.org (International Music Score '
        'Library Project). See also '
        'http://imslp.org/wiki/IMSLP:Musiknoten_beisteuern. '
        'The output files are monochrome bitmap images at a resolution of '
        '600 dpi and the compression format CCITT group 4.',
    )

    parser.add_argument(
        '-c',
        '--colorize',
        action='store_true',
        help='Colorize the terminal output.',
    )

    parser.add_argument(
        '-m',
        '--multiprocessing',
        action='store_true',
        default=False,
        help='Use multiprocessing to run commands in parallel.',
    )

    parser.add_argument(
        '-N',
        '--no-cleanup',
        action='store_true',
        help='Don’t clean up the temporary files.',
    )

    parser.add_argument(
        '-v',
        '--verbose',
        action='store_true',
        help='Make the command line output more verbose.',
    )

    parser.add_argument(
        '-V',
        '--version',
        action='version',
        version='%(prog)s {version}'.format(version=__version__),
    )

    subcommand = parser.add_subparsers(
        dest='subcommand',
        help='Subcommand',
    )
    # Without this the parser would silently accept a missing subcommand.
    subcommand.required = True

    ##
    # convert
    ##

    convert_parser = subcommand.add_parser(
        'convert',
        aliases=['con', 'c'],
        description='Convert scanned images (can be many image '
        'file formats or a PDF files) in monochrome bitmap images. The '
        'resulting images are compressed using the CCITT group 4 compression.'
    )

    # Only one of the --auto-* presets may be used at a time, and --quality
    # (color mode) excludes --threshold (monochrome mode).
    convert_parser_color = convert_parser.add_mutually_exclusive_group()
    convert_parser_compress = convert_parser.add_mutually_exclusive_group()

    # auto_black_white
    convert_parser_color.add_argument(
        '-a',
        '--auto-black-white',
        action='store_true',
        help='The same as “'
        '--deskew '
        '--join '
        '--ocr '
        '--pdf '
        '--resize '
        '--trim '
        '--unify”',
    )

    # auto_color
    convert_parser_color.add_argument(
        '-C',
        '--auto-color',
        action='store_true',
        help='The same as “'
        '--color '
        '--deskew '
        '--join '
        '--ocr '
        '--pdf '
        '--resize '
        '--trim '
        '--unify”',
    )

    # auto_png
    convert_parser_color.add_argument(
        '-P',
        '--auto-png',
        action='store_true',
        help='The same as “'
        '--deskew '
        '--resize '
        '--trim”',
    )

    # backup
    convert_parser.add_argument(
        '-b',
        '--backup',
        action='store_true',
        help='Backup original images (add _backup.ext to filename).',
    )

    # blur
    convert_parser.add_argument(
        '--blur',
        nargs=1,
        default=False,
        help='Blur images for better jpeg2000 compression rate.',
    )

    # border
    convert_parser.add_argument(
        '-B',
        '--border',
        action='store_true',
        help='Frame the images with a white border.',
    )

    # color
    convert_parser.add_argument(
        '-c',
        '--color',
        action='store_true',
        help='The input files are colored images.',
    )

    # deskew
    convert_parser.add_argument(
        '-d',
        '--deskew',
        action='store_true',
        help='Straighten the images.',
    )

    # enlighten_border
    convert_parser.add_argument(
        '-e',
        '--enlighten-border',
        action='store_true',
        help='Enlighten the border.',
    )

    # force
    convert_parser.add_argument(
        '-f',
        '--force',
        action='store_true',
        help='Overwrite the output file even if it exists and it seems to be '
        'already converted.',
    )

    # join
    convert_parser.add_argument(
        '-j',
        '--join',
        action='store_true',
        help='Join single paged PDF files to one PDF file. This option takes '
        'only effect with the option --pdf.',
    )

    # ocr
    convert_parser.add_argument(
        '-o',
        '--ocr',
        action='store_true',
        default=False,
        help='Perform optical character recognition (OCR) on the input '
        'files. The output format must be PDF.',
    )

    # ocr_language
    convert_parser.add_argument(
        '-l',
        '--ocr-language',
        nargs='+',
        help='Run tesseract --list-langs to get your installed languages.',
    )

    # pdf
    convert_parser.add_argument(
        '-p',
        '--pdf',
        action='store_true',
        help='Generate a PDF file.',
    )

    # png
    convert_parser.add_argument(
        '-n',
        '--png',
        action='store_true',
        help='Generate a PNG file.',
    )

    # quality
    convert_parser_compress.add_argument(
        '-q',
        '--quality',
        default=False,
        help='Compress the input images in a specific quality. The command '
        'automatically turns into the color mode.',
    )

    # resize
    convert_parser.add_argument(
        '-r',
        '--resize',
        action='store_true',
        help='Resize 200 percent.',
    )

    # threshold
    convert_parser_compress.add_argument(
        '-t',
        '--threshold',
        default='50%',
        type=check_threshold,
        help='Threshold for monochrome, black and white images, default 50 '
        'percent. Colors above the threshold will be white and below will be '
        'black.',
    )

    # trim
    convert_parser.add_argument(
        '-T',
        '--trim',
        action='store_true',
        help='This option removes any edges that are exactly the same color '
        'as the corner pixels.',
    )

    # unify
    convert_parser.add_argument(
        '-u',
        '--unify',
        action='store_true',
        help='Unify the page size of all pages in a PDF File. The output must '
        'be a joined PDF.',
    )

    convert_parser.add_argument(
        'input_files',
        help=list_files.doc_examples('%(prog)s', 'tiff'),
        nargs='+',
    )

    ##
    # extract
    ##

    extract_parser = subcommand.add_parser(
        'extract',
        aliases=['ex', 'e'],
        description='Extract images from a PDF file and export them in the '
        'TIFF format.'
    )

    extract_parser.add_argument(
        'input_files',
        metavar='input_file',
        help='A pdf file',
        nargs='+',
    )

    ##
    # join
    ##

    join_parser = subcommand.add_parser(
        'join',
        aliases=['jn', 'j'],
        description='Join the input files into a single PDF file. If the '
        'input file is not PDF file, it is converted into a monochrome CCITT '
        'Group 4 compressed PDF file.',
    )

    join_parser.add_argument(
        '-o',
        '--ocr',
        action='store_true',
        default=False,
        help='Perform optical character recognition (OCR) on the input files.',
    )

    join_parser.add_argument(
        '-l',
        '--ocr-language',
        nargs='+',
        help='Run tesseract --list-langs to get your installed languages.',
    )

    join_parser.add_argument(
        'input_files',
        help=list_files.doc_examples('%(prog)s', 'png'),
        nargs='+',
    )

    ##
    # samples
    ##

    samples_parser = subcommand.add_parser(
        'samples',
        aliases=['sp', 's'],
        description='Convert the same image with different threshold values '
        'to find the best threshold value.',
    )

    # No nargs: the attribute `input_files` is a plain string here.
    samples_parser.add_argument(
        'input_files',
        metavar='input_file',
        help='A image or a PDF file. The script selects randomly one page of '
        'a multipaged PDF to build the series with different threshold '
        'values.',
    )

    samples_parser.add_argument(
        '-b',
        '--blur',
        action='store_true',
        help='Convert images on different blur values.',
    )

    samples_parser.add_argument(
        '-q',
        '--quality',
        action='store_true',
        help='Compress to JPEG2000 images in different quality steps.',
    )

    samples_parser.add_argument(
        '-t',
        '--threshold',
        action='store_true',
        help='Convert images on different threshold values to monochrome '
        'black and white images.',
    )

    ##
    # unify
    ##

    unify_parser = subcommand.add_parser(
        'unify',
        aliases=['un', 'u'],
        description='Unify the page size of all pages in a PDF File.',
    )

    # No nargs: the attribute `input_files` is a plain string here.
    unify_parser.add_argument(
        'input_files',
        metavar='input_file',
        help='A PDF file',
    )

    # margin
    unify_parser.add_argument(
        '-m',
        '--margin',
        nargs=1,
        help='Add a margin around each page in the PDF file.',
    )

    return parser


###############################################################################
# do_* functions (alphabetically sorted)
###############################################################################

# do_magick_convert
# do_magick_convert_pdf
# do_magick_identify
# do_pdfimages
# do_pdfinfo_page_count
# do_pdftk_cat
# do_tesseract


def _do_magick_command(command):
    """ImageMagick version 7 introduces a new top level command named
    `magick`. Use this newer command if present.

    :param str command: The ImageMagick tool to run, for example `convert`
      or `identify`.

    :return: A list of command segments: `['magick', command]` if a `magick`
      executable is found in the search path, else `[command]`.
    :rtype: list
    """
    if shutil.which('magick'):
        return ['magick', command]
    return [command]


def _do_magick_convert_enlighten_border(width, height):
    """
    Build the command line arguments to enlighten the border in four regions.

    The border thickness is 5 percent of the mean of `width` and `height`.
    For each of the four border stripes a `-region <geometry> -level 0%,30%`
    group is emitted.

    :param int width: The width of the image.
    :param int height: The height of the image.

    :return: Command line arguments for imagemagicks’ `convert`.
    :rtype: list
    """
    border = int(round(((width + height) / 2) * 0.05))

    regions = (
        # top
        '{}x{}'.format(width - border, border),
        # right
        '{}x{}+{}'.format(border, height - border, width - border),
        # bottom
        '{}x{}+{}+{}'.format(width - border, border, border, height - border),
        # left
        '{}x{}+{}+{}'.format(border, height - border, 0, border),
    )

    out = []
    for region in regions:
        out += ['-region', region, '-level', '0%,30%']

    return out


def do_magick_convert(input_file, output_file, threshold=None,
                      enlighten_border=False, border=False, resize=False,
                      deskew=False, trim=False, color=False, quality=75,
                      blur=False):
    """
    Convert a input image file using the subcommand convert of the
    imagemagick suite.

    :param input_file: The image to convert.
    :param output_file: The destination file. Its attribute `extension` is
      read to decide about the JPEG2000 compression.
    :param str threshold: A threshold like `50%`. Ignored in color mode.
    :param bool enlighten_border: Lighten four stripes along the image
      edges (the image size is queried with `identify`).
    :param bool border: Frame the image with a white border.
    :param bool resize: Resize the image to 200 percent.
    :param bool deskew: Straighten the image.
    :param bool trim: Trim the edges of the image.
    :param bool color: Keep the colors instead of producing a monochrome,
      Group4 compressed image.
    :param quality: The compression quality, only used in color mode.
    :param blur: Value for imagemagicks’ `-blur`, falsy for no blurring.

    :return: The return value of `run.run`. The callers in this module read
      its attribute `returncode`.
    """

    cmd_args = _do_magick_command('convert')
    cmd_args += ['-units', 'PixelsPerInch']

    if enlighten_border:
        info_input_file = do_magick_identify(input_file)
        cmd_args += _do_magick_convert_enlighten_border(
            info_input_file['width'],
            info_input_file['height'],
        )

    if resize:
        cmd_args += ['-resize', '200%']

    if deskew:
        cmd_args += ['-deskew', '40%']

    if threshold and not color:
        cmd_args += ['-threshold', threshold]

    if trim:
        cmd_args += ['-trim', '+repage']

    if border:
        # `-bordercolor` is a setting and only affects operators that follow
        # it, so it has to precede `-border` to get a white frame.
        cmd_args += ['-bordercolor', '#FFFFFF', '-border', '5%']

    if blur:
        cmd_args += ['-blur', str(blur)]

    if color:
        cmd_args += ['-quality', str(quality)]
        if output_file.extension == 'pdf':
            cmd_args += ['-compress', 'JPEG2000']
    else:
        cmd_args += ['-compress', 'Group4', '-monochrome']

    cmd_args += [str(input_file), str(output_file)]

    return run.run(cmd_args)


def do_magick_identify(input_file):
    """The different informations of an image.

    `identify` is called once per value with the format escapes `%w`, `%h`
    and `%k`.

    :param input_file: The input file.
    :type input_file: jfscripts._utils.FilePath

    :return: A dictionary with the keys `width`, `height` and `colors`.
    :rtype: dict
    """
    def _get_by_format(fmt):
        command = _do_magick_command('identify')
        command += ['-format', fmt, str(input_file)]
        return run.check_output(command).decode('utf-8')

    return {
        'width': int(_get_by_format('%w')),
        'height': int(_get_by_format('%h')),
        'colors': int(_get_by_format('%k')),
    }


def do_pdfimages(pdf_file, state, page_number=None, use_tmp_identifier=True):
    """Convert a PDF file to images in the TIFF format.

    The command is executed in `state.common_path`.

    :param pdf_file: The input file.
    :type pdf_file: jfscripts._utils.FilePath
    :param state: The state object.
    :type state: jfscripts.pdf_compress.State
    :param int page_number: Extract only the page with a specific page number.
    :param bool use_tmp_identifier: Append the temporary identifier to the
      image root, so that the images are found by `collect_images` and
      removed by `cleanup`. If false, the plain basename of the PDF is used.

    :return: The return value of `run.run`.
    """
    if use_tmp_identifier:
        image_root = '{}_{}'.format(pdf_file.basename, tmp_identifier)
    else:
        image_root = pdf_file.basename

    command = ['pdfimages', '-tiff']
    if page_number:
        # First and last page are identical: extract exactly one page.
        page = str(page_number)
        command += ['-f', page, '-l', page]
    command += [str(pdf_file), image_root]

    return run.run(command, cwd=state.common_path)


def do_pdfinfo_page_count(pdf_file):
    """Get the amount of pages a PDF files have.

    The number is parsed from the line `Pages: <number>` in the output of
    `pdfinfo`.

    :param str pdf_file: Path of the PDF file.

    :return: Page count
    :rtype: int

    :raises ValueError: If the output of `pdfinfo` contains no page count.
    """
    output = run.check_output(['pdfinfo', str(pdf_file)], encoding='utf-8')
    # Require at least one digit, so that a match is always convertible.
    match = re.search(r'Pages:\s*([0-9]+)', output)
    if match is None:
        raise ValueError(
            'Could not determine the page count of {}'.format(pdf_file)
        )
    return int(match.group(1))


def do_pdftk_cat(pdf_files, state):
    """Join a list of PDF files into a single PDF file using the tool `pdftk`.

    The joined file is written to `state.common_path` and is named after the
    basename of the first input file, followed by `_magick.pdf`. A success
    message is printed if `pdftk` exits with the return code 0.

    :param list pdf_files: a list of PDF files
    :param state: The state object.
    :type state: jfscripts.pdf_compress.State

    :return: None
    """
    output_file_path = os.path.join(
        state.common_path,
        '{}_magick.pdf'.format(state.first_input_file.basename)
    )

    cmd = ['pdftk']
    cmd += [str(pdf_file) for pdf_file in pdf_files]
    cmd += ['cat', 'output', output_file_path]

    result = run.run(cmd)
    if result.returncode == 0:
        print('Successfully created: {}'.format(output_file_path))


def do_tesseract(input_file, languages=('deu', 'eng')):
    """Run `tesseract` on an image to produce a PDF file.

    :param input_file: The image file. Its attribute `base` is used as the
      output base for tesseract.
    :type input_file: jfscripts._utils.FilePath
    :param languages: The languages passed to the option `-l`, joined by
      `+`. With a falsy value (`None`, empty) the option `-l` is omitted.
    :type languages: list or tuple

    :return: The return value of `run.run`. Standard output and standard
      error of the process are captured.
    """
    cmd_args = ['tesseract']
    if languages:
        cmd_args += ['-l', '+'.join(languages)]
    cmd_args += [str(input_file), input_file.base, 'pdf']
    return run.run(cmd_args, stderr=run.PIPE, stdout=run.PIPE)


###############################################################################
# File collection, cleanup and page size helper functions
###############################################################################

def collect_images(state):
    """Collect all images using the temporary identifier in a common path.

    Only files that contain the temporary identifier in their name and are
    larger than 200 bytes are returned.

    :param state: The state object.
    :type state: jfscripts.pdf_compress.State

    :return: A sorted list of image paths.
    :rtype: list
    """
    prefix = state.common_path
    candidates = (
        os.path.join(prefix, file_name)
        for file_name in os.listdir(prefix)
        if tmp_identifier in file_name
    )
    # NOTE(review): the size limit presumably drops empty or stub images
    # written by pdfimages -- confirm.
    return sorted(path for path in candidates if os.path.getsize(path) > 200)


def cleanup(state):
    """Delete all files using the temporary identifier in a common path.

    Every directory entry of `state.common_path` whose name contains the
    temporary identifier is removed.

    :param state: The state object.
    :type state: jfscripts.pdf_compress.State

    :return: None
    """
    directory = state.common_path
    for work_file in os.listdir(directory):
        if tmp_identifier in work_file:
            os.remove(os.path.join(directory, work_file))


def unify_page_size(input_file, output_file, margin=0):
    """Give all pages of a PDF file the same page size.

    Every page is centered on a blank page that has the size of the widest
    and the highest media box found in the input file, enlarged by `margin`
    on each side.

    :param input_file: The PDF file to read.
    :param output_file: The PDF file to write.
    :param int margin: The margin added around each page.

    :return: None
    """
    # The reader has to stay open until the output is written, therefore
    # the writing happens inside of the outer `with` block.
    with open(str(input_file), 'rb') as input_handle:
        input_pdf = PyPDF2.PdfFileReader(input_handle)
        output_pdf = PyPDF2.PdfFileWriter()

        sizes = [
            (page, page.mediaBox.getWidth(), page.mediaBox.getHeight())
            for page in input_pdf.pages
        ]
        max_width = max((width for _, width, _ in sizes), default=0)
        max_height = max((height for _, _, height in sizes), default=0)

        for page, width, height in sizes:
            blank = PyPDF2.pdf.PageObject.createBlankPage(
                None,
                max_width + 2 * margin,
                max_height + 2 * margin,
            )
            # Scale factor 1: the page is only moved to the center.
            blank.mergeScaledTranslatedPage(
                page,
                1,
                margin + (max_width - width) / 2,
                margin + (max_height - height) / 2,
            )
            output_pdf.addPage(blank)

        with open(str(output_file), 'wb') as output_handle:
            output_pdf.write(output_handle)


###############################################################################
# subcommand wrapper functions
###############################################################################


def subcommand_convert_file(arguments):
    """Manipulate one input file

    The file is converted with `do_magick_convert` and, if OCR is requested,
    processed by tesseract afterwards. In this case the intermediate image
    is removed and the resulting PDF file is returned.

    :param tuple arguments: A tuple containing two elements: The first element
      is the input_file file object and the second element is the state object.

    :return: The output file.

    :raises RuntimeError: If `convert` or `tesseract` fail or if the file
      handed over to tesseract is neither a `tiff` nor a `jp2` file.
    """
    input_file = arguments[0]

    # In a worker process started with the "spawn" method the module is
    # imported anew and the global `args` is still None. Fall back to the
    # arguments stored in the state object that travels with the task.
    cli_args = args
    if cli_args is None and len(arguments) > 1:
        cli_args = arguments[1].args

    if cli_args.color:
        intermediate_extension = 'jp2'
    else:
        intermediate_extension = 'tiff'

    if cli_args.ocr:
        # Tesseract needs an image as input; it creates the PDF itself.
        extension = intermediate_extension
    elif cli_args.pdf:
        extension = 'pdf'
    elif cli_args.auto_png or cli_args.png:
        extension = 'png'
    else:
        extension = intermediate_extension

    if not cli_args.join:
        output_file = input_file.new(extension=extension,
                                     del_substring='_' + tmp_identifier)
    else:
        # Keep the temporary identifier: the single pages are intermediate
        # files that are joined and cleaned up later.
        output_file = input_file.new(extension=extension)

    if input_file == output_file:
        info_output_file = do_magick_identify(output_file)
        # Two colors: the file is treated as already monochrome.
        if info_output_file['colors'] == 2 and not cli_args.force:
            print('The output file has already been converted.')
            return output_file

        if cli_args.backup:
            backup = input_file.new(append='_backup')
            shutil.copy2(str(input_file), str(backup))

    completed_process = do_magick_convert(
        input_file,
        output_file,
        threshold=cli_args.threshold,
        enlighten_border=cli_args.enlighten_border,
        border=cli_args.border,
        resize=cli_args.resize,
        deskew=cli_args.deskew,
        trim=cli_args.trim,
        color=cli_args.color,
        quality=cli_args.quality,
        blur=cli_args.blur,
    )

    if completed_process.returncode != 0:
        raise RuntimeError('magick convert failed.')

    if cli_args.ocr:
        if output_file.extension not in ['tiff', 'jp2']:
            raise RuntimeError('Tesseract needs a tiff or a jp2 file as '
                               'input.')
        completed_process = do_tesseract(output_file, cli_args.ocr_language)
        if completed_process.returncode != 0:
            raise RuntimeError('tesseract failed.')
        os.remove(str(output_file))
        output_file = output_file.new(extension='pdf')

    return output_file


def subcommand_join_convert_pdf(arguments):
    """Convert one input file of the subcommand `join` into a PDF file.

    Without OCR the file is converted directly into a PDF file. With OCR it
    is converted into a TIFF file first, which is processed by tesseract and
    removed afterwards.

    :param tuple arguments: A tuple containing two elements: The first element
      is the input_file file object and the second element is the state object.

    :return: The resulting PDF file.

    :raises RuntimeError: If `convert` or `tesseract` fail.
    """
    input_file = arguments[0]

    # In a worker process started with the "spawn" method the module is
    # imported anew and the global `args` is still None. Fall back to the
    # arguments stored in the state object that travels with the task.
    cli_args = args
    if cli_args is None and len(arguments) > 1:
        cli_args = arguments[1].args

    extension = 'tiff' if cli_args.ocr else 'pdf'

    output_file = input_file.new(extension=extension)
    process = do_magick_convert(input_file, output_file)
    if process.returncode != 0:
        raise RuntimeError('join: convert to pdf failed.')

    if cli_args.ocr:
        # Respect --ocr-language; without it the default languages of
        # do_tesseract are used.
        languages = getattr(cli_args, 'ocr_language', None)
        if languages:
            process = do_tesseract(output_file, languages)
        else:
            process = do_tesseract(output_file)
        if process.returncode != 0:
            raise RuntimeError('join: ocr failed.')
        os.remove(str(output_file))
        output_file = output_file.new(extension='pdf')

    return output_file


def subcommand_samples(input_file, state):
    """Generate a list of example files with different threshold, quality
    or blur values.

    If the input is a PDF file, one randomly selected page is extracted and
    used as the source image. Depending on `state.args` three series are
    generated: thresholds and qualities from 40 to 95 in steps of 5 and
    blur values from 1 to 5.

    :param input_file: The input file.
    :type input_file: jfscripts._utils.FilePath
    :param state: The state object.
    :type state: jfscripts.pdf_compress.State

    :return: None

    :raises RuntimeError: If no image could be extracted from the PDF file.
    """

    args = state.args

    def fix_output_path(output_file):
        # Removing the temporary identifier from the name of an extracted
        # image leaves the fragment `_-000` behind (presumably identifier
        # separator plus the image counter of pdfimages) -- strip it.
        output_file = str(output_file).replace('_-000', '')
        return FilePath(output_file, absolute=True)

    if state.input_is_pdf:
        page_count = do_pdfinfo_page_count(input_file)
        page_number = random.randint(1, page_count)
        print('Used page number {} of {} pages to generate a series of images '
              'with different threshold values.'
              .format(page_number, page_count))
        do_pdfimages(input_file, state, page_number)
        images = collect_images(state)
        if not images:
            raise RuntimeError(
                'No image could be extracted from page {} of {}.'
                .format(page_number, input_file)
            )
        input_file = FilePath(images[0], absolute=True)

    if args.threshold:
        for threshold in range(40, 100, 5):
            appendix = '_threshold-{}'.format(threshold)
            output_file = input_file.new(extension='tiff', append=appendix,
                                         del_substring=tmp_identifier)
            do_magick_convert(input_file, fix_output_path(output_file),
                              threshold='{}%'.format(threshold))

    if args.quality:
        for quality in range(40, 100, 5):
            appendix = '_quality-{}'.format(quality)
            output_file = input_file.new(extension='pdf', append=appendix,
                                         del_substring=tmp_identifier)
            do_magick_convert(input_file, fix_output_path(output_file),
                              color=True, quality=quality)

    if args.blur:
        for blur in (1, 2, 3, 4, 5):
            appendix = '_blur-{}'.format(blur)
            output_file = input_file.new(extension='pdf', append=appendix,
                                         del_substring=tmp_identifier)
            do_magick_convert(input_file, fix_output_path(output_file),
                              color=True, blur=blur, quality=100)

###############################################################################
# Helper classes, path conversion and the main function
###############################################################################


class Timer(object):
    """Class to calculate the execution time. Mainly to test the speed
    improvements of the multiprocessing implementation."""

    def __init__(self):
        now = time.time()
        self.begin = now
        """UNIX timestamp the execution began."""
        self.end = now
        """UNIX timestamp the execution ended. Equal to `begin` until
        :meth:`stop` is called."""

    def stop(self):
        """Stop the time calculation and return the formated result.

        :return: The elapsed seconds with one decimal place, followed by
          `s`, for example `1.5s`.
        :rtype: str
        """
        self.end = time.time()
        return '{:.1f}s'.format(self.end - self.begin)


class State(object):
    """This object holds runtime data for the multiprocessing environment.

    :param args: The parsed command line arguments. The attribute
      `input_files` is either a single string or a list that is expanded
      using `list_files.list_files`.

    :raises ValueError: If no input file is left after the expansion.
    """

    def __init__(self, args):
        self.args = args
        """argparse arguments"""

        self.cwd = os.getcwd()
        """The current working directory"""

        self.input_files = []
        """A list of all input files."""
        # Subcommands with a single positional argument deliver a string.
        if isinstance(self.args.input_files, str):
            self.input_files = [self.args.input_files]
        else:
            self.input_files = list_files.list_files(self.args.input_files)

        # Fail with a clear message instead of an IndexError further down.
        if not self.input_files:
            raise ValueError('No input files found.')

        self.common_path = \
            list_files.common_path(self.input_files)
        """The common path prefix of all input files."""

        if self.common_path == '':
            self.common_path = self.cwd
        self.first_input_file = FilePath(self.input_files[0], absolute=True)
        """The first input file."""

        self.input_is_pdf = False
        """Boolean that indicates if the first file is a pdf."""

        # Case insensitive: `.PDF` counts as a PDF file, too.
        if self.first_input_file.extension.lower() == 'pdf':
            self.input_is_pdf = True


def convert_file_paths(files):
    """Convert a list of file paths in a list of
    :class:`jfscripts._utils.FilePath` objects.

    Every object is created with `absolute=True`.

    :param list files: A list of file paths

    :return: a list of  :class:`jfscripts._utils.FilePath` objects.
    :rtype: list
    """
    return [FilePath(path, absolute=True) for path in files]


def _run_per_file(worker, input_files, state, parallel):
    """Call `worker` once per input file, optionally in a process pool.

    :param worker: A function that takes one tuple `(input_file, state)`.
    :param list input_files: The files to process.
    :param state: The state object handed over to every call.
    :param bool parallel: Distribute the calls with `multiprocessing.Pool`.

    :return: The return values of `worker`, in the order of `input_files`.
    :rtype: list
    """
    data = [(input_file, state) for input_file in input_files]
    if parallel:
        # The context manager releases the worker processes, even if one
        # of the calls raises.
        with multiprocessing.Pool() as pool:
            return pool.map(worker, data)
    return [worker(item) for item in data]


def main():
    """Main function.

    Parses the command line, sets up the global `args` and `state` objects,
    checks the external dependencies and dispatches to the selected
    subcommand. Finally the execution time is printed.

    :return: None

    :raises ValueError: If the input files do not fit to the subcommand
      (for example more than one PDF file or no PDF file at all).
    """
    global args
    global state

    timer = Timer()
    args = get_parser().parse_args()

    run.setup(verbose=args.verbose, colorize=args.colorize)
    state = State(args)

    check_dependencies(*dependencies)

    # argparse stores the subcommand as typed by the user, so every alias
    # defined in get_parser() has to be listed in the checks below.

    ##
    # convert
    ##

    # 'cv' is not an alias of the parser; it is kept for compatibility.
    if args.subcommand in ['convert', 'con', 'cv', 'c']:

        if args.join and not args.pdf:
            args.pdf = True

        if args.auto_black_white or args.auto_color:
            args.deskew = True
            args.join = True
            args.ocr = True
            args.pdf = True
            args.trim = True
            args.unify = True

        # NOTE(review): the help text of --auto-color lists --resize as
        # well, but resize is only activated for the two presets below --
        # confirm which one is intended.
        if args.auto_black_white:
            args.resize = True

        if args.auto_png:
            args.deskew = True
            args.trim = True
            args.resize = True

        if args.auto_color:
            args.color = True

        if args.quality and not args.color:
            args.color = True

        if args.color and not args.quality:
            args.quality = 75

        if args.blur:
            # nargs=1 delivers a list with one element.
            args.blur = args.blur[0]

        if state.input_is_pdf:
            if len(state.input_files) > 1:
                raise ValueError('Specify only one PDF file.')
            do_pdfimages(state.first_input_file, state)
            input_files = collect_images(state)
        else:
            input_files = state.input_files

        input_files = convert_file_paths(input_files)

        output_files = _run_per_file(subcommand_convert_file, input_files,
                                     state, args.multiprocessing)

        if args.join:
            do_pdftk_cat(output_files, state)

        if args.unify and len(state.input_files) > 1:
            unify_page_size(
                state.first_input_file.new(append='_magick'),
                state.first_input_file.new(append='_unifed'),
                margin=0,
            )

        if not args.no_cleanup:
            cleanup(state)

    ##
    # extract
    ##

    elif args.subcommand in ['extract', 'ex', 'e']:
        if not state.input_is_pdf:
            raise ValueError('Specify a PDF file.')
        do_pdfimages(state.first_input_file, state, page_number=None,
                     use_tmp_identifier=False)

    ##
    # join
    ##

    elif args.subcommand in ['join', 'jn', 'j']:
        input_files = convert_file_paths(state.input_files)
        files_converted = _run_per_file(subcommand_join_convert_pdf,
                                        input_files, state,
                                        args.multiprocessing)
        do_pdftk_cat(files_converted, state)

    ##
    # samples
    ##

    elif args.subcommand in ['samples', 'sp', 's']:
        # Without an explicit selection all series are generated.
        if not (args.blur or args.quality or args.threshold):
            args.blur = True
            args.quality = True
            args.threshold = True

        subcommand_samples(state.first_input_file, state)
        if not args.no_cleanup:
            cleanup(state)

    ##
    # unify
    ##

    elif args.subcommand in ['unify', 'un', 'u']:
        if not state.input_is_pdf:
            raise ValueError('Specify a PDF file.')

        if len(state.input_files) > 1:
            raise ValueError('Specify only one PDF file.')

        if args.margin:
            margin = int(args.margin[0])
        else:
            margin = 0

        unify_page_size(
            state.first_input_file,
            state.first_input_file.new(append='_unifed'),
            margin,
        )

    print('Execution time: {}'.format(timer.stop()))


# Run the command line interface only if the module is executed as a
# script, not if it is imported.
if __name__ == '__main__':
    main()