FreedomBox/doc/scripts/post-processor

#!/usr/bin/python3
# SPDX-License-Identifier: AGPL-3.0-or-later

import argparse
import xml.etree.ElementTree as etree


def parse_arguments():
    """Return the parsed command line arguments.

    The result is an argparse namespace with two attributes: 'subcommand',
    the name of the chosen sub command, and 'filename', the file that sub
    command operates on. A sub command is mandatory.
    """
    top_level = argparse.ArgumentParser()
    commands = top_level.add_subparsers(dest='subcommand', help='Sub command')
    # Refuse to run when no sub command is given on the command line.
    commands.required = True

    # (sub command name, sub command help, help for its 'filename' argument)
    command_table = (
        ('remove-footer', 'Remove footer from the XML document',
         'Name of the XML file'),
        ('fix-wiki-urls', 'Fix wrongly formatted wiki urls',
         'Name of the Docbook file'),
    )
    for name, summary, filename_help in command_table:
        command = commands.add_parser(name, help=summary)
        command.add_argument('filename', help=filename_help)

    return top_level.parse_args()


def subcommand_fix_wiki_urls(arguments):
    """Rebase the wiki URLs in the given file onto the wiki's root URL.

    The base for relative links in Debian wiki is the wiki's URL but our
    extraction process rebases the relative links to the page containing
    the link. This function fixes that by replacing every occurrence of the
    page's own URL with the wiki URL, rewriting the file in place.

    The path in arguments.filename must end in '<lang>/<page>.<extension>':
    the directory holding the file names the language and the file name up
    to its first dot names the wiki page. Any directories before that are
    ignored. A path with fewer than two '/'-separated components raises
    ValueError.

    The file is read and written as UTF-8 regardless of the locale, so
    translated pages with non-ASCII text are handled consistently.
    """
    WIKI_URL = 'https://wiki.debian.org'
    # Only the last two components matter; this also accepts absolute or
    # more deeply nested paths.
    *_, lang, file_name = arguments.filename.split('/')
    page_name = file_name.split('.')[0]

    # English pages have no language prefix in their URL.
    if lang == 'en':
        pattern = f'{WIKI_URL}/FreedomBox/Manual/{page_name}'
    else:
        pattern = f'{WIKI_URL}/{lang}/FreedomBox/Manual/{page_name}'

    with open(arguments.filename, 'r', encoding='utf-8') as xml_file:
        lines = xml_file.readlines()

    lines = [line.replace(pattern, WIKI_URL) for line in lines]

    with open(arguments.filename, 'w', encoding='utf-8') as xml_file:
        xml_file.writelines(lines)


def subcommand_remove_footer(arguments):
    """Remove the footer template from the given wiki page.

    The footer is looked for in the last <section> of the document, found
    by repeatedly descending into the last child while it is a <section>.
    Everything from the last <informaltable> of that section to the end of
    the section is removed. If the element that then ends the section has
    text starting with 'Back to', it is removed as well.

    When the last section contains no <informaltable>, its content is left
    untouched instead of being deleted.

    The file named by arguments.filename is rewritten in place as UTF-8.
    Its first two lines are copied over unchanged; they are assumed to hold
    the XML declaration and the DOCTYPE, which etree does not reproduce.
    """
    filename = arguments.filename
    tree = etree.parse(filename)
    root = tree.getroot()

    # The footer will always be in the last <section>
    last_section = root
    while len(last_section) and last_section[-1].tag == 'section':
        last_section = last_section[-1]

    # Locate the last <informaltable>, searching from the end.
    table_index = None
    for index in range(len(last_section) - 1, -1, -1):
        if last_section[index].tag == 'informaltable':
            table_index = index
            break

    if table_index is not None:
        # Remove <informaltable> and everything that follows it.
        del last_section[table_index:]

        # Remove the line "Back to Features introduction or manual pages."
        # The section may now be empty and an element may have no text.
        if len(last_section):
            text = last_section[-1].text
            if text and text.startswith('Back to'):
                del last_section[-1]

    processed_xml = etree.tostring(root, encoding='unicode')

    with open(filename, 'r', encoding='utf-8') as xml_file:
        # <xml> and <DOCTYPE> elements which etree skips
        header = [xml_file.readline() for _ in range(2)]

    with open(filename, 'w', encoding='utf-8') as xml_file:
        xml_file.writelines(header)
        xml_file.write(processed_xml)


def main():
    """Parse the command line and run the requested sub command.

    The sub command name is mapped to a module level function by replacing
    its dashes with underscores and prefixing 'subcommand_'; that function
    is then called with the parsed arguments.
    """
    parsed = parse_arguments()
    handler_name = 'subcommand_' + parsed.subcommand.replace('-', '_')
    handler = globals()[handler_name]
    handler(parsed)


# Run the command line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()