FreedomBox/doc/scripts/post-processor
Fioddor Superconcentrado f0000c4ba3
post-processor: Solve 1908 fixing the wiki links fix
The previous logic of the wiki links fix didn't work for translated pages.

Closes: #1908.

Signed-off-by: Fioddor Superconcentrado <fioddor@gmail.com>
[jvalleroy: Remove extra spaces]
[jvalleroy: Fix output path]
Signed-off-by: James Valleroy <jvalleroy@mailbox.org>
Reviewed-by: James Valleroy <jvalleroy@mailbox.org>
2020-07-25 10:12:27 -04:00

98 lines
3.1 KiB
Python
Executable File

#!/usr/bin/python3
# SPDX-License-Identifier: AGPL-3.0-or-later
import argparse
import xml.etree.ElementTree as etree
def parse_arguments():
    """Parse the command line and return the resulting argparse namespace.

    A subcommand is mandatory. Each subcommand takes one positional
    ``filename`` argument; the chosen subcommand's name is stored in the
    ``subcommand`` attribute of the returned namespace.
    """
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest='subcommand', help='Sub command')
    # Refuse to run when no subcommand is given on the command line.
    subparsers.required = True

    # (subcommand name, subcommand help, help for its 'filename' argument)
    commands = (
        ('remove-footer', 'Remove footer from the XML document',
         'Name of the XML file'),
        ('fix-wiki-urls', 'Fix wrongly formatted wiki urls',
         'Name of the Docbook file'),
    )
    for command, command_help, filename_help in commands:
        command_parser = subparsers.add_parser(command, help=command_help)
        command_parser.add_argument('filename', help=filename_help)

    return parser.parse_args()
def subcommand_fix_wiki_urls(arguments):
    """Rebase the wiki urls in the given file, rewriting it in place.

    The base for relative links in Debian wiki is the wiki's URL but our
    extraction process rebases the relative links to the page containing
    the link. This function fixes that by replacing every occurrence of
    the page's own URL with the wiki's base URL.

    The language and the page name are derived from the path in
    ``arguments.filename``, which must end in ``<lang>/<page>[.<ext>...]``:
    the page name is the file name up to its first dot. For the language
    'en' the page URL carries no language prefix; for any other language
    the language code is the first component of the URL path.

    Raises ValueError if the file name has no parent directory component.
    """
    WIKI_URL = 'https://wiki.debian.org'

    # Only the last two path components are meaningful, so any number of
    # leading directories (including an absolute path) is accepted.
    *_, lang, file_name = arguments.filename.split('/')
    page_name = file_name.split('.')[0]

    if lang == 'en':
        pattern = f'{WIKI_URL}/FreedomBox/Manual/{page_name}'
    else:
        pattern = f'{WIKI_URL}/{lang}/FreedomBox/Manual/{page_name}'

    # Be explicit about the encoding instead of inheriting the locale's.
    # NOTE(review): assumes the Docbook files are UTF-8 encoded - confirm.
    with open(arguments.filename, 'r', encoding='utf-8') as xml_file:
        lines = xml_file.readlines()

    lines = [line.replace(pattern, WIKI_URL) for line in lines]

    with open(arguments.filename, 'w', encoding='utf-8') as xml_file:
        xml_file.writelines(lines)
def subcommand_remove_footer(arguments):
    """Remove the footer template from the given wiki page, in place.

    The footer is searched for among the direct children of the innermost
    trailing <section> (the root element itself when its last child is not
    a <section>). The last <informaltable> found there is removed together
    with every element after it. If the element that then ends the section
    has text starting with 'Back to', it is removed as well.

    When there is no <informaltable> among those children, no element is
    removed.

    The first two lines of the file are written back verbatim, followed by
    the re-serialized element tree.
    """
    filename = arguments.filename
    root = etree.parse(filename).getroot()

    # The footer will always be in the last <section>: descend through the
    # trailing <section> elements as deep as they go.
    last_section = root
    while len(last_section) and last_section[-1].tag == 'section':
        last_section = last_section[-1]

    # Locate the footer table first, so that a page without one is left
    # intact instead of being emptied element by element.
    footer_start = max(
        (index for index, child in enumerate(last_section)
         if child.tag == 'informaltable'), default=None)

    if footer_start is not None:
        # Drop <informaltable> itself and everything that follows it.
        del last_section[footer_start:]

        # Remove the line "Back to Features introduction or manual pages."
        # The section may now be empty, and an element that only holds
        # child elements has no text (None).
        if len(last_section):
            trailer = last_section[-1]
            if (trailer.text or '').startswith('Back to'):
                last_section.remove(trailer)

    processed_xml = etree.tostring(root, encoding='utf-8').decode()

    # The payload was serialized as UTF-8, so read and write the file as
    # UTF-8 explicitly rather than with the locale's default encoding.
    # NOTE(review): assumes the input file itself is UTF-8 - confirm.
    with open(filename, 'r', encoding='utf-8') as xml_file:
        # <xml> and <DOCTYPE> elements which etree skips
        header = xml_file.readlines()[:2]

    with open(filename, 'w', encoding='utf-8') as xml_file:
        xml_file.writelines(header)
        xml_file.write(processed_xml)
def main():
    """Parse the command line and run the selected subcommand.

    The subcommand name has its dashes turned into underscores and is
    looked up among this module's globals with a 'subcommand_' prefix;
    the function found is called with the parsed arguments.
    """
    arguments = parse_arguments()
    handler_name = 'subcommand_' + arguments.subcommand.replace('-', '_')
    handler = globals()[handler_name]
    handler(arguments)


# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()