FreedomBox/doc/fetch-manual-pages
Joseph Nuthalapati b168599106
docs: Fix MediaWiki manual page download failing
- Removed an unnecessary and dangerous optimization which was added earlier.

Fixes #1369

Signed-off-by: Joseph Nuthalapati <njoseph@thoughtworks.com>
Reviewed-by: James Valleroy <jvalleroy@mailbox.org>
2018-09-04 21:50:21 -04:00

60 lines
1.8 KiB
Python
Executable File

#!/usr/bin/python3
#
# This file is part of FreedomBox.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import re
import urllib.request
# URL template for a single manual page rendered as DocBook; the ``{}``
# placeholder is filled with the page name.
MANUAL_PAGE_URL = (
    "https://wiki.debian.org/FreedomBox/Manual/{}?action=show&mimetype=text%2Fdocbook"
)

# URL of the raw wiki markup of the manual index page.
MANUAL_INDEX_RAW_URL = (
    "https://wiki.debian.org/FreedomBox/Manual?action=raw"
)

# Page names found in the index; filled in by list_manual_pages() and read
# by fetch_manual_pages_in_docbook_format().
manual_pages = []

# Page names that list_manual_pages() removes from manual_pages.
to_remove = [
    'QuickStart',
    'GettingHelp',
    'Developer',
]
def list_manual_pages():
    """Fetch the list of manual pages and write them to a file.

    Downloads the raw wiki markup of the manual index page, takes the first
    ``FreedomBox/Manual/<Page>`` reference found on each line, drops the
    pages named in ``to_remove`` and stores the remaining page names in the
    module-level ``manual_pages`` list. The names are also written, one per
    line, to ``manual-pages.list`` in the current working directory.
    """
    global manual_pages

    # Raw string: '\w' inside a plain literal is an invalid escape sequence.
    pattern = re.compile(r'FreedomBox/Manual/\w+')

    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(MANUAL_INDEX_RAW_URL) as response:
        lines = [raw_line.decode() for raw_line in response]

    pages = []
    for line in lines:
        # Only the first reference on a line is used.
        match = pattern.search(line)
        if match:
            # Keep the last path component, i.e. the bare page name.
            pages.append(match.group().split('/')[-1])

    # Filter instead of list.remove() so that an excluded page that is
    # referenced more than once in the index is dropped completely.
    manual_pages = [page for page in pages if page not in to_remove]

    with open('manual-pages.list', 'w') as lst_file:
        lst_file.write('\n'.join(manual_pages))
def fetch_manual_pages_in_docbook_format():
    """Download every listed manual page in DocBook format.

    Each name in the module-level ``manual_pages`` list is substituted into
    ``MANUAL_PAGE_URL`` and the response is saved in the current working
    directory as ``<page>.raw.xml``.
    """
    for page_name in manual_pages:
        urllib.request.urlretrieve(
            MANUAL_PAGE_URL.format(page_name),
            filename='{}.raw.xml'.format(page_name))
def main():
    """Build the list of manual pages, then download each of them."""
    # The download step reads the list that the first step populates, so
    # the order of these two calls matters.
    list_manual_pages()
    fetch_manual_pages_in_docbook_format()


# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()