doc: Use Makefile to fetch raw wiki files

- Fetch all pages included in manual
- Also fetch pages with + in name
- Generate raw xml from raw wiki files
- Don't fetch raw xml for manual pages
- Delete manual raw xml at end of fetch
- Iterate manual pages over raw wiki files instead of raw xml
- Skip remove-footer step for generated manual xml pages

Signed-off-by: James Valleroy <jvalleroy@mailbox.org>
[sunil: Don't remove intermediate targets during build]
Signed-off-by: Sunil Mohan Adapa <sunil@medhas.org>
Reviewed-by: Sunil Mohan Adapa <sunil@medhas.org>
This commit is contained in:
James Valleroy 2020-04-12 13:03:08 -04:00
parent 806db903cf
commit 5d80ba1262
No known key found for this signature in database
GPG Key ID: 77C0C75E7B650808
2 changed files with 24 additions and 15 deletions

1
.gitignore vendored
View File

@ -4,7 +4,6 @@
doc/manual/*/*.pdf
doc/manual/*/*.html
doc/manual/*/*.xml
!doc/manual/*/*.raw.xml
doc/plinth.1
doc/dev/_build
\#*

View File

@ -7,7 +7,7 @@ MANUAL_LANGUAGES=en es
# URL templates for the FreedomBox manual on wiki.debian.org.
# {lang-fragment} and {page} are placeholders that the fetch recipes replace
# before calling wget. The "mimetype=text%2Fdocbook" variants are piped through
# xmllint and saved as *.raw.xml; the "action=raw" variants are saved as
# *.raw.wiki. MANUAL_PAGE_URL_RAW has no {lang-fragment}: {page} is replaced
# with the full Include() target taken from the main manual page.
MANUAL_URL="https://wiki.debian.org/{lang-fragment}FreedomBox/Manual?action=show&mimetype=text%2Fdocbook"
MANUAL_URL_RAW="https://wiki.debian.org/{lang-fragment}FreedomBox/Manual?action=raw"
MANUAL_PAGE_URL="https://wiki.debian.org/{lang-fragment}FreedomBox/Manual/{page}?action=show&mimetype=text%2Fdocbook"
MANUAL_PAGE_URL_RAW="https://wiki.debian.org/{page}?action=raw"
DESTDIR=
INSTALL_DIR=$(DESTDIR)/usr/share/freedombox
@ -15,6 +15,7 @@ SCRIPTS_DIR=scripts
# Per-language file lists for the complete manual, one entry per word in
# MANUAL_LANGUAGES: manual/<lang>/freedombox-manual.pdf, plus the matching
# .xml and .raw.xml names obtained by replacing the .pdf suffix.
manual-pdfs=$(foreach lang,$(MANUAL_LANGUAGES),manual/$(lang)/freedombox-manual.pdf)
manual-xmls=$(patsubst %.pdf,%.xml,$(manual-pdfs))
manual-raw-xmls=$(patsubst %.pdf,%.raw.xml,$(manual-pdfs))
OUTPUTS=$(manual-pdfs) plinth.1 manual-pages
INSTALL_OPTS=-D --mode=644
@ -67,34 +68,38 @@ fetch: $(fetch-main-list) $(fetch-pages-list)
# fetch-main-<lang>: download the main manual page and its images for one
# language. The pattern stem is the language code. lang-fragment is "<lang>/"
# for use in the URL, except that "en/" is replaced by the empty string.
#
# Steps performed by the recipe:
#   1. Save the raw wiki text as manual/<lang>/freedombox-manual.raw.wiki.
#   2. Download the DocBook rendering and format it with xmllint into
#      manual/<lang>/freedombox-manual.raw.xml.
#   3. Create manual/<lang>/images/, run fetch-images.xslt over the raw XML,
#      de-duplicate its output, and have awk turn each non-empty line into
#      "wget --quiet -O manual/<lang>/images/<field 1> <field 2>", executed by sh.
#   4. Remove the downloaded freedombox-manual.raw.xml again.
#
# NOTE(review): the two consecutive "wget ... -O - $${MANUAL_URL_LANG} |" lines
# look like the pre- and post-change versions of a single line in this diff
# view (without and with --user-agent=Firefox) - confirm against the Makefile.
fetch-main-%: lang = $*
fetch-main-%: lang-fragment = $(subst en/,,$*/)
$(fetch-main-list): fetch-main-%:
MANUAL_URL_RAW_LANG=$(subst {lang-fragment},$(lang-fragment),$(MANUAL_URL_RAW)) ; \
wget --quiet --user-agent=Firefox \
-O manual/$(lang)/freedombox-manual.raw.wiki $${MANUAL_URL_RAW_LANG}
MANUAL_URL_LANG=$(subst {lang-fragment},$(lang-fragment),$(MANUAL_URL)) ; \
wget --quiet -O - $${MANUAL_URL_LANG} | \
wget --quiet --user-agent=Firefox -O - $${MANUAL_URL_LANG} | \
xmllint --format --output manual/$(lang)/freedombox-manual.raw.xml -
mkdir -p manual/$(lang)/images/
xsltproc $(SCRIPTS_DIR)/fetch-images.xslt manual/$(lang)/freedombox-manual.raw.xml | \
sort -u | \
awk 'NF {print "wget --quiet -O manual/$(lang)/images/" $$1 " " $$2}' | \
sh
rm manual/$(lang)/freedombox-manual.raw.xml
# fetch-pages-<lang>: download the individual wiki pages that make up the
# manual for one language. lang and lang-fragment are derived from the pattern
# stem in the same way as for fetch-main-<lang>.
#
# The recipe is a single shell command (every line ends in "; \" or "| \"):
#   - MANUAL_URL_LANG is the raw main manual page URL for the language.
#   - PAGES is the sorted, de-duplicated list of page names found in that page.
#     The "<<Include(NAME," form accepts letters, digits, "_", "/", "+" and "-".
#   - For each page, RAW_URL is MANUAL_PAGE_URL_RAW with {page} replaced by the
#     page name, and the download is written to
#     manual/<lang>/<basename of page>.raw.wiki.
#
# NOTE(review): this diff view interleaves pre- and post-change lines. There
# are two PAGES= assignments (a "FreedomBox/Manual/<name>" scrape with a grep
# exclusion list, then the "<<Include(...)" scrape), and FILE/URL appear next
# to RAW_FILE/RAW_URL. Confirm against the Makefile which lines are live.
fetch-pages-%: lang = $*
fetch-pages-%: lang-fragment = $(subst en/,,$*/)
$(fetch-pages-list): fetch-pages-%:
MANUAL_URL_LANG=$(subst {lang-fragment},$(lang-fragment),$(MANUAL_URL_RAW)) ; \
MANUAL_PAGE_URL_LANG=$(subst {lang-fragment},$(lang-fragment),$(MANUAL_PAGE_URL)) ; \
PAGES=$$(wget --quiet -U Firefox -O - $${MANUAL_URL_LANG} | \
sed -n -e "s|.*FreedomBox/Manual/\([a-zA-Z0-9_-]*\).*|\1|p" | sort -u | \
grep -v -e GettingHelp -e Developer -e QuickStart) ; \
PAGES=$$(wget --quiet --user-agent=Firefox -O - $${MANUAL_URL_LANG} | \
sed -n -e "s|.*<<Include(\([a-zA-Z0-9_/+-]*\),.*|\1|p" | sort -u) ; \
for PAGE in $${PAGES} ; do \
FILE="manual/$(lang)/$${PAGE}.raw.xml" ; \
URL=$$(echo $${MANUAL_PAGE_URL_LANG} | sed "s/{page}/$${PAGE}/") ; \
PAGE_NAME=$$(basename $${PAGE}) ; \
echo "Downloading $(lang) $${PAGE}" ; \
wget --quiet --user-agent=Firefox -O $${FILE} $${URL} ; \
RAW_FILE="manual/$(lang)/$${PAGE_NAME}.raw.wiki" ; \
RAW_URL=$$(echo $(MANUAL_PAGE_URL_RAW) | sed "s|{page}|$${PAGE}|") ; \
wget --quiet --user-agent=Firefox -O $${RAW_FILE} $${RAW_URL} ; \
done
# File lists for the per-page manual build. All use ":=", so $(wildcard) is
# evaluated once, when the Makefile is read, against the files already on disk.
#   manual-pages-raw-wiki  - manual/<lang>/*.raw.wiki for every language,
#                            excluding freedombox-manual.raw.wiki
#   manual-pages-raw-xml   - the same names with .raw.xml
#   manual-pages-part-html - the same names with .part.html, plus
#                            manual/<lang>/freedombox-manual.part.html
#   manual-pages-html      - the part-html names with .html
#   manual-pages-xml       - the raw-xml names with .xml
# NOTE(review): manual-pages-raw and the first definitions of
# manual-pages-part-html and manual-pages-xml (both built from
# $(manual-pages-raw)) look like the pre-change lines of this diff view.
# Confirm which definitions remain in the Makefile.
manual-pages-raw:=$(foreach lang,$(MANUAL_LANGUAGES),$(filter-out manual/%/freedombox-manual.raw.xml,$(wildcard manual/$(lang)/*.raw.xml)))
manual-pages-part-html:=$(patsubst %.raw.xml, %.part.html, $(manual-pages-raw)) $(foreach lang,$(MANUAL_LANGUAGES),manual/$(lang)/freedombox-manual.part.html)
manual-pages-raw-wiki:=$(foreach lang,$(MANUAL_LANGUAGES),$(filter-out manual/%/freedombox-manual.raw.wiki,$(wildcard manual/$(lang)/*.raw.wiki)))
manual-pages-raw-xml:=$(patsubst %.raw.wiki, %.raw.xml, $(manual-pages-raw-wiki))
manual-pages-part-html:=$(patsubst %.raw.xml, %.part.html, $(manual-pages-raw-xml)) $(foreach lang,$(MANUAL_LANGUAGES),manual/$(lang)/freedombox-manual.part.html)
manual-pages-html:=$(patsubst %.part.html, %.html, $(manual-pages-part-html))
manual-pages-xml:=$(patsubst %.raw.xml, %.xml, $(manual-pages-raw))
manual-pages-xml:=$(patsubst %.raw.xml, %.xml, $(manual-pages-raw-xml))
.PHONY: manual-pages
manual-pages: $(manual-pages-part-html)
@ -105,12 +110,17 @@ $(manual-pdfs): %.pdf: %.xml
# Build each .part.html from its .html file by keeping only the contents of the
# <body> element. perl reads the whole file at once (undef $/) and replaces it
# with the text between "<body ...>" and "</body>", matching case-insensitively
# and across newlines (the /si flags).
$(manual-pages-part-html): %.part.html: %.html
perl -pe 'BEGIN {undef $$/} s/.*<body[^>]*>(.*)<\/body\s*>.*/$$1/si' $< > $@
# Full manual: generate .raw.xml from the fetched .raw.wiki by running
# wikiparser.py and redirecting its standard output into the target.
$(manual-raw-xmls): %.raw.xml: %.raw.wiki
python3 $(SCRIPTS_DIR)/wikiparser.py $< > $@
# Full manual: apply fixes.xslt to the raw XML. The stylesheet is listed as a
# prerequisite, so editing it also rebuilds the XML.
$(manual-xmls): %.xml: %.raw.xml $(SCRIPTS_DIR)/fixes.xslt
xsltproc --output $@ $(SCRIPTS_DIR)/fixes.xslt $<
# Individual pages: the same wiki-to-XML conversion as for the full manual.
$(manual-pages-raw-xml): %.raw.xml: %.raw.wiki
python3 $(SCRIPTS_DIR)/wikiparser.py $< > $@
# Individual pages: apply manual-page-fixes.xslt, then run the post-processor
# script on the generated file.
# NOTE(review): the commit message says the remove-footer step is skipped for
# generated manual xml pages, so the remove-footer line below is probably a
# removed line in this diff view - confirm against the Makefile.
$(manual-pages-xml): %.xml: %.raw.xml $(SCRIPTS_DIR)/manual-page-fixes.xslt
xsltproc --output $@ $(SCRIPTS_DIR)/manual-page-fixes.xslt $<
$(SCRIPTS_DIR)/post-processor remove-footer $@
$(SCRIPTS_DIR)/post-processor fix-wiki-urls $@
$(manual-pages-html): %.html: %.xml
@ -122,5 +132,5 @@ $(manual-pages-html): %.html: %.xml
# clean: delete generated files - the per-page .html, .part.html, .raw.xml and
# .xml files, the full-manual .raw.xml and .xml files, and everything listed in
# OUTPUTS. "rm -f" does not fail when a file is already absent.
# NOTE(review): the two "rm -f $(manual-pages-html) ..." lines look like the
# pre- and post-change versions of one line in this diff view; the longer one
# also removes the generated raw XML files.
.PHONY: clean
clean:
rm -f $(manual-pages-html) $(manual-pages-part-html) $(manual-pages-xml) $(manual-xmls)
rm -f $(manual-pages-html) $(manual-pages-part-html) $(manual-pages-raw-xml) $(manual-pages-xml) $(manual-raw-xmls) $(manual-xmls)
rm -f $(OUTPUTS)