improvements to template substitution, begin parsing out and dumping category links
This commit is contained in:
parent
43a36ba730
commit
df25b09eb7
@ -2,11 +2,13 @@ import os
|
|||||||
import logging
|
import logging
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
|
from traceback import print_exc
|
||||||
|
|
||||||
import chevron
|
import chevron
|
||||||
import bbcode
|
import bbcode
|
||||||
import html
|
import html
|
||||||
|
|
||||||
from .wiki import Template, Renderer, NAMESPACES as WIKI_NAMESPACES
|
from .wiki import Template, Renderer, reformat_page_title, NAMESPACES as WIKI_NAMESPACES
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
logger = logging.getLogger("ArchiveGenerator")
|
logger = logging.getLogger("ArchiveGenerator")
|
||||||
@ -48,6 +50,7 @@ class ArchiveGenerator():
|
|||||||
"target": "Main_Page.html"
|
"target": "Main_Page.html"
|
||||||
})
|
})
|
||||||
|
|
||||||
|
categories = {}
|
||||||
templates = dict([(page.title.split(":")[1], Template(page.get_latest().text)) for page in wiki.get_pages() if page.namespace == WIKI_NAMESPACES['TEMPLATE']])
|
templates = dict([(page.title.split(":")[1], Template(page.get_latest().text)) for page in wiki.get_pages() if page.namespace == WIKI_NAMESPACES['TEMPLATE']])
|
||||||
wikitext_renderer = Renderer(templates)
|
wikitext_renderer = Renderer(templates)
|
||||||
for page in wiki.get_pages():
|
for page in wiki.get_pages():
|
||||||
@ -66,18 +69,46 @@ class ArchiveGenerator():
|
|||||||
if page.redirect:
|
if page.redirect:
|
||||||
logger.info("Archiving redirect page (%s -> %s) to %s", page.title, page.redirect, page_out)
|
logger.info("Archiving redirect page (%s -> %s) to %s", page.title, page.redirect, page_out)
|
||||||
renderer.render_template_to_file("redirect", page_out, {
|
renderer.render_template_to_file("redirect", page_out, {
|
||||||
"target": "{}{}{}.html".format(base, page.redirect[0].upper(), page.redirect[1:].replace(" ", "_"))
|
"target": f"{base}{reformat_page_title(page.redirect)}.html"
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
logger.info("Archiving page %s to %s", page.title, page_out)
|
logger.info("Archiving page %s to %s", page.title, page_out)
|
||||||
|
(rendered, page_categories) = wikitext_renderer.render(page.get_latest().text, page=page)
|
||||||
|
|
||||||
|
for category in page_categories:
|
||||||
|
if not category in categories:
|
||||||
|
categories[category] = []
|
||||||
|
|
||||||
|
categories[category].append({
|
||||||
|
"url": page_out,
|
||||||
|
"title": page.title
|
||||||
|
})
|
||||||
|
|
||||||
renderer.render_template_to_file("page", page_out, {
|
renderer.render_template_to_file("page", page_out, {
|
||||||
"title": " - {}".format(page.title),
|
"title": " - {}".format(page.title),
|
||||||
"page": page,
|
"page": page,
|
||||||
"base": base,
|
"base": base,
|
||||||
"text": wikitext_renderer.render(page.get_latest().text)
|
"text": rendered
|
||||||
})
|
})
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("Error encountered when archiving %s: %s", page.title, e)
|
logger.error("Error encountered when archiving %s: %s", page.title, e)
|
||||||
|
print_exc()
|
||||||
|
if isinstance(e, ValueError):
|
||||||
|
raise e
|
||||||
|
|
||||||
|
for category, pages in categories.items():
|
||||||
|
category_out = f"Category:{reformat_page_title(category)}.html"
|
||||||
|
logger.info("Archiving category %s to %s", category, category_out)
|
||||||
|
|
||||||
|
try:
|
||||||
|
renderer.render_template_to_file("category", category_out, {
|
||||||
|
"title": f" - {category}",
|
||||||
|
"category": category,
|
||||||
|
"pages": pages
|
||||||
|
})
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Error encountered when archiving %s: %s", category, e)
|
||||||
|
print_exc()
|
||||||
|
|
||||||
def generate_forum (self, forum, out_dir):
|
def generate_forum (self, forum, out_dir):
|
||||||
logger.info("Archiving forum to %s", out_dir)
|
logger.info("Archiving forum to %s", out_dir)
|
||||||
|
138
epilogue/wiki.py
138
epilogue/wiki.py
@ -88,76 +88,97 @@ class Renderer():
|
|||||||
def __init__ (self, templates={}):
|
def __init__ (self, templates={}):
|
||||||
self.templates = templates
|
self.templates = templates
|
||||||
|
|
||||||
def render (self, wikitext):
|
def render (self, wikitext, *args, **kwargs):
|
||||||
rendered = []
|
categories = []
|
||||||
wikitext = self.transclude_templates(wikitext)
|
wikitext = self.transclude_templates(wikitext, *args, **kwargs)
|
||||||
for node in wikitext.ifilter(False):
|
|
||||||
# node types:
|
# parse out categories
|
||||||
# https://mwparserfromhell.readthedocs.io/en/latest/api/mwparserfromhell.nodes.html#module-mwparserfromhell.nodes.text
|
for link in wikitext.ifilter_wikilinks():
|
||||||
node_type = type(node)
|
if not link.title.startswith(CATEGORY_NAMESPACE):
|
||||||
if node_type is Wikilink:
|
continue
|
||||||
image_name = self.translate_image_title(node.title)
|
|
||||||
if image_name:
|
|
||||||
rendered.append('<img src="{}" />'.format(
|
|
||||||
image_name,
|
|
||||||
self.render(node.text)
|
|
||||||
))
|
|
||||||
elif node.title.startswith(CATEGORY_NAMESPACE):
|
|
||||||
pass # todo: generate category links
|
|
||||||
else:
|
|
||||||
rendered.append('<a href="{}">{}</a>'.format(
|
|
||||||
self.translate_page_title(node.title),
|
|
||||||
self.render(node.text if node.text else node.title)
|
|
||||||
))
|
|
||||||
elif node_type is ExternalLink:
|
|
||||||
rendered.append('<a href="{}">{}</a>'.format(
|
|
||||||
node.url,
|
|
||||||
self.render(node.title if node.title else node.url)
|
|
||||||
))
|
|
||||||
elif node_type is Tag:
|
|
||||||
rendered.append("<{}>{}</{}>".format(
|
|
||||||
self.render(node.tag),
|
|
||||||
self.render(node.contents),
|
|
||||||
self.render(node.tag)
|
|
||||||
))
|
|
||||||
elif node_type is Heading:
|
|
||||||
rendered.append("<h{}>{}</h{}>".format(
|
|
||||||
node.level,
|
|
||||||
self.render(node.title),
|
|
||||||
node.level
|
|
||||||
))
|
|
||||||
elif node_type is Text:
|
|
||||||
rendered.append(node.value)
|
|
||||||
return "".join(rendered).strip().replace("\n\n", "<br /><br />")
|
|
||||||
|
|
||||||
def transclude_templates (self, wikitext):
|
wikitext.remove(link)
|
||||||
|
categories.append(link.title[len(CATEGORY_NAMESPACE):])
|
||||||
|
|
||||||
|
rendered = [render(wikitext)]
|
||||||
|
if categories:
|
||||||
|
rendered.append('<h2>Categories</h2><ul class="categories">')
|
||||||
|
for category in categories:
|
||||||
|
rendered.append('<li><a href="Category:{}.html">{}</a></li>'.format(reformat_page_title(category), category))
|
||||||
|
rendered.append("</ul>")
|
||||||
|
|
||||||
|
return ("".join(rendered), categories)
|
||||||
|
|
||||||
|
def transclude_templates (self, wikitext, *args, **kwargs):
|
||||||
wikitext = mwparserfromhell.parse(wikitext)
|
wikitext = mwparserfromhell.parse(wikitext)
|
||||||
for inclusion in wikitext.ifilter_templates():
|
for inclusion in wikitext.ifilter_templates():
|
||||||
template_key = str(inclusion.name)
|
template_key = str(inclusion.name)
|
||||||
template = self.templates.get(template_key, self.templates.get(template_key[0].upper() + template_key[1:], None))
|
template = self.templates.get(template_key, self.templates.get(template_key[0].upper() + template_key[1:], None))
|
||||||
result = None
|
result = None
|
||||||
if template:
|
if template:
|
||||||
result = template(inclusion, *inclusion.params)
|
result = template(inclusion, *args, **kwargs)
|
||||||
else:
|
else:
|
||||||
result = "<span class='unknown-template'>Template:{0}</span>".format(inclusion.name)
|
result = "<span class='unknown-template'>Template:{0}</span>".format(inclusion.name)
|
||||||
|
|
||||||
wikitext.replace(inclusion, self.transclude_templates(result))
|
try:
|
||||||
|
wikitext.replace(inclusion, result) #self.transclude_templates(result))
|
||||||
|
except ValueError: pass
|
||||||
return wikitext
|
return wikitext
|
||||||
|
|
||||||
def translate_page_title (self, page_title):
|
def render (wikitext):
|
||||||
for namespace, url in INTERWIKI_NAMESPACES.items():
|
rendered = []
|
||||||
if page_title.startswith(namespace):
|
for node in wikitext.ifilter(False):
|
||||||
return url.format(page_title[len(namespace):])
|
# node types:
|
||||||
|
# https://mwparserfromhell.readthedocs.io/en/latest/api/mwparserfromhell.nodes.html#module-mwparserfromhell.nodes.text
|
||||||
|
node_type = type(node)
|
||||||
|
if node_type is Wikilink:
|
||||||
|
image_name = translate_image_title(node.title)
|
||||||
|
if image_name:
|
||||||
|
rendered.append('<img src="{}" />'.format(
|
||||||
|
image_name,
|
||||||
|
render(mwparserfromhell.parse(node.text))
|
||||||
|
))
|
||||||
|
else:
|
||||||
|
rendered.append('<a href="{}">{}</a>'.format(
|
||||||
|
translate_page_title(node.title),
|
||||||
|
render(node.text if node.text else node.title)
|
||||||
|
))
|
||||||
|
elif node_type is ExternalLink:
|
||||||
|
rendered.append('<a href="{}">{}</a>'.format(
|
||||||
|
node.url,
|
||||||
|
render(node.title if node.title else node.url)
|
||||||
|
))
|
||||||
|
elif node_type is Tag:
|
||||||
|
rendered.append("<{}>{}</{}>".format(
|
||||||
|
render(node.tag),
|
||||||
|
render(node.contents),
|
||||||
|
render(node.tag)
|
||||||
|
))
|
||||||
|
elif node_type is Heading:
|
||||||
|
rendered.append("<h{}>{}</h{}>".format(
|
||||||
|
node.level,
|
||||||
|
render(node.title),
|
||||||
|
node.level
|
||||||
|
))
|
||||||
|
elif node_type is Text:
|
||||||
|
rendered.append(node.value)
|
||||||
|
|
||||||
return "{}.html".format(self.reformat_page_title(page_title))
|
return "".join(rendered).strip().replace("\n\n", "<br /><br />")
|
||||||
|
|
||||||
def translate_image_title (self, page_title):
|
def translate_page_title (page_title):
|
||||||
for namespace in FILE_NAMESPACES:
|
for namespace, url in INTERWIKI_NAMESPACES.items():
|
||||||
if page_title.startswith(namespace):
|
if page_title.startswith(namespace):
|
||||||
return self.reformat_page_title(page_title[len(namespace):])
|
return url.format(page_title[len(namespace):])
|
||||||
|
|
||||||
def reformat_page_title (self, page_title):
|
return "{}.html".format(reformat_page_title(page_title))
|
||||||
return "{}{}".format(page_title[0].upper(), page_title[1:].replace(" ", "_"))
|
|
||||||
|
def translate_image_title (page_title):
    """Map a file/image wikilink title to its archived file name.

    Checks ``page_title`` against each prefix in ``FILE_NAMESPACES``; on the
    first match, the prefix is stripped and the remainder is passed through
    ``reformat_page_title``.

    Returns ``None`` when the title carries none of the file prefixes, which
    callers use to tell ordinary page links from image links.
    """
    # First matching namespace prefix wins, same as the sequential scan.
    prefix = next(
        (candidate for candidate in FILE_NAMESPACES if page_title.startswith(candidate)),
        None,
    )
    if prefix is None:
        return None
    return reformat_page_title(page_title[len(prefix):])
|
||||||
|
|
||||||
|
def reformat_page_title (page_title):
    """Convert a wiki page title into the form used for archived file names.

    The first character is upper-cased and every space in the remainder is
    replaced by an underscore; the rest of the title is left as-is.

    An empty title yields an empty string instead of raising ``IndexError``
    (e.g. a bare ``[[Category:]]`` link leaves nothing after the prefix).
    """
    # Normalise to ``str`` so string-like title objects behave the same as
    # plain strings for indexing, slicing and ``replace``.
    title = str(page_title)
    if not title:
        return ""
    return "{}{}".format(title[0].upper(), title[1:].replace(" ", "_"))
|
||||||
|
|
||||||
class Template():
|
class Template():
|
||||||
def __init__ (self, wikicode):
|
def __init__ (self, wikicode):
|
||||||
@ -166,11 +187,14 @@ class Template():
|
|||||||
if tag.tag == "noinclude":
|
if tag.tag == "noinclude":
|
||||||
self.wikicode.remove(tag)
|
self.wikicode.remove(tag)
|
||||||
|
|
||||||
def __call__ (self, inclusion, *args):
|
def __call__ (self, inclusion, *args, **kwargs):
|
||||||
parsed_wikicode = mwparserfromhell.parse(self.wikicode)
|
parsed_wikicode = mwparserfromhell.parse(self.wikicode)
|
||||||
for argument in parsed_wikicode.ifilter_arguments():
|
for argument in parsed_wikicode.ifilter_arguments():
|
||||||
value = argument.default if argument.default else argument.name
|
value = argument.default if argument.default else argument.name
|
||||||
if inclusion.has(argument.name):
|
if inclusion.has(argument.name):
|
||||||
value = inclusion.get(argument.name)
|
value = inclusion.get(argument.name)
|
||||||
parsed_wikicode.replace(argument, value)
|
|
||||||
|
try:
|
||||||
|
parsed_wikicode.replace(argument, value)
|
||||||
|
except ValueError: pass
|
||||||
return parsed_wikicode
|
return parsed_wikicode
|
Loading…
x
Reference in New Issue
Block a user