diff --git a/epilogue/archive_generator.py b/epilogue/archive_generator.py index c91bc8c..c7ae064 100644 --- a/epilogue/archive_generator.py +++ b/epilogue/archive_generator.py @@ -2,11 +2,13 @@ import os import logging import shutil +from traceback import print_exc + import chevron import bbcode import html -from .wiki import Template, Renderer, NAMESPACES as WIKI_NAMESPACES +from .wiki import Template, Renderer, reformat_page_title, NAMESPACES as WIKI_NAMESPACES logging.basicConfig(level=logging.INFO) logger = logging.getLogger("ArchiveGenerator") @@ -48,6 +50,7 @@ class ArchiveGenerator(): "target": "Main_Page.html" }) + categories = {} templates = dict([(page.title.split(":")[1], Template(page.get_latest().text)) for page in wiki.get_pages() if page.namespace == WIKI_NAMESPACES['TEMPLATE']]) wikitext_renderer = Renderer(templates) for page in wiki.get_pages(): @@ -66,18 +69,46 @@ class ArchiveGenerator(): if page.redirect: logger.info("Archiving redirect page (%s -> %s) to %s", page.title, page.redirect, page_out) renderer.render_template_to_file("redirect", page_out, { - "target": "{}{}{}.html".format(base, page.redirect[0].upper(), page.redirect[1:].replace(" ", "_")) + "target": f"{base}{reformat_page_title(page.redirect)}.html" }) else: logger.info("Archiving page %s to %s", page.title, page_out) + (rendered, page_categories) = wikitext_renderer.render(page.get_latest().text, page=page) + + for category in page_categories: + if not category in categories: + categories[category] = [] + + categories[category].append({ + "url": page_out, + "title": page.title + }) + renderer.render_template_to_file("page", page_out, { "title": " - {}".format(page.title), "page": page, "base": base, - "text": wikitext_renderer.render(page.get_latest().text) + "text": rendered }) except Exception as e: logger.error("Error encountered when archiving %s: %s", page.title, e) + print_exc() + if isinstance(e, ValueError): + raise e + + for category, pages in categories.items(): + category_out = f"Category:{reformat_page_title(category)}.html" + logger.info("Archiving category %s to %s", category, category_out) + + try: + renderer.render_template_to_file("category", category_out, { + "title": f" - {category}", + "category": category, + "pages": pages + }) + except Exception as e: + logger.error("Error encountered when archiving %s: %s", category, e) + print_exc() def generate_forum (self, forum, out_dir): logger.info("Archiving forum to %s", out_dir) diff --git a/epilogue/wiki.py b/epilogue/wiki.py index f954267..5594ee5 100644 --- a/epilogue/wiki.py +++ b/epilogue/wiki.py @@ -88,76 +88,97 @@ class Renderer(): def __init__ (self, templates={}): self.templates = templates - def render (self, wikitext): - rendered = [] - wikitext = self.transclude_templates(wikitext) - for node in wikitext.ifilter(False): - # node types: - # https://mwparserfromhell.readthedocs.io/en/latest/api/mwparserfromhell.nodes.html#module-mwparserfromhell.nodes.text - node_type = type(node) - if node_type is Wikilink: - image_name = self.translate_image_title(node.title) - if image_name: - rendered.append(''.format( - image_name, - self.render(node.text) - )) - elif node.title.startswith(CATEGORY_NAMESPACE): - pass # todo: generate category links - else: - rendered.append('{}'.format( - self.translate_page_title(node.title), - self.render(node.text if node.text else node.title) - )) - elif node_type is ExternalLink: - rendered.append('{}'.format( - node.url, - self.render(node.title if node.title else node.url) - )) - elif node_type is Tag: - rendered.append("<{}>{}".format( - self.render(node.tag), - self.render(node.contents), - self.render(node.tag) - )) - elif node_type is Heading: - rendered.append("{}".format( - node.level, - self.render(node.title), - node.level - )) - elif node_type is Text: - rendered.append(node.value) - return "".join(rendered).strip().replace("\n\n", "

") + def render (self, wikitext, *args, **kwargs): + categories = [] + wikitext = self.transclude_templates(wikitext, *args, **kwargs) + + # parse out categories + for link in wikitext.ifilter_wikilinks(): + if not link.title.startswith(CATEGORY_NAMESPACE): + continue - def transclude_templates (self, wikitext): + wikitext.remove(link) + categories.append(link.title[len(CATEGORY_NAMESPACE):]) + + rendered = [render(wikitext)] + if categories: + rendered.append('

Categories

") + + return ("".join(rendered), categories) + + def transclude_templates (self, wikitext, *args, **kwargs): wikitext = mwparserfromhell.parse(wikitext) for inclusion in wikitext.ifilter_templates(): template_key = str(inclusion.name) template = self.templates.get(template_key, self.templates.get(template_key[0].upper() + template_key[1:], None)) result = None if template: - result = template(inclusion, *inclusion.params) + result = template(inclusion, *args, **kwargs) else: result = "Template:{0}".format(inclusion.name) - wikitext.replace(inclusion, self.transclude_templates(result)) + try: + wikitext.replace(inclusion, result) #self.transclude_templates(result)) + except ValueError: pass return wikitext - def translate_page_title (self, page_title): - for namespace, url in INTERWIKI_NAMESPACES.items(): - if page_title.startswith(namespace): - return url.format(page_title[len(namespace):]) +def render (wikitext): + rendered = [] + for node in wikitext.ifilter(False): + # node types: + # https://mwparserfromhell.readthedocs.io/en/latest/api/mwparserfromhell.nodes.html#module-mwparserfromhell.nodes.text + node_type = type(node) + if node_type is Wikilink: + image_name = translate_image_title(node.title) + if image_name: + rendered.append(''.format( + image_name, + render(mwparserfromhell.parse(node.text)) + )) + else: + rendered.append('{}'.format( + translate_page_title(node.title), + render(node.text if node.text else node.title) + )) + elif node_type is ExternalLink: + rendered.append('{}'.format( + node.url, + render(node.title if node.title else node.url) + )) + elif node_type is Tag: + rendered.append("<{}>{}".format( + render(node.tag), + render(node.contents), + render(node.tag) + )) + elif node_type is Heading: + rendered.append("{}".format( + node.level, + render(node.title), + node.level + )) + elif node_type is Text: + rendered.append(node.value) - return "{}.html".format(self.reformat_page_title(page_title)) + return "".join(rendered).strip().replace("\n\n", "

") - def translate_image_title (self, page_title): - for namespace in FILE_NAMESPACES: - if page_title.startswith(namespace): - return self.reformat_page_title(page_title[len(namespace):]) +def translate_page_title (page_title): + for namespace, url in INTERWIKI_NAMESPACES.items(): + if page_title.startswith(namespace): + return url.format(page_title[len(namespace):]) - def reformat_page_title (self, page_title): - return "{}{}".format(page_title[0].upper(), page_title[1:].replace(" ", "_")) + return "{}.html".format(reformat_page_title(page_title)) + +def translate_image_title (page_title): + for namespace in FILE_NAMESPACES: + if page_title.startswith(namespace): + return reformat_page_title(page_title[len(namespace):]) + +def reformat_page_title (page_title): + return "{}{}".format(page_title[0].upper(), page_title[1:].replace(" ", "_")) class Template(): def __init__ (self, wikicode): @@ -166,11 +187,14 @@ class Template(): if tag.tag == "noinclude": self.wikicode.remove(tag) - def __call__ (self, inclusion, *args): + def __call__ (self, inclusion, *args, **kwargs): parsed_wikicode = mwparserfromhell.parse(self.wikicode) for argument in parsed_wikicode.ifilter_arguments(): value = argument.default if argument.default else argument.name if inclusion.has(argument.name): value = inclusion.get(argument.name) - parsed_wikicode.replace(argument, value) + + try: + parsed_wikicode.replace(argument, value) + except ValueError: pass return parsed_wikicode \ No newline at end of file