diff --git a/deploy_archives b/deploy_archives
index 3e21965..264cc4e 100755
--- a/deploy_archives
+++ b/deploy_archives
@@ -6,13 +6,15 @@ cd archives
 scp index.html style.css $HOSTNAME:$ARCHIVE_PATH
 
 if [ -d "forums" ]; then
-    tar -cf forums.tar forums && gzip -f forums.tar
-    scp forums.tar.gz $HOSTNAME:$ARCHIVE_PATH
+    cat ../forum/structure.sql ../forum/categories.sql ../forum/boards.sql ../forum/threads.sql ../forum/misc_data.sql > forums.sql
+    cp ../forum/forum.sqlite forums.sqlite # forum or forums?
+    tar -cf forums.tar forums && gzip -f forums.tar forums.sqlite forums.sql
+    scp forums.sql.gz forums.sqlite.gz forums.tar.gz $HOSTNAME:$ARCHIVE_PATH
     ssh $HOSTNAME "cd $ARCHIVE_PATH; tar -xf forums.tar.gz"
 fi;
 
 if [ -d "wiki" ]; then
-    tar -cf wiki.tar wiki && gzip -f wiki.tar
-    scp wiki.tar.gz $HOSTNAME:$ARCHIVE_PATH
+    tar -cf wiki.tar wiki && gzip -f wiki.tar wiki.xml
+    scp wiki.xml.gz wiki.tar.gz $HOSTNAME:$ARCHIVE_PATH
     ssh $HOSTNAME "cd $ARCHIVE_PATH; tar -xf wiki.tar.gz"
 fi;
\ No newline at end of file
diff --git a/epilogue/__init__.py b/epilogue/__init__.py
index 7092cff..3dbe00b 100644
--- a/epilogue/__init__.py
+++ b/epilogue/__init__.py
@@ -4,6 +4,7 @@ from .wiki import Wiki
 from .archive_generator import ArchiveGenerator
 
 import sys
+import shutil
 
 BASEDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 FORUM_DATABASE = os.path.join(BASEDIR, "forum", "forum.sqlite")
@@ -39,4 +40,5 @@ def archive_wiki():
             wiki = Wiki(os.path.join(WIKI_DIRECTORY, entry))
 
     if wiki:
+        shutil.copyfile(wiki.xml_path, os.path.join(ARCHIVES_BASEDIR, "wiki.xml"))
         ARCHIVE_GENERATOR.generate_wiki(wiki, WIKI_ARCHIVES)
\ No newline at end of file
diff --git a/epilogue/archive_generator.py b/epilogue/archive_generator.py
index 5392ac4..5b3760b 100644
--- a/epilogue/archive_generator.py
+++ b/epilogue/archive_generator.py
@@ -6,8 +6,7 @@ import chevron
 import bbcode
 import html
 
-from .wiki import NAMESPACES as WIKI_NAMESPACES
-import mwparserfromhell
+from .wiki import Renderer, NAMESPACES as WIKI_NAMESPACES
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger("ArchiveGenerator")
@@ -49,28 +48,35 @@ class ArchiveGenerator():
             "target": "Main_Page.html"
         })
 
+        wikitext_renderer = Renderer()
         for page in wiki.get_pages():
-            if page.redirect:
-                continue
-
-            if page.namespace != WIKI_NAMESPACES['MAIN']:
-                continue
+            try:
+                if page.namespace != WIKI_NAMESPACES['MAIN']:
+                    continue
 
-            page_out = "{}.html".format(page.title).replace(" ", "_")
-            base = ""
-            if "/" in page_out:
-                base = "../" * page_out.count("/")
-                try:
-                    os.makedirs(os.path.dirname(os.path.join(out_dir, page_out)))
-                except FileExistsError: pass
+                page_out = "{}.html".format(page.title).replace(" ", "_")
+                base = ""
+                if "/" in page_out:
+                    base = "../" * page_out.count("/")
+                    try:
+                        os.makedirs(os.path.dirname(os.path.join(out_dir, page_out)))
+                    except FileExistsError: pass
 
-            logger.info("Archiving page %s to %s", page.title, page_out)
-            renderer.render_template_to_file("page", page_out, {
-                "title": " - {}".format(page.title),
-                "page": page,
-                "base": base,
-                "text": mwparserfromhell.parse(page.get_latest().text)
-            })
+                if page.redirect:
+                    logger.info("Archiving redirect page (%s -> %s) to %s", page.title, page.redirect, page_out)
+                    renderer.render_template_to_file("redirect", page_out, {
+                        "target": "{}{}{}.html".format(base, page.redirect[0].upper(), page.redirect[1:].replace(" ", "_"))
+                    })
+                else:
+                    logger.info("Archiving page %s to %s", page.title, page_out)
+                    renderer.render_template_to_file("page", page_out, {
+                        "title": " - {}".format(page.title),
+                        "page": page,
+                        "base": base,
+                        "text": wikitext_renderer.render(page.get_latest().text)
+                    })
+            except Exception as e:
+                logger.error("Error encountered when archiving %s: %s", page.title, e)
 
     def generate_forum (self, forum, out_dir):
         logger.info("Archiving forum to %s", out_dir)
diff --git a/epilogue/wiki.py b/epilogue/wiki.py
index fc7d236..7a53fbd 100644
--- a/epilogue/wiki.py
+++ b/epilogue/wiki.py
@@ -1,5 +1,8 @@
 from xml.etree import ElementTree
 
+import mwparserfromhell
+from mwparserfromhell.nodes import Wikilink, Comment, ExternalLink, Heading, Tag, Template, Text
+
 NAMESPACE = "{http://www.mediawiki.org/xml/export-0.10/}"
 PAGE_TAG = "{}page".format(NAMESPACE)
 ID_TAG = "{}id".format(NAMESPACE)
@@ -22,6 +25,14 @@ NAMESPACES = {
     "TEMPLATE": 10
 }
 
+INTERWIKI_NAMESPACES = {
+    "bp:": "https://bulbapedia.bulbagarden.net/wiki/{}",
+    "wikipedia:": "https://en.wikipedia.org/wiki/{}"
+}
+
+FILE_NAMESPACES = ["File:", "Image:"]
+CATEGORY_NAMESPACE = "Category:"
+
 class Wiki():
     def __init__ (self, xml_path):
         self.xml_path = xml_path
@@ -71,4 +82,83 @@ class Contributor():
             if child.tag == ID_TAG:
                 self.id = child.text
             elif child.tag == USERNAME_TAG:
-                self.username = child.text
\ No newline at end of file
+                self.username = child.text
+
+class Renderer():
+    """Render MediaWiki wikitext to an HTML fragment."""
+
+    def __init__ (self, templates=None):
+        # A fresh dict per instance: a `{}` default would be shared by every Renderer.
+        self.templates = {} if templates is None else templates
+
+    def render (self, wikitext):
+        """Return `wikitext` converted to HTML.
+
+        Only the top-level nodes are walked; nested markup is handled by
+        recursing into each node's children. Blank lines become `<br />` pairs.
+        """
+        rendered = []
+        wikitext = mwparserfromhell.parse(wikitext)
+        for node in wikitext.ifilter(recursive=False):
+            # node types:
+            # https://mwparserfromhell.readthedocs.io/en/latest/api/mwparserfromhell.nodes.html#module-mwparserfromhell.nodes.text
+            if isinstance(node, Wikilink):
+                title = str(node.title)
+                image_name = self.translate_image_title(title)
+                if image_name:
+                    rendered.append('<img src="{}" alt="{}" />'.format(
+                        image_name,
+                        self.render(node.text)
+                    ))
+                elif title.startswith(CATEGORY_NAMESPACE):
+                    pass # todo: generate category links
+                else:
+                    rendered.append('<a href="{}">{}</a>'.format(
+                        self.translate_page_title(title),
+                        self.render(node.text if node.text else node.title)
+                    ))
+            elif isinstance(node, ExternalLink):
+                rendered.append('<a href="{}">{}</a>'.format(
+                    node.url,
+                    self.render(node.title if node.title else node.url)
+                ))
+            elif isinstance(node, Tag):
+                rendered.append("<{}>{}</{}>".format(
+                    self.render(node.tag),
+                    self.render(node.contents),
+                    self.render(node.tag)
+                ))
+            elif isinstance(node, Heading):
+                rendered.append("<h{}>{}</h{}>".format(
+                    node.level,
+                    self.render(node.title),
+                    node.level
+                ))
+            elif isinstance(node, Text):
+                rendered.append(node.value)
+            elif isinstance(node, Template): # todo: template substitution
+                # Emit the template source verbatim; str.join() below needs strings,
+                # not the Wikicode name / parameter list.
+                rendered.append(str(node))
+        return "".join(rendered).strip().replace("\n\n", "<br /><br />")
+
+    def translate_page_title (self, page_title):
+        """Return the link target for `page_title`: an interwiki URL or a local .html path."""
+        for namespace, url in INTERWIKI_NAMESPACES.items():
+            if page_title.startswith(namespace):
+                return url.format(page_title[len(namespace):])
+
+        return "{}.html".format(self.reformat_page_title(page_title))
+
+    def translate_image_title (self, page_title):
+        """Return the file name for a File:/Image: title, or None for any other title."""
+        for namespace in FILE_NAMESPACES:
+            if page_title.startswith(namespace):
+                # Strip the prefix that actually matched ("File:" and "Image:" differ in length).
+                return self.reformat_page_title(page_title[len(namespace):])
+        return None
+
+    def reformat_page_title (self, page_title):
+        """Capitalize the first character and replace spaces after it with underscores."""
+        # Slicing instead of indexing keeps an empty title from raising IndexError.
+        return "{}{}".format(page_title[:1].upper(), page_title[1:].replace(" ", "_"))
\ No newline at end of file
diff --git a/static/style.css b/static/style.css
index 0ecc2c8..766b24e 100644
--- a/static/style.css
+++ b/static/style.css
@@ -11,4 +11,7 @@ ul.boards { margin-left: 0; padding-left: 0; }
 .label { font-weight: bold }
 article { border-top: 1px solid black; }
 section { margin-top: 15px; margin-bottom: 15px; }
-.next { float: right; }
\ No newline at end of file
+.next { float: right; }
+
+.page { padding-top: 15px; }
+.page table { width: 100%; }
\ No newline at end of file
diff --git a/templates/index.mustache b/templates/index.mustache
index 8c8b922..8d14e5c 100644
--- a/templates/index.mustache
+++ b/templates/index.mustache
@@ -2,7 +2,7 @@ Welcome to the Glitch City Laboratories Archives.

Archives

{{>footer}} \ No newline at end of file diff --git a/templates/page.mustache b/templates/page.mustache index 71b8eae..e41af9f 100644 --- a/templates/page.mustache +++ b/templates/page.mustache @@ -1,6 +1,6 @@ {{>header}}

{{page.title}}

-
- {{text}} +
+ {{{text}}}
{{>footer}} \ No newline at end of file