diff --git a/deploy_archives b/deploy_archives
index 3e21965..264cc4e 100755
--- a/deploy_archives
+++ b/deploy_archives
@@ -6,13 +6,15 @@ cd archives
scp index.html style.css $HOSTNAME:$ARCHIVE_PATH
if [ -d "forums" ]; then
- tar -cf forums.tar forums && gzip -f forums.tar
- scp forums.tar.gz $HOSTNAME:$ARCHIVE_PATH
+ cat ../forum/structure.sql ../forum/categories.sql ../forum/boards.sql ../forum/threads.sql ../forum/misc_data.sql > forums.sql
+ cp ../forum/forum.sqlite forums.sqlite # forum or forums?
+ tar -cf forums.tar forums && gzip -f forums.tar forums.sqlite forums.sql
+ scp forums.sql.gz forums.sqlite.gz forums.tar.gz $HOSTNAME:$ARCHIVE_PATH
ssh $HOSTNAME "cd $ARCHIVE_PATH; tar -xf forums.tar.gz"
fi;
if [ -d "wiki" ]; then
- tar -cf wiki.tar wiki && gzip -f wiki.tar
- scp wiki.tar.gz $HOSTNAME:$ARCHIVE_PATH
+ tar -cf wiki.tar wiki && gzip -f wiki.tar wiki.xml
+ scp wiki.xml.gz wiki.tar.gz $HOSTNAME:$ARCHIVE_PATH
ssh $HOSTNAME "cd $ARCHIVE_PATH; tar -xf wiki.tar.gz"
fi;
\ No newline at end of file
diff --git a/epilogue/__init__.py b/epilogue/__init__.py
index 7092cff..3dbe00b 100644
--- a/epilogue/__init__.py
+++ b/epilogue/__init__.py
@@ -4,6 +4,7 @@ from .wiki import Wiki
from .archive_generator import ArchiveGenerator
import sys
+import shutil
BASEDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
FORUM_DATABASE = os.path.join(BASEDIR, "forum", "forum.sqlite")
@@ -39,4 +40,5 @@ def archive_wiki():
wiki = Wiki(os.path.join(WIKI_DIRECTORY, entry))
if wiki:
+ shutil.copyfile(wiki.xml_path, os.path.join(ARCHIVES_BASEDIR, "wiki.xml"))
ARCHIVE_GENERATOR.generate_wiki(wiki, WIKI_ARCHIVES)
\ No newline at end of file
diff --git a/epilogue/archive_generator.py b/epilogue/archive_generator.py
index 5392ac4..5b3760b 100644
--- a/epilogue/archive_generator.py
+++ b/epilogue/archive_generator.py
@@ -6,8 +6,7 @@ import chevron
import bbcode
import html
-from .wiki import NAMESPACES as WIKI_NAMESPACES
-import mwparserfromhell
+from .wiki import Renderer, NAMESPACES as WIKI_NAMESPACES
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("ArchiveGenerator")
@@ -49,28 +48,35 @@ class ArchiveGenerator():
"target": "Main_Page.html"
})
+ wikitext_renderer = Renderer()
for page in wiki.get_pages():
- if page.redirect:
- continue
-
- if page.namespace != WIKI_NAMESPACES['MAIN']:
- continue
+ try:
+ if page.namespace != WIKI_NAMESPACES['MAIN']:
+ continue
- page_out = "{}.html".format(page.title).replace(" ", "_")
- base = ""
- if "/" in page_out:
- base = "../" * page_out.count("/")
- try:
- os.makedirs(os.path.dirname(os.path.join(out_dir, page_out)))
- except FileExistsError: pass
+ page_out = "{}.html".format(page.title).replace(" ", "_")
+ base = ""
+ if "/" in page_out:
+ base = "../" * page_out.count("/")
+ try:
+ os.makedirs(os.path.dirname(os.path.join(out_dir, page_out)))
+ except FileExistsError: pass
- logger.info("Archiving page %s to %s", page.title, page_out)
- renderer.render_template_to_file("page", page_out, {
- "title": " - {}".format(page.title),
- "page": page,
- "base": base,
- "text": mwparserfromhell.parse(page.get_latest().text)
- })
+ if page.redirect:
+ logger.info("Archiving redirect page (%s -> %s) to %s", page.title, page.redirect, page_out)
+ renderer.render_template_to_file("redirect", page_out, {
+ "target": "{}{}{}.html".format(base, page.redirect[0].upper(), page.redirect[1:].replace(" ", "_"))
+ })
+ else:
+ logger.info("Archiving page %s to %s", page.title, page_out)
+ renderer.render_template_to_file("page", page_out, {
+ "title": " - {}".format(page.title),
+ "page": page,
+ "base": base,
+ "text": wikitext_renderer.render(page.get_latest().text)
+ })
+ except Exception as e:
+ logger.error("Error encountered when archiving %s: %s", page.title, e)
def generate_forum (self, forum, out_dir):
logger.info("Archiving forum to %s", out_dir)
diff --git a/epilogue/wiki.py b/epilogue/wiki.py
index fc7d236..7a53fbd 100644
--- a/epilogue/wiki.py
+++ b/epilogue/wiki.py
@@ -1,5 +1,8 @@
from xml.etree import ElementTree
+import mwparserfromhell
+from mwparserfromhell.nodes import Wikilink, Comment, ExternalLink, Heading, Tag, Template, Text
+
NAMESPACE = "{http://www.mediawiki.org/xml/export-0.10/}"
PAGE_TAG = "{}page".format(NAMESPACE)
ID_TAG = "{}id".format(NAMESPACE)
@@ -22,6 +25,14 @@ NAMESPACES = {
"TEMPLATE": 10
}
+# Interwiki link prefixes mapped to URL templates. translate_page_title fills
+# the "{}" placeholder with the part of the link title after the prefix.
+INTERWIKI_NAMESPACES = {
+ "bp:": "https://bulbapedia.bulbagarden.net/wiki/{}",
+ "wikipedia:": "https://en.wikipedia.org/wiki/{}"
+}
+
+# Title prefixes that translate_image_title treats as image links.
+FILE_NAMESPACES = ["File:", "Image:"]
+# Title prefix of category links; Renderer.render currently emits nothing for them.
+CATEGORY_NAMESPACE = "Category:"
+
class Wiki():
def __init__ (self, xml_path):
self.xml_path = xml_path
@@ -71,4 +82,70 @@ class Contributor():
if child.tag == ID_TAG:
self.id = child.text
elif child.tag == USERNAME_TAG:
- self.username = child.text
\ No newline at end of file
+ self.username = child.text
+
+class Renderer():
+    def __init__ (self, templates=None):
+        """Create a renderer.
+
+        templates: optional mapping stored as ``self.templates``. When
+        omitted, each instance gets its own new empty dict.
+        """
+        # A literal ``{}`` default is evaluated once at definition time and
+        # would be shared by every Renderer constructed without arguments.
+        self.templates = {} if templates is None else templates
+
+ # Render a wikitext string: parse it with mwparserfromhell, append one
+ # fragment to `rendered` per handled node type, then join the fragments,
+ # strip surrounding whitespace and replace blank lines.
+ # NOTE(review): several string literals below ('' and '{}' receiving two
+ # format arguments, and the literals broken across lines near the Heading
+ # branch) look like their HTML markup was lost when this patch was
+ # extracted - restore them from version control before applying.
+ def render (self, wikitext):
+ rendered = []
+ wikitext = mwparserfromhell.parse(wikitext)
+ # NOTE(review): False is passed positionally; presumably this is ifilter's
+ # `recursive` flag, so only top-level nodes are visited here and nested
+ # content is reached through the recursive self.render calls - confirm.
+ for node in wikitext.ifilter(False):
+ # node types:
+ # https://mwparserfromhell.readthedocs.io/en/latest/api/mwparserfromhell.nodes.html#module-mwparserfromhell.nodes.text
+ node_type = type(node)
+ if node_type is Wikilink:
+ # translate_image_title returns a name only for File:/Image: titles.
+ image_name = self.translate_image_title(node.title)
+ if image_name:
+ rendered.append(''.format(
+ image_name,
+ self.render(node.text)
+ ))
+ elif node.title.startswith(CATEGORY_NAMESPACE):
+ pass # todo: generate category links
+ else:
+ # Ordinary page link: fall back to the title when there is no link text.
+ rendered.append('{}'.format(
+ self.translate_page_title(node.title),
+ self.render(node.text if node.text else node.title)
+ ))
+ elif node_type is ExternalLink:
+ # Fall back to the URL itself when the link has no title.
+ rendered.append('{}'.format(
+ node.url,
+ self.render(node.title if node.title else node.url)
+ ))
+ elif node_type is Tag:
+ rendered.append("<{}>{}{}>".format(
+ self.render(node.tag),
+ self.render(node.contents),
+ self.render(node.tag)
+ ))
+ elif node_type is Heading:
+ rendered.append("
{{")
+ rendered.append(node.name)
+ rendered.append(node.params)
+ rendered.append("}}
")
+ return "".join(rendered).strip().replace("\n\n", "
")
+
+    def translate_page_title (self, page_title):
+        """Return the link target for a wiki page title.
+
+        Titles that start with a key of INTERWIKI_NAMESPACES map to that
+        entry's URL template, filled with the title minus the prefix. Any
+        other title becomes the reformatted title plus ".html".
+        """
+        for prefix in INTERWIKI_NAMESPACES:
+            if not page_title.startswith(prefix):
+                continue
+            remainder = page_title[len(prefix):]
+            return INTERWIKI_NAMESPACES[prefix].format(remainder)
+
+        local_name = self.reformat_page_title(page_title)
+        return "{}.html".format(local_name)
+
+    def translate_image_title (self, page_title):
+        """Return the reformatted file name for a File:/Image: link title.
+
+        Returns None when page_title starts with none of FILE_NAMESPACES;
+        render relies on that falsy result to treat the link as a page link.
+        """
+        for namespace in FILE_NAMESPACES:
+            if page_title.startswith(namespace):
+                # Strip the prefix that actually matched. The earlier code
+                # used len(FILE_NAMESPACE), a name that does not appear to be
+                # defined anywhere (only FILE_NAMESPACES is), so every image
+                # link raised NameError.
+                return self.reformat_page_title(page_title[len(namespace):])
+        return None
+
+    def reformat_page_title (self, page_title):
+        """Return page_title with its first character upper-cased and every
+        space after the first character replaced by an underscore.
+
+        An empty title yields an empty string.
+        """
+        # Slice ([:1]) rather than index ([0]) so an empty title, e.g. the
+        # remainder of a bare "File:" link, does not raise IndexError.
+        return "{}{}".format(page_title[:1].upper(), page_title[1:].replace(" ", "_"))
\ No newline at end of file
diff --git a/static/style.css b/static/style.css
index 0ecc2c8..766b24e 100644
--- a/static/style.css
+++ b/static/style.css
@@ -11,4 +11,7 @@ ul.boards { margin-left: 0; padding-left: 0; }
.label { font-weight: bold }
article { border-top: 1px solid black; }
section { margin-top: 15px; margin-bottom: 15px; }
-.next { float: right; }
\ No newline at end of file
+.next { float: right; }
+
+.page { padding-top: 15px; }
+.page table { width: 100%; }
\ No newline at end of file
diff --git a/templates/index.mustache b/templates/index.mustache
index 8c8b922..8d14e5c 100644
--- a/templates/index.mustache
+++ b/templates/index.mustache
@@ -2,7 +2,7 @@
Welcome to the Glitch City Laboratories Archives.