Implement a rudimentary wikitext renderer and extend the deploy_archives script so that it can deploy the wiki archive as well as the SQL/SQLite/XML data dumps.

This commit is contained in:
Adrian Kuschelyagi Malacoda 2020-08-17 01:51:09 -05:00
parent 0b1320a9da
commit 54fa852897
7 changed files with 121 additions and 31 deletions

View File

@ -6,13 +6,15 @@ cd archives
scp index.html style.css $HOSTNAME:$ARCHIVE_PATH
if [ -d "forums" ]; then
tar -cf forums.tar forums && gzip -f forums.tar
scp forums.tar.gz $HOSTNAME:$ARCHIVE_PATH
cat ../forum/structure.sql ../forum/categories.sql ../forum/boards.sql ../forum/threads.sql ../forum/misc_data.sql > forums.sql
cp ../forum/forum.sqlite forums.sqlite # forum or forums?
tar -cf forums.tar forums && gzip -f forums.tar forums.sqlite forums.sql
scp forums.sql.gz forums.sqlite.gz forums.tar.gz $HOSTNAME:$ARCHIVE_PATH
ssh $HOSTNAME "cd $ARCHIVE_PATH; tar -xf forums.tar.gz"
fi;
if [ -d "wiki" ]; then
tar -cf wiki.tar wiki && gzip -f wiki.tar
scp wiki.tar.gz $HOSTNAME:$ARCHIVE_PATH
tar -cf wiki.tar wiki && gzip -f wiki.tar wiki.xml
scp wiki.xml.gz wiki.tar.gz $HOSTNAME:$ARCHIVE_PATH
ssh $HOSTNAME "cd $ARCHIVE_PATH; tar -xf wiki.tar.gz"
fi;

View File

@ -4,6 +4,7 @@ from .wiki import Wiki
from .archive_generator import ArchiveGenerator
import sys
import shutil
BASEDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
FORUM_DATABASE = os.path.join(BASEDIR, "forum", "forum.sqlite")
@ -39,4 +40,5 @@ def archive_wiki():
wiki = Wiki(os.path.join(WIKI_DIRECTORY, entry))
if wiki:
shutil.copyfile(wiki.xml_path, os.path.join(ARCHIVES_BASEDIR, "wiki.xml"))
ARCHIVE_GENERATOR.generate_wiki(wiki, WIKI_ARCHIVES)

View File

@ -6,8 +6,7 @@ import chevron
import bbcode
import html
from .wiki import NAMESPACES as WIKI_NAMESPACES
import mwparserfromhell
from .wiki import Renderer, NAMESPACES as WIKI_NAMESPACES
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("ArchiveGenerator")
@ -49,28 +48,35 @@ class ArchiveGenerator():
"target": "Main_Page.html"
})
wikitext_renderer = Renderer()
for page in wiki.get_pages():
if page.redirect:
continue
if page.namespace != WIKI_NAMESPACES['MAIN']:
continue
try:
if page.namespace != WIKI_NAMESPACES['MAIN']:
continue
page_out = "{}.html".format(page.title).replace(" ", "_")
base = ""
if "/" in page_out:
base = "../" * page_out.count("/")
try:
os.makedirs(os.path.dirname(os.path.join(out_dir, page_out)))
except FileExistsError: pass
page_out = "{}.html".format(page.title).replace(" ", "_")
base = ""
if "/" in page_out:
base = "../" * page_out.count("/")
try:
os.makedirs(os.path.dirname(os.path.join(out_dir, page_out)))
except FileExistsError: pass
logger.info("Archiving page %s to %s", page.title, page_out)
renderer.render_template_to_file("page", page_out, {
"title": " - {}".format(page.title),
"page": page,
"base": base,
"text": mwparserfromhell.parse(page.get_latest().text)
})
if page.redirect:
logger.info("Archiving redirect page (%s -> %s) to %s", page.title, page.redirect, page_out)
renderer.render_template_to_file("redirect", page_out, {
"target": "{}{}{}.html".format(base, page.redirect[0].upper(), page.redirect[1:].replace(" ", "_"))
})
else:
logger.info("Archiving page %s to %s", page.title, page_out)
renderer.render_template_to_file("page", page_out, {
"title": " - {}".format(page.title),
"page": page,
"base": base,
"text": wikitext_renderer.render(page.get_latest().text)
})
except Exception as e:
logger.error("Error encountered when archiving %s: %s", page.title, e)
def generate_forum (self, forum, out_dir):
logger.info("Archiving forum to %s", out_dir)

View File

@ -1,5 +1,8 @@
from xml.etree import ElementTree
import mwparserfromhell
from mwparserfromhell.nodes import Wikilink, Comment, ExternalLink, Heading, Tag, Template, Text
NAMESPACE = "{http://www.mediawiki.org/xml/export-0.10/}"
PAGE_TAG = "{}page".format(NAMESPACE)
ID_TAG = "{}id".format(NAMESPACE)
@ -22,6 +25,14 @@ NAMESPACES = {
"TEMPLATE": 10
}
# Interwiki link prefixes mapped to URL templates. The "{}" placeholder is
# filled with the part of the link title that follows the prefix
# (see Renderer.translate_page_title).
INTERWIKI_NAMESPACES = {
"bp:": "https://bulbapedia.bulbagarden.net/wiki/{}",
"wikipedia:": "https://en.wikipedia.org/wiki/{}"
}
# Link-title prefixes that Renderer.translate_image_title recognises as images.
FILE_NAMESPACES = ["File:", "Image:"]
# Link-title prefix for category links; Renderer.render currently emits
# nothing for links that start with it.
CATEGORY_NAMESPACE = "Category:"
class Wiki():
def __init__ (self, xml_path):
self.xml_path = xml_path
@ -71,4 +82,70 @@ class Contributor():
if child.tag == ID_TAG:
self.id = child.text
elif child.tag == USERNAME_TAG:
self.username = child.text
self.username = child.text
class Renderer():
    """Render wikitext into a rudimentary HTML fragment.

    The wikitext is parsed with mwparserfromhell and each top-level node is
    translated individually. Wikilinks, external links, tags, headings, plain
    text and templates are handled; any other node type is skipped.
    """

    def __init__ (self, templates=None):
        """Create a renderer.

        Args:
            templates: Optional mapping kept on ``self.templates`` for future
                template substitution. A fresh dict is created when omitted so
                that instances never share one mutable default.
        """
        self.templates = {} if templates is None else templates

    def render (self, wikitext):
        """Return the HTML produced for ``wikitext``.

        Args:
            wikitext: Anything ``mwparserfromhell.parse`` accepts (a string,
                a Wikicode object, ``None``, ...).

        Returns:
            The concatenated HTML with surrounding whitespace stripped and
            every blank line (``"\\n\\n"``) replaced by ``<br /><br />``.
        """
        rendered = []
        wikitext = mwparserfromhell.parse(wikitext)
        # Only walk the top-level nodes; nested content is rendered through the
        # recursive self.render() calls below.
        for node in wikitext.ifilter(recursive=False):
            # node types:
            # https://mwparserfromhell.readthedocs.io/en/latest/api/mwparserfromhell.nodes.html#module-mwparserfromhell.nodes.text
            if isinstance(node, Wikilink):
                # Work on a plain string so prefix checks and slicing below
                # behave like ordinary str operations.
                title = str(node.title)
                image_name = self.translate_image_title(title)
                if image_name:
                    rendered.append('<img src="{}" />'.format(image_name))
                elif title.startswith(CATEGORY_NAMESPACE):
                    pass # todo: generate category links
                else:
                    rendered.append('<a href="{}">{}</a>'.format(
                        self.translate_page_title(title),
                        self.render(node.text if node.text else node.title)
                    ))
            elif isinstance(node, ExternalLink):
                rendered.append('<a href="{}">{}</a>'.format(
                    node.url,
                    self.render(node.title if node.title else node.url)
                ))
            elif isinstance(node, Tag):
                # Attributes are not carried over; only the tag name and its
                # rendered contents are emitted.
                tag_name = self.render(node.tag)
                rendered.append("<{}>{}</{}>".format(
                    tag_name,
                    self.render(node.contents),
                    tag_name
                ))
            elif isinstance(node, Heading):
                rendered.append("<h{}>{}</h{}>".format(
                    node.level,
                    self.render(node.title),
                    node.level
                ))
            elif isinstance(node, Text):
                rendered.append(node.value)
            elif isinstance(node, Template): # todo: template substitution
                # Echo the template source. Every piece must be a str, since
                # "".join() rejects Wikicode objects and lists.
                rendered.append("<code>{{")
                rendered.append(str(node.name))
                rendered.extend("|{}".format(param) for param in node.params)
                rendered.append("}}</code>")

        return "".join(rendered).strip().replace("\n\n", "<br /><br />")

    def translate_page_title (self, page_title):
        """Return the link target for a wikilink title.

        Titles starting with a prefix from ``INTERWIKI_NAMESPACES`` become the
        matching external URL; every other title becomes a local
        ``<Title>.html`` file name.
        """
        page_title = str(page_title)
        for namespace, url in INTERWIKI_NAMESPACES.items():
            if page_title.startswith(namespace):
                return url.format(page_title[len(namespace):])

        return "{}.html".format(self.reformat_page_title(page_title))

    def translate_image_title (self, page_title):
        """Return the image name for a file/image link title.

        Returns ``None`` when the title does not start with any prefix listed
        in ``FILE_NAMESPACES``.
        """
        page_title = str(page_title)
        for namespace in FILE_NAMESPACES:
            if page_title.startswith(namespace):
                # Strip the prefix that actually matched.
                return self.reformat_page_title(page_title[len(namespace):])

        return None

    def reformat_page_title (self, page_title):
        """Upper-case the first character and turn later spaces into underscores.

        An empty title yields an empty string.
        """
        page_title = str(page_title)
        # Slicing instead of indexing keeps an empty title from raising.
        return "{}{}".format(page_title[:1].upper(), page_title[1:].replace(" ", "_"))

View File

@ -11,4 +11,7 @@ ul.boards { margin-left: 0; padding-left: 0; }
.label { font-weight: bold }
article { border-top: 1px solid black; }
section { margin-top: 15px; margin-bottom: 15px; }
.next { float: right; }
.next { float: right; }
.page { padding-top: 15px; }
.page table { width: 100%; }

View File

@ -2,7 +2,7 @@
Welcome to the <b>Glitch City Laboratories Archives</b>.
<h2>Archives</h2>
<ul>
<li><a href="forums">Forums</a> (<a href="forums.tar.gz">.tar.gz</a>)</li>
<li><a href="wiki">Wiki</a> (<a href="wiki.tar.gz">.tar.gz</a>)</li>
<li><a href="forums">Forums</a> (<a href="forums.tar.gz">.tar.gz</a>) (<a href="forums.sql.gz">.sql.gz</a>) (<a href="forums.sqlite.gz">.sqlite.gz</a>)</li>
<li><a href="wiki">Wiki</a> (<a href="wiki.tar.gz">.tar.gz</a>) (<a href="wiki.xml.gz">.xml.gz</a>)</li>
</ul>
{{>footer}}

View File

@ -1,6 +1,6 @@
{{>header}}
<h2>{{page.title}}</h2>
<article>
{{text}}
<article class="page">
{{{text}}}
</article>
{{>footer}}