Implement a rudimentary wikitext renderer and extend the deploy_archives script so that it can deploy the wiki archive as well as the SQL/SQLite/XML data dumps.
This commit is contained in:
parent
0b1320a9da
commit
54fa852897
@ -6,13 +6,15 @@ cd archives
|
||||
scp index.html style.css $HOSTNAME:$ARCHIVE_PATH
|
||||
|
||||
if [ -d "forums" ]; then
|
||||
tar -cf forums.tar forums && gzip -f forums.tar
|
||||
scp forums.tar.gz $HOSTNAME:$ARCHIVE_PATH
|
||||
cat ../forum/structure.sql ../forum/categories.sql ../forum/boards.sql ../forum/threads.sql ../forum/misc_data.sql > forums.sql
|
||||
cp ../forum/forum.sqlite forums.sqlite # forum or forums?
|
||||
tar -cf forums.tar forums && gzip -f forums.tar forums.sqlite forums.sql
|
||||
scp forums.sql.gz forums.sqlite.gz forums.tar.gz $HOSTNAME:$ARCHIVE_PATH
|
||||
ssh $HOSTNAME "cd $ARCHIVE_PATH; tar -xf forums.tar.gz"
|
||||
fi;
|
||||
|
||||
if [ -d "wiki" ]; then
|
||||
tar -cf wiki.tar wiki && gzip -f wiki.tar
|
||||
scp wiki.tar.gz $HOSTNAME:$ARCHIVE_PATH
|
||||
tar -cf wiki.tar wiki && gzip -f wiki.tar wiki.xml
|
||||
scp wiki.xml.gz wiki.tar.gz $HOSTNAME:$ARCHIVE_PATH
|
||||
ssh $HOSTNAME "cd $ARCHIVE_PATH; tar -xf wiki.tar.gz"
|
||||
fi;
|
@ -4,6 +4,7 @@ from .wiki import Wiki
|
||||
from .archive_generator import ArchiveGenerator
|
||||
|
||||
import sys
|
||||
import shutil
|
||||
|
||||
BASEDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
FORUM_DATABASE = os.path.join(BASEDIR, "forum", "forum.sqlite")
|
||||
@ -39,4 +40,5 @@ def archive_wiki():
|
||||
wiki = Wiki(os.path.join(WIKI_DIRECTORY, entry))
|
||||
|
||||
if wiki:
|
||||
shutil.copyfile(wiki.xml_path, os.path.join(ARCHIVES_BASEDIR, "wiki.xml"))
|
||||
ARCHIVE_GENERATOR.generate_wiki(wiki, WIKI_ARCHIVES)
|
@ -6,8 +6,7 @@ import chevron
|
||||
import bbcode
|
||||
import html
|
||||
|
||||
from .wiki import NAMESPACES as WIKI_NAMESPACES
|
||||
import mwparserfromhell
|
||||
from .wiki import Renderer, NAMESPACES as WIKI_NAMESPACES
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger("ArchiveGenerator")
|
||||
@ -49,28 +48,35 @@ class ArchiveGenerator():
|
||||
"target": "Main_Page.html"
|
||||
})
|
||||
|
||||
wikitext_renderer = Renderer()
|
||||
for page in wiki.get_pages():
|
||||
if page.redirect:
|
||||
continue
|
||||
try:
|
||||
if page.namespace != WIKI_NAMESPACES['MAIN']:
|
||||
continue
|
||||
|
||||
if page.namespace != WIKI_NAMESPACES['MAIN']:
|
||||
continue
|
||||
page_out = "{}.html".format(page.title).replace(" ", "_")
|
||||
base = ""
|
||||
if "/" in page_out:
|
||||
base = "../" * page_out.count("/")
|
||||
try:
|
||||
os.makedirs(os.path.dirname(os.path.join(out_dir, page_out)))
|
||||
except FileExistsError: pass
|
||||
|
||||
page_out = "{}.html".format(page.title).replace(" ", "_")
|
||||
base = ""
|
||||
if "/" in page_out:
|
||||
base = "../" * page_out.count("/")
|
||||
try:
|
||||
os.makedirs(os.path.dirname(os.path.join(out_dir, page_out)))
|
||||
except FileExistsError: pass
|
||||
|
||||
logger.info("Archiving page %s to %s", page.title, page_out)
|
||||
renderer.render_template_to_file("page", page_out, {
|
||||
"title": " - {}".format(page.title),
|
||||
"page": page,
|
||||
"base": base,
|
||||
"text": mwparserfromhell.parse(page.get_latest().text)
|
||||
})
|
||||
if page.redirect:
|
||||
logger.info("Archiving redirect page (%s -> %s) to %s", page.title, page.redirect, page_out)
|
||||
renderer.render_template_to_file("redirect", page_out, {
|
||||
"target": "{}{}{}.html".format(base, page.redirect[0].upper(), page.redirect[1:].replace(" ", "_"))
|
||||
})
|
||||
else:
|
||||
logger.info("Archiving page %s to %s", page.title, page_out)
|
||||
renderer.render_template_to_file("page", page_out, {
|
||||
"title": " - {}".format(page.title),
|
||||
"page": page,
|
||||
"base": base,
|
||||
"text": wikitext_renderer.render(page.get_latest().text)
|
||||
})
|
||||
except Exception as e:
|
||||
logger.error("Error encountered when archiving %s: %s", page.title, e)
|
||||
|
||||
def generate_forum (self, forum, out_dir):
|
||||
logger.info("Archiving forum to %s", out_dir)
|
||||
|
@ -1,5 +1,8 @@
|
||||
from xml.etree import ElementTree
|
||||
|
||||
import mwparserfromhell
|
||||
from mwparserfromhell.nodes import Wikilink, Comment, ExternalLink, Heading, Tag, Template, Text
|
||||
|
||||
NAMESPACE = "{http://www.mediawiki.org/xml/export-0.10/}"
|
||||
PAGE_TAG = "{}page".format(NAMESPACE)
|
||||
ID_TAG = "{}id".format(NAMESPACE)
|
||||
@ -22,6 +25,14 @@ NAMESPACES = {
|
||||
"TEMPLATE": 10
|
||||
}
|
||||
|
||||
INTERWIKI_NAMESPACES = {
|
||||
"bp:": "https://bulbapedia.bulbagarden.net/wiki/{}",
|
||||
"wikipedia:": "https://en.wikipedia.org/wiki/{}"
|
||||
}
|
||||
|
||||
FILE_NAMESPACES = ["File:", "Image:"]
|
||||
CATEGORY_NAMESPACE = "Category:"
|
||||
|
||||
class Wiki():
|
||||
def __init__ (self, xml_path):
|
||||
self.xml_path = xml_path
|
||||
@ -72,3 +83,69 @@ class Contributor():
|
||||
self.id = child.text
|
||||
elif child.tag == USERNAME_TAG:
|
||||
self.username = child.text
|
||||
|
||||
class Renderer():
    """Rudimentary wikitext-to-HTML renderer built on mwparserfromhell.

    Wikitext is parsed with ``mwparserfromhell.parse`` and each top-level node
    is translated to an HTML fragment; nested wikitext (link labels, tag
    contents, heading titles) is rendered by recursive calls to ``render``.
    Template substitution is not implemented yet: templates are emitted as
    their literal source wrapped in ``<code>``.
    """

    def __init__ (self, templates=None):
        """Create a renderer.

        templates: optional mapping kept on ``self.templates``; it is stored
        but not consulted by the rendering code yet.
        """
        # Build a fresh dict per instance instead of sharing one mutable
        # default object between every Renderer.
        self.templates = {} if templates is None else templates

    def render (self, wikitext):
        """Render ``wikitext`` (anything ``mwparserfromhell.parse`` accepts)
        to an HTML string.

        The result is stripped of surrounding whitespace and every blank line
        ("\\n\\n") is turned into two ``<br />`` elements.
        """
        rendered = []
        # Non-recursive iteration: only top-level nodes are visited here,
        # nested nodes are handled by the explicit recursive render() calls.
        # node types:
        # https://mwparserfromhell.readthedocs.io/en/latest/api/mwparserfromhell.nodes.html#module-mwparserfromhell.nodes.text
        for node in mwparserfromhell.parse(wikitext).ifilter(recursive=False):
            if isinstance(node, Wikilink):
                rendered.append(self._render_wikilink(node))
            elif isinstance(node, ExternalLink):
                rendered.append('<a href="{}">{}</a>'.format(
                    node.url,
                    self.render(node.title if node.title else node.url)
                ))
            elif isinstance(node, Tag):
                rendered.append(self._render_tag(node))
            elif isinstance(node, Heading):
                rendered.append("<h{0}>{1}</h{0}>".format(
                    node.level,
                    self.render(node.title)
                ))
            elif isinstance(node, Text):
                rendered.append(node.value)
            elif isinstance(node, Template):  # todo: template substitution
                # str(node) is the template's original "{{name|params}}"
                # source. Appending the name/params objects themselves would
                # make "".join() below raise TypeError.
                rendered.append("<code>" + str(node) + "</code>")
            elif not isinstance(node, Comment):
                # Remaining node types (e.g. HTML entities) are passed through
                # as their source text rather than being dropped; wikitext
                # comments are intentionally omitted from the output.
                rendered.append(str(node))
        return "".join(rendered).strip().replace("\n\n", "<br /><br />")

    def _render_wikilink (self, node):
        """Render one Wikilink node as an <img>, an <a>, or nothing (category)."""
        title = str(node.title)
        image_name = self.translate_image_title(title)
        if image_name:
            return '<img src="{}" />'.format(image_name)
        if title.startswith(CATEGORY_NAMESPACE):
            return ""  # todo: generate category links
        return '<a href="{}">{}</a>'.format(
            self.translate_page_title(title),
            self.render(node.text if node.text else title)
        )

    def _render_tag (self, node):
        """Render one Tag node; attributes are not carried over."""
        tag_name = str(node.tag).strip()
        if node.self_closing:
            # Tags without contents (<br/>, ---- and, as far as the parser
            # docs indicate, list markers) must not get a separate closing
            # tag: "<br></br>" is read by browsers as two line breaks.
            return "<{} />".format(tag_name)
        return "<{0}>{1}</{0}>".format(tag_name, self.render(node.contents))

    def translate_page_title (self, page_title):
        """Return the link target for ``page_title``.

        Titles starting with a key of INTERWIKI_NAMESPACES are mapped to the
        matching external URL; every other title becomes a relative
        "<Title>.html" file name.
        """
        title = str(page_title)
        for namespace, url in INTERWIKI_NAMESPACES.items():
            if title.startswith(namespace):
                return url.format(title[len(namespace):])

        return "{}.html".format(self.reformat_page_title(title))

    def translate_image_title (self, page_title):
        """Return the reformatted file name when ``page_title`` starts with
        one of FILE_NAMESPACES, otherwise None."""
        title = str(page_title)
        for namespace in FILE_NAMESPACES:
            if title.startswith(namespace):
                # Strip the prefix that actually matched ("File:" and
                # "Image:" differ in length).
                return self.reformat_page_title(title[len(namespace):])
        return None

    def reformat_page_title (self, page_title):
        """Upper-case the first character of ``page_title`` and replace the
        spaces after it with underscores; an empty title yields ""."""
        title = str(page_title)
        if not title:
            return ""
        return "{}{}".format(title[0].upper(), title[1:].replace(" ", "_"))
|
@ -12,3 +12,6 @@ ul.boards { margin-left: 0; padding-left: 0; }
|
||||
article { border-top: 1px solid black; }
|
||||
section { margin-top: 15px; margin-bottom: 15px; }
|
||||
.next { float: right; }
|
||||
|
||||
.page { padding-top: 15px; }
|
||||
.page table { width: 100%; }
|
@ -2,7 +2,7 @@
|
||||
Welcome to the <b>Glitch City Laboratories Archives</b>.
|
||||
<h2>Archives</h2>
|
||||
<ul>
|
||||
<li><a href="forums">Forums</a> (<a href="forums.tar.gz">.tar.gz</a>)</li>
|
||||
<li><a href="wiki">Wiki</a> (<a href="wiki.tar.gz">.tar.gz</a>)</li>
|
||||
<li><a href="forums">Forums</a> (<a href="forums.tar.gz">.tar.gz</a>) (<a href="forums.sql.gz">.sql.gz</a>) (<a href="forums.sqlite.gz">.sqlite.gz</a>)</li>
|
||||
<li><a href="wiki">Wiki</a> (<a href="wiki.tar.gz">.tar.gz</a>) (<a href="wiki.xml.gz">.xml.gz</a>)</li>
|
||||
</ul>
|
||||
{{>footer}}
|
@ -1,6 +1,6 @@
|
||||
{{>header}}
|
||||
<h2>{{page.title}}</h2>
|
||||
<article>
|
||||
{{text}}
|
||||
<article class="page">
|
||||
{{{text}}}
|
||||
</article>
|
||||
{{>footer}}
|
Loading…
x
Reference in New Issue
Block a user