Implement a rudimentary wikitext renderer and extend the deploy_archives script so that it can deploy the wiki archive as well as the SQL/SQLite/XML data dumps.
This commit is contained in:
parent
0b1320a9da
commit
54fa852897
@ -6,13 +6,15 @@ cd archives
|
|||||||
scp index.html style.css $HOSTNAME:$ARCHIVE_PATH
|
scp index.html style.css $HOSTNAME:$ARCHIVE_PATH
|
||||||
|
|
||||||
if [ -d "forums" ]; then
|
if [ -d "forums" ]; then
|
||||||
tar -cf forums.tar forums && gzip -f forums.tar
|
cat ../forum/structure.sql ../forum/categories.sql ../forum/boards.sql ../forum/threads.sql ../forum/misc_data.sql > forums.sql
|
||||||
scp forums.tar.gz $HOSTNAME:$ARCHIVE_PATH
|
cp ../forum/forum.sqlite forums.sqlite # forum or forums?
|
||||||
|
tar -cf forums.tar forums && gzip -f forums.tar forums.sqlite forums.sql
|
||||||
|
scp forums.sql.gz forums.sqlite.gz forums.tar.gz $HOSTNAME:$ARCHIVE_PATH
|
||||||
ssh $HOSTNAME "cd $ARCHIVE_PATH; tar -xf forums.tar.gz"
|
ssh $HOSTNAME "cd $ARCHIVE_PATH; tar -xf forums.tar.gz"
|
||||||
fi;
|
fi;
|
||||||
|
|
||||||
if [ -d "wiki" ]; then
|
if [ -d "wiki" ]; then
|
||||||
tar -cf wiki.tar wiki && gzip -f wiki.tar
|
tar -cf wiki.tar wiki && gzip -f wiki.tar wiki.xml
|
||||||
scp wiki.tar.gz $HOSTNAME:$ARCHIVE_PATH
|
scp wiki.xml.gz wiki.tar.gz $HOSTNAME:$ARCHIVE_PATH
|
||||||
ssh $HOSTNAME "cd $ARCHIVE_PATH; tar -xf wiki.tar.gz"
|
ssh $HOSTNAME "cd $ARCHIVE_PATH; tar -xf wiki.tar.gz"
|
||||||
fi;
|
fi;
|
@ -4,6 +4,7 @@ from .wiki import Wiki
|
|||||||
from .archive_generator import ArchiveGenerator
|
from .archive_generator import ArchiveGenerator
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
|
import shutil
|
||||||
|
|
||||||
BASEDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
BASEDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
FORUM_DATABASE = os.path.join(BASEDIR, "forum", "forum.sqlite")
|
FORUM_DATABASE = os.path.join(BASEDIR, "forum", "forum.sqlite")
|
||||||
@ -39,4 +40,5 @@ def archive_wiki():
|
|||||||
wiki = Wiki(os.path.join(WIKI_DIRECTORY, entry))
|
wiki = Wiki(os.path.join(WIKI_DIRECTORY, entry))
|
||||||
|
|
||||||
if wiki:
|
if wiki:
|
||||||
|
shutil.copyfile(wiki.xml_path, os.path.join(ARCHIVES_BASEDIR, "wiki.xml"))
|
||||||
ARCHIVE_GENERATOR.generate_wiki(wiki, WIKI_ARCHIVES)
|
ARCHIVE_GENERATOR.generate_wiki(wiki, WIKI_ARCHIVES)
|
@ -6,8 +6,7 @@ import chevron
|
|||||||
import bbcode
|
import bbcode
|
||||||
import html
|
import html
|
||||||
|
|
||||||
from .wiki import NAMESPACES as WIKI_NAMESPACES
|
from .wiki import Renderer, NAMESPACES as WIKI_NAMESPACES
|
||||||
import mwparserfromhell
|
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
logger = logging.getLogger("ArchiveGenerator")
|
logger = logging.getLogger("ArchiveGenerator")
|
||||||
@ -49,28 +48,35 @@ class ArchiveGenerator():
|
|||||||
"target": "Main_Page.html"
|
"target": "Main_Page.html"
|
||||||
})
|
})
|
||||||
|
|
||||||
|
wikitext_renderer = Renderer()
|
||||||
for page in wiki.get_pages():
|
for page in wiki.get_pages():
|
||||||
if page.redirect:
|
try:
|
||||||
continue
|
if page.namespace != WIKI_NAMESPACES['MAIN']:
|
||||||
|
continue
|
||||||
if page.namespace != WIKI_NAMESPACES['MAIN']:
|
|
||||||
continue
|
|
||||||
|
|
||||||
page_out = "{}.html".format(page.title).replace(" ", "_")
|
page_out = "{}.html".format(page.title).replace(" ", "_")
|
||||||
base = ""
|
base = ""
|
||||||
if "/" in page_out:
|
if "/" in page_out:
|
||||||
base = "../" * page_out.count("/")
|
base = "../" * page_out.count("/")
|
||||||
try:
|
try:
|
||||||
os.makedirs(os.path.dirname(os.path.join(out_dir, page_out)))
|
os.makedirs(os.path.dirname(os.path.join(out_dir, page_out)))
|
||||||
except FileExistsError: pass
|
except FileExistsError: pass
|
||||||
|
|
||||||
logger.info("Archiving page %s to %s", page.title, page_out)
|
if page.redirect:
|
||||||
renderer.render_template_to_file("page", page_out, {
|
logger.info("Archiving redirect page (%s -> %s) to %s", page.title, page.redirect, page_out)
|
||||||
"title": " - {}".format(page.title),
|
renderer.render_template_to_file("redirect", page_out, {
|
||||||
"page": page,
|
"target": "{}{}{}.html".format(base, page.redirect[0].upper(), page.redirect[1:].replace(" ", "_"))
|
||||||
"base": base,
|
})
|
||||||
"text": mwparserfromhell.parse(page.get_latest().text)
|
else:
|
||||||
})
|
logger.info("Archiving page %s to %s", page.title, page_out)
|
||||||
|
renderer.render_template_to_file("page", page_out, {
|
||||||
|
"title": " - {}".format(page.title),
|
||||||
|
"page": page,
|
||||||
|
"base": base,
|
||||||
|
"text": wikitext_renderer.render(page.get_latest().text)
|
||||||
|
})
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Error encountered when archiving %s: %s", page.title, e)
|
||||||
|
|
||||||
def generate_forum (self, forum, out_dir):
|
def generate_forum (self, forum, out_dir):
|
||||||
logger.info("Archiving forum to %s", out_dir)
|
logger.info("Archiving forum to %s", out_dir)
|
||||||
|
@ -1,5 +1,8 @@
|
|||||||
from xml.etree import ElementTree
|
from xml.etree import ElementTree
|
||||||
|
|
||||||
|
import mwparserfromhell
|
||||||
|
from mwparserfromhell.nodes import Wikilink, Comment, ExternalLink, Heading, Tag, Template, Text
|
||||||
|
|
||||||
NAMESPACE = "{http://www.mediawiki.org/xml/export-0.10/}"
|
NAMESPACE = "{http://www.mediawiki.org/xml/export-0.10/}"
|
||||||
PAGE_TAG = "{}page".format(NAMESPACE)
|
PAGE_TAG = "{}page".format(NAMESPACE)
|
||||||
ID_TAG = "{}id".format(NAMESPACE)
|
ID_TAG = "{}id".format(NAMESPACE)
|
||||||
@ -22,6 +25,14 @@ NAMESPACES = {
|
|||||||
"TEMPLATE": 10
|
"TEMPLATE": 10
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Link-title prefixes that refer to pages on other wikis. Each prefix maps to
# a URL template whose "{}" placeholder receives the link title with the
# prefix removed.
INTERWIKI_NAMESPACES = {
|
||||||
|
"bp:": "https://bulbapedia.bulbagarden.net/wiki/{}",
|
||||||
|
"wikipedia:": "https://en.wikipedia.org/wiki/{}"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Link-title prefixes that mark a wikilink as a file/image reference.
FILE_NAMESPACES = ["File:", "Image:"]
|
||||||
|
# Link-title prefix of category links.
CATEGORY_NAMESPACE = "Category:"
|
||||||
|
|
||||||
class Wiki():
|
class Wiki():
|
||||||
def __init__ (self, xml_path):
|
def __init__ (self, xml_path):
|
||||||
self.xml_path = xml_path
|
self.xml_path = xml_path
|
||||||
@ -71,4 +82,70 @@ class Contributor():
|
|||||||
if child.tag == ID_TAG:
|
if child.tag == ID_TAG:
|
||||||
self.id = child.text
|
self.id = child.text
|
||||||
elif child.tag == USERNAME_TAG:
|
elif child.tag == USERNAME_TAG:
|
||||||
self.username = child.text
|
self.username = child.text
|
||||||
|
|
||||||
|
class Renderer():
    """Render wikitext into a rough HTML approximation.

    Handles wikilinks (page, interwiki and file/image links), external
    links, tags, headings and plain text. Templates are not expanded; they
    are emitted as a literal ``<code>{{...}}</code>`` placeholder. Category
    links and any other node type (e.g. comments) produce no output.
    """

    def __init__ (self, templates=None):
        """Create a renderer.

        templates -- optional mapping kept on ``self.templates``; it is not
                     consulted yet (template substitution is still a todo).
        """
        # Build a fresh dict per instance: a `{}` default would be one shared
        # object mutated by every Renderer created without arguments.
        self.templates = {} if templates is None else templates

    def render (self, wikitext):
        """Return the HTML string for `wikitext`.

        `wikitext` may be anything `mwparserfromhell.parse` accepts; nested
        node contents are rendered by recursive calls. The joined result is
        stripped and blank lines ("\\n\\n") become "<br /><br />".
        """
        # Local import: only attribute values and the template placeholder
        # need escaping, and this keeps the block self-contained.
        import html

        rendered = []
        wikitext = mwparserfromhell.parse(wikitext)
        # Only top-level nodes are walked here; children are handled by the
        # recursive render() calls below.
        for node in wikitext.ifilter(recursive=False):
            # node types:
            # https://mwparserfromhell.readthedocs.io/en/latest/api/mwparserfromhell.nodes.html#module-mwparserfromhell.nodes.text
            if isinstance(node, Wikilink):
                image_name = self.translate_image_title(node.title)
                if image_name:
                    # todo: use the link text (caption/options) for the image
                    rendered.append('<img src="{}" />'.format(
                        html.escape(image_name, quote=True)
                    ))
                elif node.title.startswith(CATEGORY_NAMESPACE):
                    pass # todo: generate category links
                else:
                    rendered.append('<a href="{}">{}</a>'.format(
                        html.escape(self.translate_page_title(node.title), quote=True),
                        self.render(node.text if node.text else node.title)
                    ))
            elif isinstance(node, ExternalLink):
                rendered.append('<a href="{}">{}</a>'.format(
                    html.escape(str(node.url), quote=True),
                    self.render(node.title if node.title else node.url)
                ))
            elif isinstance(node, Tag):
                tag_name = self.render(node.tag)
                if node.self_closing:
                    # Emitting "<br></br>" would be read by HTML parsers as
                    # two line breaks, so self-closing tags get no end tag.
                    rendered.append("<{} />".format(tag_name))
                else:
                    # NOTE(review): tag attributes are not carried over.
                    rendered.append("<{}>{}</{}>".format(
                        tag_name,
                        self.render(node.contents),
                        tag_name
                    ))
            elif isinstance(node, Heading):
                rendered.append("<h{}>{}</h{}>".format(
                    node.level,
                    self.render(node.title),
                    node.level
                ))
            elif isinstance(node, Text):
                # NOTE(review): text is emitted verbatim (not HTML-escaped),
                # as before - confirm whether escaping is wanted here.
                rendered.append(node.value)
            elif isinstance(node, Template): # todo: template substitution
                # Everything appended must be a str for the join below, so
                # the name and each parameter are converted explicitly.
                pieces = [str(node.name)]
                pieces.extend(str(param) for param in node.params)
                rendered.append("<code>{{")
                rendered.append(html.escape("|".join(pieces), quote=False))
                rendered.append("}}</code>")
        return "".join(rendered).strip().replace("\n\n", "<br /><br />")

    def translate_page_title (self, page_title):
        """Return the link target for a wikilink title.

        Titles starting with an INTERWIKI_NAMESPACES prefix are formatted
        into that prefix's URL template; every other title becomes a
        "<Reformatted_title>.html" file name.
        """
        page_title = str(page_title)
        for namespace, url in INTERWIKI_NAMESPACES.items():
            if page_title.startswith(namespace):
                return url.format(page_title[len(namespace):])

        return "{}.html".format(self.reformat_page_title(page_title))

    def translate_image_title (self, page_title):
        """Return the reformatted file name for a file/image link title.

        Returns None when the title carries none of the FILE_NAMESPACES
        prefixes, i.e. when the link is not an image link.
        """
        page_title = str(page_title)
        for namespace in FILE_NAMESPACES:
            if page_title.startswith(namespace):
                # Strip the prefix that actually matched.
                return self.reformat_page_title(page_title[len(namespace):])

        return None

    def reformat_page_title (self, page_title):
        """Upper-case the first character and replace later spaces with "_".

        An empty title yields an empty string.
        """
        page_title = str(page_title)
        # Slicing (rather than indexing [0]) keeps an empty title from raising.
        return "{}{}".format(page_title[:1].upper(), page_title[1:].replace(" ", "_"))
|
@ -11,4 +11,7 @@ ul.boards { margin-left: 0; padding-left: 0; }
|
|||||||
.label { font-weight: bold }
|
.label { font-weight: bold }
|
||||||
article { border-top: 1px solid black; }
|
article { border-top: 1px solid black; }
|
||||||
section { margin-top: 15px; margin-bottom: 15px; }
|
section { margin-top: 15px; margin-bottom: 15px; }
|
||||||
.next { float: right; }
|
.next { float: right; }
|
||||||
|
|
||||||
|
.page { padding-top: 15px; }
|
||||||
|
.page table { width: 100%; }
|
@ -2,7 +2,7 @@
|
|||||||
Welcome to the <b>Glitch City Laboratories Archives</b>.
|
Welcome to the <b>Glitch City Laboratories Archives</b>.
|
||||||
<h2>Archives</h2>
|
<h2>Archives</h2>
|
||||||
<ul>
|
<ul>
|
||||||
<li><a href="forums">Forums</a> (<a href="forums.tar.gz">.tar.gz</a>)</li>
|
<li><a href="forums">Forums</a> (<a href="forums.tar.gz">.tar.gz</a>) (<a href="forums.sql.gz">.sql.gz</a>) (<a href="forums.sqlite.gz">.sqlite.gz</a>)</li>
|
||||||
<li><a href="wiki">Wiki</a> (<a href="wiki.tar.gz">.tar.gz</a>)</li>
|
<li><a href="wiki">Wiki</a> (<a href="wiki.tar.gz">.tar.gz</a>) (<a href="wiki.xml.gz">.xml.gz</a>)</li>
|
||||||
</ul>
|
</ul>
|
||||||
{{>footer}}
|
{{>footer}}
|
@ -1,6 +1,6 @@
|
|||||||
{{>header}}
|
{{>header}}
|
||||||
<h2>{{page.title}}</h2>
|
<h2>{{page.title}}</h2>
|
||||||
<article>
|
<article class="page">
|
||||||
{{text}}
|
{{{text}}}
|
||||||
</article>
|
</article>
|
||||||
{{>footer}}
|
{{>footer}}
|
Loading…
x
Reference in New Issue
Block a user