Rudimentary support for building wiki archives. The content is dumped to HTML, but the wikitext isn't rendered yet.
mwparserfromhell is used to parse the wikitext, but it has no support for rendering to HTML, so we'll have to build that ourselves.
commit dc0191a04a (parent 3cb08e2d2f)
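For the record, a rough sketch of the sort of manual renderer we'll eventually need. This is hypothetical and not part of this commit; the node types (Text, Heading, Wikilink) are mwparserfromhell's real API, but the HTML mapping and the ".html" link naming are assumptions mirroring generate_wiki below:

    import html

    import mwparserfromhell
    from mwparserfromhell.nodes import Heading, Text, Wikilink

    def render_to_html(wikitext):
        # Walk the parsed node list and emit HTML for the few node types
        # we understand; everything else is escaped and passed through.
        parts = []
        for node in mwparserfromhell.parse(wikitext).nodes:
            if isinstance(node, Text):
                parts.append(html.escape(str(node)))
            elif isinstance(node, Heading):
                # node.title is itself Wikicode; strip_code() flattens it to text.
                title = html.escape(node.title.strip_code().strip())
                parts.append("<h{0}>{1}</h{0}>".format(node.level, title))
            elif isinstance(node, Wikilink):
                target = str(node.title).strip()
                label = str(node.text) if node.text else target
                href = target.replace(" ", "_") + ".html"  # matches the page_out naming below
                parts.append('<a href="{}">{}</a>'.format(html.escape(href), html.escape(label)))
            else:
                # Templates, tags, comments, external links: still TODO.
                parts.append(html.escape(str(node)))
        return "".join(parts)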
README.md
@@ -8,8 +8,11 @@ This repository contains the tickets, scripts, and documentation for the end of
 #### `deploy_archives`
 
 Run this once the archives have been built to tar them up and scp them to the server.
 
-#### Wiki Data
+#### Wiki Data (`wiki` directory)
 
-##### `find_data`
+##### `wiki_pages`
+
+Not a script, just a listing of all the pages in the wiki (as of the 27 July 2020 lockdown). Use this and Special:Export to create an XML dump of wiki pages and place it in the `wiki` directory.
+
+##### `find_pages`
 
 Run this locally (it uses the MediaWiki HTTP API). Finds all pages in categories related to Pokemon generations 1 - 4 that have been edited since 31 March 2020.
 
 #### Forum Data (`forum` directory)
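For anyone recreating the dump programmatically rather than pasting the list into Special:Export by hand, something like this should work (a sketch: the wiki URL and output path are placeholders, and Special:Export's size limits are glossed over; main() below picks up any .xml file in the wiki directory):

    import requests

    EXPORT_URL = "https://wiki.example.org/index.php?title=Special:Export"  # placeholder

    def export_pages(titles, out_path):
        # Special:Export accepts a POST with newline-separated page titles.
        response = requests.post(EXPORT_URL, data={
            "pages": "\n".join(titles),
            "curonly": "1",  # current revisions only
        })
        response.raise_for_status()
        with open(out_path, "wb") as out:
            out.write(response.content)

    with open("wiki/wiki_pages") as listing:
        export_pages(listing.read().splitlines(), "wiki/pages.xml")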
|
epilogue/__init__.py
@@ -1,9 +1,11 @@
 import os
 from .forum import Forum
+from .wiki import Wiki
 from .archive_generator import ArchiveGenerator
 
 BASEDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 FORUM_DATABASE = os.path.join(BASEDIR, "forum", "forum.sqlite")
+WIKI_DIRECTORY = os.path.join(BASEDIR, "wiki")
 TEMPLATES_DIR = os.path.join(BASEDIR, "templates")
 STATIC_DIR = os.path.join(BASEDIR, "static")
 
@@ -13,5 +15,14 @@ WIKI_ARCHIVES = os.path.join(ARCHIVES_BASEDIR, "wiki")
 
 def main():
     forum = Forum(FORUM_DATABASE)
+
+    wiki = None
+    for entry in os.listdir(WIKI_DIRECTORY):
+        if entry.endswith(".xml"):
+            wiki = Wiki(os.path.join(WIKI_DIRECTORY, entry))
+
     generator = ArchiveGenerator(TEMPLATES_DIR, STATIC_DIR)
-    generator.generate_forum(forum, FORUM_ARCHIVES)
+    #generator.generate_forum(forum, FORUM_ARCHIVES)
+
+    if wiki:
+        generator.generate_wiki(wiki, WIKI_ARCHIVES)
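One caveat with the loop above: os.listdir returns entries in arbitrary order, so if the wiki directory ever holds more than one .xml dump, whichever matching entry happens to come last wins. A deterministic variant (just a sketch, not in this commit):

    candidates = sorted(entry for entry in os.listdir(WIKI_DIRECTORY) if entry.endswith(".xml"))
    wiki = Wiki(os.path.join(WIKI_DIRECTORY, candidates[-1])) if candidates else None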
epilogue/archive_generator.py
@@ -6,6 +6,9 @@ from datetime import datetime
 import chevron
 import bbcode
 
+from .wiki import NAMESPACES as WIKI_NAMESPACES
+import mwparserfromhell
+
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger("ArchiveGenerator")
 
@@ -29,6 +32,38 @@ class ArchiveGenerator():
         self.template_dir = template_dir
         self.static_dir = static_dir
 
+    def generate_wiki (self, wiki, out_dir):
+        logger.info("Archiving wiki to %s", out_dir)
+        try:
+            os.makedirs(out_dir)
+        except FileExistsError: pass
+
+        shutil.copyfile(os.path.join(self.static_dir, "style.css"), os.path.join(out_dir, "style.css"))
+        renderer = TemplateRenderer(self.template_dir, out_dir)
+
+        for page in wiki.get_pages():
+            if page.redirect:
+                continue
+
+            if page.namespace != WIKI_NAMESPACES['MAIN']:
+                continue
+
+            page_out = "{}.html".format(page.title).replace(" ", "_")
+            base = ""
+            if "/" in page_out:
+                base = "../" * page_out.count("/")
+            try:
+                os.makedirs(os.path.dirname(os.path.join(out_dir, page_out)))
+            except FileExistsError: pass
+
+            logger.info("Archiving page %s to %s", page.title, page_out)
+            renderer.render_template_to_file("page", page_out, {
+                "title": " - {}".format(page.title),
+                "page": page,
+                "base": base,
+                "text": mwparserfromhell.parse(page.get_latest().text)
+            })
+
     def generate_forum (self, forum, out_dir):
         logger.info("Archiving forum to %s", out_dir)
         try:
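This is where the "isn't rendered yet" caveat comes from: mwparserfromhell.parse returns a Wikicode object, and stringifying a Wikicode round-trips the original markup, so the template receives raw wikitext. A quick illustration (the sample markup is made up):

    import mwparserfromhell

    code = mwparserfromhell.parse("'''Glitch City''' is a [[Glitch|glitch]] area.")
    # Wikicode stringifies back to the exact source markup, unrendered.
    assert str(code) == "'''Glitch City''' is a [[Glitch|glitch]] area."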
epilogue/wiki.py (new file, 74 lines)
@@ -0,0 +1,74 @@
+from xml.etree import ElementTree
+
+NAMESPACE = "{http://www.mediawiki.org/xml/export-0.10/}"
+PAGE_TAG = "{}page".format(NAMESPACE)
+ID_TAG = "{}id".format(NAMESPACE)
+TITLE_TAG = "{}title".format(NAMESPACE)
+REVISION_TAG = "{}revision".format(NAMESPACE)
+NS_TAG = "{}ns".format(NAMESPACE)
+REDIRECT_TAG = "{}redirect".format(NAMESPACE)
+
+TEXT_TAG = "{}text".format(NAMESPACE)
+FORMAT_TAG = "{}format".format(NAMESPACE)
+MODEL_TAG = "{}model".format(NAMESPACE)
+TIMESTAMP_TAG = "{}timestamp".format(NAMESPACE)
+COMMENT_TAG = "{}comment".format(NAMESPACE)
+CONTRIBUTOR_TAG = "{}contributor".format(NAMESPACE)
+
+USERNAME_TAG = "{}username".format(NAMESPACE)
+
+NAMESPACES = {
+    "MAIN": 0,
+    "TEMPLATE": 10
+}
+
+class Wiki():
+    def __init__ (self, xml_path):
+        self.xml_path = xml_path
+
+    def get_pages (self):
+        tree = ElementTree.parse(self.xml_path)
+        return (Page(element) for element in tree.getroot() if element.tag == PAGE_TAG)
+
+class Page():
+    def __init__ (self, element):
+        self.redirect = None
+        self.revisions = []
+        for child in element:
+            if child.tag == ID_TAG:
+                self.id = child.text
+            elif child.tag == NS_TAG:
+                self.namespace = int(child.text)
+            elif child.tag == TITLE_TAG:
+                self.title = child.text
+            elif child.tag == REVISION_TAG:
+                self.revisions.append(Revision(child))
+            elif child.tag == REDIRECT_TAG:
+                self.redirect = child.attrib['title']
+
+    def get_latest (self):
+        return self.revisions[0]
+
+class Revision():
+    def __init__ (self, element):
+        for child in element:
+            if child.tag == ID_TAG:
+                self.id = child.text
+            elif child.tag == TEXT_TAG:
+                self.text = child.text
+            elif child.tag == CONTRIBUTOR_TAG:
+                self.contributor = Contributor(child)
+            elif child.tag == TIMESTAMP_TAG:
+                self.timestamp = child.text
+            elif child.tag == MODEL_TAG:
+                self.model = child.text
+            elif child.tag == COMMENT_TAG:
+                self.comment = child.text
+
+class Contributor():
+    def __init__ (self, element):
+        for child in element:
+            if child.tag == ID_TAG:
+                self.id = child.text
+            elif child.tag == USERNAME_TAG:
+                self.username = child.text
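A quick usage sketch of the new module (the dump path is a placeholder):

    from epilogue.wiki import NAMESPACES, Wiki

    wiki = Wiki("wiki/pages.xml")  # placeholder path
    for page in wiki.get_pages():
        if page.redirect or page.namespace != NAMESPACES["MAIN"]:
            continue
        print(page.title, page.get_latest().timestamp)

Note that get_latest returning revisions[0] assumes one revision per page, which is what Special:Export produces with "current revision only"; full-history dumps list revisions oldest-first, so revisions[-1] would be the latest there.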
setup.py
@@ -8,7 +8,7 @@ setup(
     description='Tools for exporting and creating archives of Glitch City Labs data',
     author='Adrian Kuschelyagi Malacoda',
     packages=['epilogue'],
-    install_requires=['pysqlite3 >= 0.4.3', 'chevron >= 0.13.1', 'bbcode >= 1.1.0'],
+    install_requires=['pysqlite3 >= 0.4.3', 'chevron >= 0.13.1', 'bbcode >= 1.1.0', 'mwparserfromhell >= 0.5.4'],
     entry_points={
         'console_scripts': [
             'epilogue = epilogue:main'
templates/page.mustache (new file, 6 lines)
@@ -0,0 +1,6 @@
+{{>header}}
+<h2>{{page.title}}</h2>
+<article>
+{{text}}
+</article>
+{{>footer}}
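One thing to keep in mind for later: {{text}} is the escaping form of mustache interpolation, so chevron HTML-escapes whatever it's given. That's fine while text is raw wikitext, but once we render real HTML it will need to become {{{text}}} (or {{& text}}) to emit the markup unescaped.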
wiki/wiki_pages (new file, 5593 lines)
(File diff suppressed because it is too large.)