Compare commits

...

40 Commits

SHA1 Message Date
eff22ff325 Bolden names of successor websites. 2020-09-20 05:48:15 -05:00
6ab90d5fff Add link to GCW on the archive index page. 2020-09-20 05:47:35 -05:00
d98354046b Prefix image paths with images subdirectory (which does not yet exist) 2020-09-20 05:45:49 -05:00
f1ae73b737 Correctly parse out fragment from html archive link and append it in the correct place. 2020-09-20 05:40:57 -05:00
d062ca6787 Add anchors to headings. 2020-09-20 05:31:55 -05:00
a9adf51453 Add GCW link to archived wiki pages, for the live version of said archived article. 2020-09-20 05:28:37 -05:00
7157757d43 redirector: handle case where thread id can have an .html extension, because of course it can... 2020-09-03 04:12:54 -05:00
5859ee0408 Add notice to the top of archived pages that this is an archive. 2020-09-03 03:34:22 -05:00
b33ea016d5 convert all f-strings to old-style format strings for python 3.5 compatibility (since that is what's available on the server) 2020-09-01 02:46:37 -05:00
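The conversion above is mechanical: each f-string becomes an equivalent `.format()` call (the variable and string here are illustrative, not taken from the diff):

```python
board_id = 42

# Python 3.6+ f-string, unavailable on the server's Python 3.5:
#   path = f"board-{board_id}"

# Old-style equivalent that runs on Python 3.5:
path = "board-{}".format(board_id)
```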
ab29250b74 add "added submissions" boards from Submit-A-Glitch to exclusion filter 2020-09-01 02:25:32 -05:00
167a03be3c fix syntax error 2020-09-01 01:31:39 -05:00
f65361e06b Implement forums redirector and default archive urls (since it's unlikely these will be changed). Also add redirectors for the index/main pages since those don't get handled by default. 2020-08-30 16:51:55 -05:00
c37cf4fc44 Implement thread index for mapping thread ids back to board ids, for use with the redirector.
The archive domain (archives.glitchcity.info) will host this file and the redirector will pull and unpack it when it starts up.
2020-08-30 16:50:21 -05:00
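The round trip for that index can be sketched with nothing but the standard library; the `(thread_id, board_id)` input shape here is an assumption (the archive generator derives it from its `Thread` objects), and the redirector fetches the file over HTTP rather than from disk:

```python
import gzip
import json
import os
import tempfile

def write_thread_index(threads, out_path):
    """threads: iterable of (thread_id, parent_board_id) pairs."""
    index = {str(thread_id): {"parent": board_id} for thread_id, board_id in threads}
    with gzip.open(out_path, "w") as out:
        out.write(json.dumps(index).encode())

def read_thread_index(path):
    # The redirector pulls this file at startup and decompresses it the same way.
    with open(path, "rb") as gzipped_in:
        return json.loads(gzip.decompress(gzipped_in.read()).decode())

path = os.path.join(tempfile.mkdtemp(), "thread_index.json.gz")
write_thread_index([(9114, 2)], path)
```

Gzipped JSON keeps the hosted file small while staying readable from any language, unlike the pickle approach left commented out in the diff.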
ef3f3dd60c Add pagination to forums archives. 2020-08-28 14:19:09 -05:00
0e3f1274cc Exclude links and submit-a-glitch archives from the archive. 2020-08-28 02:29:30 -05:00
1b7e3ce08b Since the wiki linker code is modularized the redirector can just import it 2020-08-27 02:24:24 -05:00
646b840be4 Use '+' as the substitution for '/' rather than '%2F' since nginx seems to want to normalize the %2F back into / which would defeat the purpose. 2020-08-27 02:11:56 -05:00
a382e6d4fd Extract more archive generator specific functionality from linker (e.g. the .html suffix, / -> %2F replacement) 2020-08-27 02:02:43 -05:00
ade44491d4 Extract archive generator specific linker functionality into ArchiveLinker subclass. 2020-08-27 01:52:17 -05:00
2e73ecd59f Begin work on redirector webapp. The logic for redirecting wiki pages -seems- consistent with how the archives are generated but... nginx is normalizing the %2Fs into slashes and therefore not able to access any files with that escape sequence (e.g. Gold%2FSilver). Might need to find another character to escape / with.
+ might work, it's semantically acceptable (e.g. "Gold+Silver" in place of "Gold/Silver") although this character is sometimes interpreted equivalently to a space. Regardless, nginx seems to be happy with it so might go with it.

May also need to test on a web host e.g. my old tripod account to see if assumptions hold up there too.
2020-08-24 00:24:45 -05:00
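The workaround described above boils down to a one-entry substitution table; this mirrors the `replacements` dict that `ArchiveLinker` carries, with the caveat noted in the commit that '+' is sometimes read as a space:

```python
# nginx normalizes a literal %2F in a request path back into '/', so
# percent-encoding cannot keep a slash out of an archived file name;
# '+' passes through untouched.
REPLACEMENTS = {"/": "+"}

def escape_title(title):
    for old, new in REPLACEMENTS.items():
        title = title.replace(old, new)
    return title
```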
2aa1a7cf47 improved handling of categories and refactored Linker stuff into a class, also use ./ as default base path so Category: links will work correctly 2020-08-23 06:40:23 -05:00
77b160a35b Add all dex types and make wiki render aware of them, so that dex subpages will be rendered as GlitchDex/RB:000 while phrases like G/S will be rendered as G%2FS.
This unfortunately means the redirector will need to have special logic to be aware of dexes too, but it could probably be as simple as "if directory name contains 'Dex' then treat it as a directory name"
2020-08-23 06:02:28 -05:00
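A condensed sketch of that special case, assuming the '+' substitution for '/' that a later commit in this range adopts; the real `ArchiveLinker.translate_page_title` also handles fragments and builds the full dex list from `DEX_TYPES` crossed with languages:

```python
# Abbreviated for illustration; the generator enumerates every dex/language pair.
DEX_DIRECTORIES = ["GlitchDex", "AttackDex"]

def translate_page_title(title):
    directory = ""
    for name in DEX_DIRECTORIES:
        if title.startswith(name + "/"):
            # Dex subpages keep '/' as a real directory separator.
            directory = name
            title = title[len(name) + 1:]
            break
    # Any remaining '/' is part of the title itself and must be escaped.
    title = title.replace("/", "+")
    return "{}{}{}.html".format(directory, "/" if directory else "", title)
```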
ea7e1294b1 Escape / with %2F in wiki URLs. For now this includes things which actually should be subdirectories, such as the various dexes. 2020-08-23 05:01:11 -05:00
df25b09eb7 improvements to template substitution, begin parsing out and dumping category links 2020-08-23 04:48:38 -05:00
43a36ba730 (Attempt to) strip out noinclude tags. 2020-08-21 02:52:43 -05:00
d19ca39838 Add a (very rudimentary) template transcluder. Some work will have to be done before it is ready. 2020-08-21 02:39:50 -05:00
38cdb811b0 Specify that charset is UTF-8. On the server side we are specifying the charset as UTF-8 so the encoding is already correct on the archives.glitchcity.info site, but the intent is for this to be portable to any web host. 2020-08-17 01:57:27 -05:00
54fa852897 Implement rudimentary wikitext renderer and buff deploy_archives script to be able to deploy the wiki archive, as well as sql/sqlite/xml data dumps. 2020-08-17 01:51:09 -05:00
0b1320a9da Add archives index page, and deploy wiki archives. 2020-08-16 21:11:59 -05:00
6f05f05410 Re-enable archival of wiki by default. 2020-08-16 18:53:10 -05:00
31bdd806ad Allow "forums" as a synonym of "forum" 2020-08-16 18:52:45 -05:00
943563a8db Use new field names. 2020-08-16 18:52:32 -05:00
23f4789599 Fix broken utf-8 encoding and unescape html entities. 2020-08-16 18:52:05 -05:00
4045473e65 Gzip forums.tar to save space and time. Remove verbose output to save time. 2020-08-16 18:51:29 -05:00
161dd19d36 Increase spacing between board elements & increase padding of sub-boards 2020-08-16 18:50:59 -05:00
f709ce4810 split archiving wiki and forum into separate commands, only run the forum one by default for now 2020-08-16 04:54:27 -05:00
bf4a5f2b5d Order categories and boards correctly; correctly organize them by nesting level 2020-08-16 04:28:07 -05:00
dc0191a04a Rudimentary support for building wiki archives. The content is dumped to html but the wikitext isn't parsed yet.
mwparserfromhell is used for parsing wikitext but it has no support for rendering to HTML so we'll have to build it manually.
2020-08-11 10:44:06 -05:00
3cb08e2d2f Add bbcode library and some bbcode parsing. SMF's bbcode is somewhat different to what this library expects so some of it does not come out correctly. 2020-08-10 02:00:00 -05:00
0116646dd9 Add script to deploy the generated html archive to a server. 2020-08-10 01:59:48 -05:00
25 changed files with 6311 additions and 68 deletions


@@ -5,10 +5,14 @@ This repository contains the tickets, scripts, and documentation for the end of
`pip install -e .`
## Standalone Epilogue Scripts
Run these before running commands from the epilogue package.
#### `deploy_archives`
Run this once the archives have been built to tar them up and scp them to the server.
#### Wiki Data
##### `find_data`
#### Wiki Data (`wiki` directory)
##### `wiki_pages`
Not a script, just a listing of all the pages in the wiki (as of the 27 July 2020 lockdown). Use this and Special:Export to create an XML dump of wiki pages and place it in the `wiki` directory.
##### `find_pages`
Run this locally (it uses the MediaWiki HTTP API). Finds all pages in categories related to Pokemon generations 1 - 4 that have been edited since 31 March 2020.
#### Forum Data (`forum` directory)

deploy_archives Executable file

@@ -0,0 +1,20 @@
#!/bin/sh -x
HOSTNAME=glitchcity.info
ARCHIVE_PATH=/var/www/html/gclarchives
cd archives
scp index.html style.css $HOSTNAME:$ARCHIVE_PATH
if [ -d "forums" ]; then
cat ../forum/structure.sql ../forum/categories.sql ../forum/boards.sql ../forum/threads.sql ../forum/misc_data.sql > forums.sql
cp ../forum/forum.sqlite forums.sqlite # forum or forums?
tar -cf forums.tar forums && gzip -f forums.tar forums.sqlite forums.sql
scp forums.sql.gz forums.sqlite.gz forums.tar.gz $HOSTNAME:$ARCHIVE_PATH
ssh $HOSTNAME "cd $ARCHIVE_PATH; tar -xf forums.tar.gz"
fi;
if [ -d "wiki" ]; then
tar -cf wiki.tar wiki && gzip -f wiki.tar wiki.xml
scp wiki.xml.gz wiki.tar.gz $HOSTNAME:$ARCHIVE_PATH
ssh $HOSTNAME "cd $ARCHIVE_PATH; tar -xf wiki.tar.gz"
fi;


@@ -1,17 +1,44 @@
import os
from .forum import Forum
from .wiki import Wiki
from .archive_generator import ArchiveGenerator
import sys
import shutil
BASEDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
FORUM_DATABASE = os.path.join(BASEDIR, "forum", "forum.sqlite")
WIKI_DIRECTORY = os.path.join(BASEDIR, "wiki")
TEMPLATES_DIR = os.path.join(BASEDIR, "templates")
STATIC_DIR = os.path.join(BASEDIR, "static")
ARCHIVE_GENERATOR = ArchiveGenerator(TEMPLATES_DIR, STATIC_DIR)
ARCHIVES_BASEDIR = "archives"
FORUM_ARCHIVES = os.path.join(ARCHIVES_BASEDIR, "forums")
WIKI_ARCHIVES = os.path.join(ARCHIVES_BASEDIR, "wiki")
DEFAULT_ARGUMENTS = ["wiki", "forum"]
def main():
forum = Forum(FORUM_DATABASE)
generator = ArchiveGenerator(TEMPLATES_DIR, STATIC_DIR)
generator.generate_forum(forum, FORUM_ARCHIVES)
args = sys.argv[1:]
if not args:
args = DEFAULT_ARGUMENTS
ARCHIVE_GENERATOR.generate_index(ARCHIVES_BASEDIR)
if "forum" in args or "forums" in args:
ARCHIVE_GENERATOR.generate_forum(Forum(FORUM_DATABASE), FORUM_ARCHIVES)
if "wiki" in args:
archive_wiki()
def archive_wiki():
wiki = None
for entry in os.listdir(WIKI_DIRECTORY):
if entry.endswith(".xml"):
wiki = Wiki(os.path.join(WIKI_DIRECTORY, entry))
if wiki:
shutil.copyfile(wiki.xml_path, os.path.join(ARCHIVES_BASEDIR, "wiki.xml"))
ARCHIVE_GENERATOR.generate_wiki(wiki, WIKI_ARCHIVES)


@@ -1,28 +1,171 @@
import os
import chevron
import logging
import shutil
from datetime import datetime
import math
import json
import gzip
from itertools import chain
from traceback import print_exc
import chevron
import bbcode
import html
from .forum import DEFAULT_POSTS_PER_PAGE
from .wiki import Template, Renderer, Linker, NAMESPACES as WIKI_NAMESPACES
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("ArchiveGenerator")
def format_datetime (timestamp):
return datetime.fromtimestamp(timestamp)
DEX_LANGUAGES = ["", "DE", "ES", "FR", "IT", "JP", "KO"]
DEX_TYPES = [
"GlitchDex", "AttackDex", "DexDex", "AreaDex", "TrainerDex", "FieldMoveDex", "ItemDex", "FamilyDex", "DecDex", "DayDex",
"MDIGlitchDex", "MetascriptDex", "TMHMDex", "StatDex", "PosterDex", "TypeDex", "UnownDex", "DollDex", "DefaultNameDex",
"BattleTypeDe", "BadgeDescriptionDex", "FacingDex"
]
DEXES = list(chain.from_iterable([["{}{}".format(dex_type, language) for dex_type in DEX_TYPES] for language in DEX_LANGUAGES]))
FORUM_THREAD_INDEX = "thread_index.json.gz"
IMAGE_DIRECTORY = "images"
class ArchiveLinker(Linker):
def __init__ (self, directory_names=[]):
super().__init__()
self.directory_names = directory_names
self.image_directory = IMAGE_DIRECTORY
self.replacements = {
"/": "+",
#":": ""
}
def translate_page_title (self, page_title):
page_title = super().translate_page_title(page_title)
fragment = ""
if "#" in page_title:
fragment = page_title[page_title.find("#"):]
page_title = page_title[:-len(fragment)]
directory_name = ""
for name in self.directory_names:
if page_title.startswith("{}/".format(name)):
directory_name = name
page_title = page_title[len(directory_name) + 1:]
break
for key, value in self.replacements.items():
page_title = page_title.replace(key, value)
return "{}{}{}.html{}".format(directory_name, '/' if directory_name else '', page_title, fragment)
def translate_image_title (self, page_title):
image_title = super().translate_image_title(page_title)
if not image_title:
return
return "{}/{}".format(self.image_directory, image_title)
def prepare_thread (thread):
thread = dict(thread)
thread['datetime'] = format_datetime(thread['poster_time'])
thread.subject = html.unescape(thread.subject)
return thread
def prepare_post (post):
return prepare_thread(post)
post = prepare_thread(post)
parser = bbcode.Parser()
post.body = html.unescape(parser.format(post.body))
return post
class ArchiveGenerator():
def __init__ (self, template_dir, static_dir):
self.template_dir = template_dir
self.static_dir = static_dir
def generate_index (self, out_dir):
logger.info("Generating index page at %s", out_dir)
try:
os.makedirs(out_dir)
except FileExistsError: pass
shutil.copyfile(os.path.join(self.static_dir, "style.css"), os.path.join(out_dir, "style.css"))
renderer = TemplateRenderer(self.template_dir, out_dir)
renderer.render_template_to_file("index", "index.html", {})
def generate_wiki (self, wiki, out_dir):
logger.info("Archiving wiki to %s", out_dir)
try:
os.makedirs(out_dir)
except FileExistsError: pass
shutil.copyfile(os.path.join(self.static_dir, "style.css"), os.path.join(out_dir, "style.css"))
renderer = TemplateRenderer(self.template_dir, out_dir)
renderer.render_template_to_file("redirect", "index.html", {
"target": "Main_Page.html"
})
categories = {}
templates = dict([(page.title.split(":")[1], Template(page.get_latest().text)) for page in wiki.get_pages() if page.namespace == WIKI_NAMESPACES['TEMPLATE']])
linker = ArchiveLinker(directory_names=DEXES)
wikitext_renderer = Renderer(templates, linker)
for page in wiki.get_pages():
try:
if page.namespace != WIKI_NAMESPACES['MAIN']:
continue
page_out = linker.translate_page_title(page.title)
base = "./"
if "/" in page_out:
base = "../" * page_out.count("/")
try:
os.makedirs(os.path.dirname(os.path.join(out_dir, page_out)))
except FileExistsError: pass
if page.redirect:
logger.info("Archiving redirect page (%s -> %s) to %s", page.title, page.redirect, page_out)
renderer.render_template_to_file("redirect", page_out, {
"target": "{}{}".format(base, linker.translate_page_title(page.redirect))
})
else:
logger.info("Archiving page %s to %s", page.title, page_out)
(rendered, page_categories) = wikitext_renderer.render(page.get_latest().text, base, page=page)
for category in page_categories:
if not category in categories:
categories[category] = []
categories[category].append({
"url": page_out,
"title": page.title
})
renderer.render_template_to_file("page", page_out, {
"title": " - {}".format(page.title),
"pagename": page.title,
"page": page,
"base": base,
"text": rendered
})
except Exception as e:
logger.error("Error encountered when archiving %s: %s", page.title, e)
print_exc()
if isinstance(e, ValueError):
raise e
for category, pages in categories.items():
category_out = "Category:{}".format(linker.translate_page_title(category))
logger.info("Archiving category %s to %s", category, category_out)
try:
renderer.render_template_to_file("category", category_out, {
"title": " - {}".format(category),
"pagename": "Category:{}".format(category),
"category": category,
"pages": pages
})
except Exception as e:
logger.error("Error encountered when archiving %s: %s", category, e)
print_exc()
def generate_forum (self, forum, out_dir):
logger.info("Archiving forum to %s", out_dir)
try:
@@ -35,20 +178,25 @@ class ArchiveGenerator():
"categories": forum.get_board_tree()
})
threads = []
for board in forum.get_boards():
self.generate_forum_board(forum, board, out_dir)
forum_threads = forum.get_threads_in_board(board)
threads = threads + forum_threads
self.generate_forum_board(forum, board, forum_threads, out_dir)
def generate_forum_board (self, forum, board, out_dir):
board_out_dir = os.path.join(out_dir, "board-{}".format(board['id_board']))
logger.info("Archiving board %s to %s", board['name'], board_out_dir)
self.generate_thread_index(threads, os.path.join(out_dir, FORUM_THREAD_INDEX))
def generate_forum_board (self, forum, board, threads, out_dir):
board_out_dir = os.path.join(out_dir, "board-{}".format(board.id))
logger.info("Archiving board %s to %s", board.name, board_out_dir)
try:
os.makedirs(board_out_dir)
except FileExistsError: pass
renderer = TemplateRenderer(self.template_dir, board_out_dir)
threads = [prepare_thread(thread) for thread in forum.get_threads_in_board(board)]
threads = [prepare_thread(thread) for thread in threads]
renderer.render_template_to_file("threads", "index.html", {
"title": " - {}".format(board['name']),
"title": " - {}".format(board.name),
"base": "../",
"board": board,
"threads": threads
@@ -58,34 +206,46 @@ class ArchiveGenerator():
self.generate_forum_thread(forum, board, thread, board_out_dir)
def generate_forum_thread (self, forum, board, thread, out_dir):
thread_out_dir = os.path.join(out_dir, "thread-{}".format(thread['id_topic']))
logger.info("Archiving thread %s to %s", thread['subject'], thread_out_dir)
thread_out_dir = os.path.join(out_dir, "thread-{}".format(thread.id))
logger.info("Archiving thread %s to %s", thread.subject, thread_out_dir)
try:
os.makedirs(thread_out_dir)
except FileExistsError: pass
renderer = TemplateRenderer(self.template_dir, thread_out_dir)
renderer.render_template_to_file("page-0-redirect", "index.html")
renderer.render_template_to_file("redirect", "index.html", {
"target": "page-0.html"
})
total_pages = math.ceil((thread.num_replies + 1) / DEFAULT_POSTS_PER_PAGE)
page_links = [{"label": page + 1, "link": "page-{}.html".format(page)} for page in range(total_pages)]
page = 0
while True:
posts = [prepare_post(post) for post in forum.get_posts_in_thread(thread, page)]
if len(posts) < 1:
break
logger.info("Archiving page %s of thread %s", page, thread['subject'])
logger.info("Archiving page %s of thread %s", page, thread.subject)
renderer.render_template_to_file("posts", "page-{}.html".format(page), {
"title": " - {} - Page {}".format(thread['subject'], page + 1),
"title": " - {} - Page {}".format(thread.subject, page + 1),
"base": "../../",
"board": board,
"thread": thread,
"page": page,
"next": page + 1,
"page_links": page_links,
"prev": page - 1,
"posts": posts
})
page = page + 1
def generate_thread_index (self, threads, out_path):
# with open(out_path, "wb") as out:
# pickle.dump({thread.id: {"parent": thread.parent} for thread in threads}, out, protocol=4)
threads = {thread.id: {"parent": thread.parent} for thread in threads}
with gzip.open(out_path, "w") as out:
out.write(json.dumps(threads).encode())
class TemplateRenderer():
def __init__ (self, template_dir, out_dir):
self.template_dir = template_dir


@@ -1,8 +1,15 @@
import sqlite3
from datetime import datetime
PREFIX = "smf_"
GET_BOARDS = "SELECT * FROM `{}boards`".format(PREFIX)
GET_CATEGORIES = "SELECT * FROM `{}categories`".format(PREFIX)
GET_BOARDS = """
SELECT * FROM `{}boards`
ORDER BY `board_order` ASC
""".format(PREFIX)
GET_CATEGORIES = """
SELECT * FROM `{}categories`
ORDER BY `cat_order` ASC
""".format(PREFIX)
GET_THREADS = """
SELECT * FROM `{}topics` AS `topics`, `{}messages` AS `messages`
WHERE `topics`.`id_board`=? AND `messages`.`id_msg`=`topics`.`id_first_msg`
@@ -16,42 +23,82 @@ GET_POSTS = """
LIMIT ? OFFSET ?
""".format(PREFIX)
DEFAULT_POSTS_PER_PAGE = 15
DEFAULT_THREADS_PER_PAGE = 2000
def fix_encoding (string):
return string.encode("latin1", errors="ignore").decode(errors="ignore")
class Forum():
def __init__ (self, db_path):
self.connection = sqlite3.connect(db_path)
self.connection.row_factory = sqlite3.Row
def get_board_tree (self):
categories = [dict(category) for category in self.get_categories()]
boards = [dict(board) for board in self.get_boards()]
categories = self.get_categories()
boards = self.get_boards()
for category in categories:
category['children'] = [board for board in boards if board['id_cat'] == category['id_cat']]
category.children = [child for child in boards if child.category == category.id and child.child_level == 0]
for board in boards:
board['children'] = [board for board in boards if board['id_parent'] == board['id_board']]
board.children = [child for child in boards if child.parent_board == board.id]
return categories
def get_categories (self):
cursor = self.connection.cursor()
cursor.execute(GET_CATEGORIES)
return cursor.fetchall()
return [Category(category) for category in cursor.fetchall()]
def get_boards (self):
cursor = self.connection.cursor()
cursor.execute(GET_BOARDS)
return cursor.fetchall()
return [Board(board) for board in cursor.fetchall()]
def get_threads_in_board (self, board, page=0, per_page=2000):
def get_threads_in_board (self, board, page=0, per_page=DEFAULT_THREADS_PER_PAGE):
try:
board = board['id_board']
board = board.id
except ValueError: pass
cursor = self.connection.cursor()
cursor.execute(GET_THREADS, (board, per_page, page * per_page))
return cursor.fetchall()
return [Thread(thread) for thread in cursor.fetchall()]
def get_posts_in_thread (self, thread, page=0, per_page=15):
def get_posts_in_thread (self, thread, page=0, per_page=DEFAULT_POSTS_PER_PAGE):
try:
thread = thread['id_topic']
thread = thread.id
except ValueError: pass
cursor = self.connection.cursor()
cursor.execute(GET_POSTS, (thread, per_page, page * per_page))
return cursor.fetchall()
return [Post(post) for post in cursor.fetchall()]
class Category():
def __init__ (self, row):
self.id = row['id_cat']
self.name = fix_encoding(row['name'])
self.children = []
class Board():
def __init__ (self, row):
self.id = row['id_board']
self.category = row['id_cat']
self.parent_board = row['id_parent']
self.child_level = row['child_level']
self.name = fix_encoding(row['name'])
self.description = fix_encoding(row['description'])
self.children = []
class Thread():
def __init__ (self, row):
self.id = row['id_topic']
self.parent = row['id_board']
self.datetime = datetime.fromtimestamp(row['poster_time'])
self.subject = fix_encoding(row['subject'])
self.poster_name = fix_encoding(row['poster_name'])
self.num_replies = row['num_replies']
class Post():
def __init__ (self, row):
self.id = row['id_msg']
self.parent = row['id_topic']
self.datetime = datetime.fromtimestamp(row['poster_time'])
self.subject = fix_encoding(row['subject'])
self.body = fix_encoding(row['body'])
self.poster_name = fix_encoding(row['poster_name'])

epilogue/redirector.py Normal file

@@ -0,0 +1,98 @@
import argparse
import gzip
import urllib.request
import json
from .archive_generator import ArchiveLinker, DEXES, FORUM_THREAD_INDEX
from flask import Flask, redirect, request
app = Flask(__name__)
DEFAULT_ARCHIVES_DOMAIN = "https://archives.glitchcity.info/"
DEFAULT_FORUMS_ARCHIVE = "{}forums".format(DEFAULT_ARCHIVES_DOMAIN)
DEFAULT_WIKI_ARCHIVE = "{}wiki".format(DEFAULT_ARCHIVES_DOMAIN)
## Wiki redirector
@app.route("/wiki/")
def redirect_wiki_main ():
return redirect_wiki("Main Page")
@app.route("/wiki/<path:path>")
def redirect_wiki (path):
return redirect(make_wiki_url(path))
def make_wiki_url (path):
if path.endswith("/"):
path = path[:-1]
return app.args.wiki_archive + app.wiki_linker.translate_page_title(path)
## Forum redirector
@app.route('/forums/')
def redirect_forums_index ():
return redirect_forums("")
@app.route('/forums/<path:path>')
def redirect_forums (path):
return redirect(make_forum_url(request))
def make_forum_url (request):
thread_id = request.args.get("topic", None)
board_id = request.args.get("board", None)
post_id = None
if thread_id:
thread_id = strip_extension(thread_id)
if "." in thread_id:
(thread_id, post_id) = thread_id.split(".")
post_id = post_id[len("msg"):]
if not board_id:
board_id = app.thread_index[thread_id]['parent']
try:
if "." in board_id:
board_id = board_id.split(".")[0]
except TypeError: pass
url = app.args.forums_archive
if board_id:
url = url + "board-{}".format(board_id)
if thread_id:
url = url + "/thread-{}".format(thread_id)
if not url.endswith("/"):
url = url + "/"
return url
def strip_extension (item):
for extension in [".html"]:
if item.endswith(extension):
item = item[:-len(extension)]
return item
def read_thread_index (forums_archive):
with urllib.request.urlopen("{}{}".format(forums_archive, FORUM_THREAD_INDEX)) as gzipped_in:
data = gzipped_in.read()
return json.loads(gzip.decompress(data).decode())
def main ():
parser = argparse.ArgumentParser()
parser.add_argument("--wiki-archive", help="URL to wiki archive", default=DEFAULT_WIKI_ARCHIVE)
parser.add_argument("--forums-archive", help="URL to forums archive", default=DEFAULT_FORUMS_ARCHIVE)
args = parser.parse_args()
if not args.wiki_archive.endswith("/"):
args.wiki_archive = args.wiki_archive + "/"
if not args.forums_archive.endswith("/"):
args.forums_archive = args.forums_archive + "/"
app.args = args
app.thread_index = read_thread_index(args.forums_archive)
app.wiki_linker = ArchiveLinker(directory_names=DEXES)
app.run()
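The id handling in `make_forum_url` above can be isolated as a pure function; a sketch assuming SMF-style `topic` values such as `9114.html` or `9114.msg12345` (the function name is illustrative):

```python
def parse_topic_param(topic):
    """Split an SMF 'topic' query value into (thread_id, post_id)."""
    # Thread ids can arrive with an .html extension; strip it first.
    for extension in (".html",):
        if topic.endswith(extension):
            topic = topic[:-len(extension)]
    post_id = None
    # A "9114.msg12345" value points at a specific post within the thread.
    if "." in topic:
        topic, post_id = topic.split(".")
        post_id = post_id[len("msg"):]
    return topic, post_id
```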

epilogue/wiki.py Normal file

@@ -0,0 +1,228 @@
from xml.etree import ElementTree
import mwparserfromhell
from mwparserfromhell.nodes import Wikilink, Comment, ExternalLink, Heading, Tag, Template, Text
NAMESPACE = "{http://www.mediawiki.org/xml/export-0.10/}"
PAGE_TAG = "{}page".format(NAMESPACE)
ID_TAG = "{}id".format(NAMESPACE)
TITLE_TAG = "{}title".format(NAMESPACE)
REVISION_TAG = "{}revision".format(NAMESPACE)
NS_TAG = "{}ns".format(NAMESPACE)
REDIRECT_TAG = "{}redirect".format(NAMESPACE)
TEXT_TAG = "{}text".format(NAMESPACE)
FORMAT_TAG = "{}format".format(NAMESPACE)
MODEL_TAG = "{}model".format(NAMESPACE)
TIMESTAMP_TAG = "{}timestamp".format(NAMESPACE)
COMMENT_TAG = "{}comment".format(NAMESPACE)
CONTRIBUTOR_TAG = "{}contributor".format(NAMESPACE)
USERNAME_TAG = "{}username".format(NAMESPACE)
NAMESPACES = {
"MAIN": 0,
"TEMPLATE": 10
}
INTERWIKI_NAMESPACES = {
"bp:": "https://bulbapedia.bulbagarden.net/wiki/{}",
"wikipedia:": "https://en.wikipedia.org/wiki/{}"
}
FILE_NAMESPACES = ["File:", "Image:"]
CATEGORY_NAMESPACE = "Category:"
CATEGORY_LINK_NAMESPACE = ":{}".format(CATEGORY_NAMESPACE)
class Wiki():
def __init__ (self, xml_path):
self.xml_path = xml_path
def get_pages (self):
tree = ElementTree.parse(self.xml_path)
return (Page(element) for element in tree.getroot() if element.tag == PAGE_TAG)
class Page():
def __init__ (self, element):
self.redirect = None
self.revisions = []
for child in element:
if child.tag == ID_TAG:
self.id = child.text
elif child.tag == NS_TAG:
self.namespace = int(child.text)
elif child.tag == TITLE_TAG:
self.title = child.text
elif child.tag == REVISION_TAG:
self.revisions.append(Revision(child))
elif child.tag == REDIRECT_TAG:
self.redirect = child.attrib['title']
def get_latest (self):
return self.revisions[0]
class Revision():
def __init__ (self, element):
for child in element:
if child.tag == ID_TAG:
self.id = child.text
elif child.tag == TEXT_TAG:
self.text = child.text
elif child.tag == CONTRIBUTOR_TAG:
self.contributor = Contributor(child)
elif child.tag == TIMESTAMP_TAG:
self.timestamp = child.text
elif child.tag == MODEL_TAG:
self.model = child.text
elif child.tag == COMMENT_TAG:
self.comment = child.text
class Contributor():
def __init__ (self, element):
for child in element:
if child.tag == ID_TAG:
self.id = child.text
elif child.tag == USERNAME_TAG:
self.username = child.text
class Renderer():
def __init__ (self, templates={}, linker=None):
self.templates = templates
self.linker = linker if linker else Linker()
def render (self, wikitext, base="", *args, **kwargs):
categories = []
wikitext = self.transclude_templates(wikitext, *args, **kwargs)
# parse out categories
for link in wikitext.ifilter_wikilinks():
if not link.title.startswith(CATEGORY_NAMESPACE):
continue
wikitext.remove(link)
categories.append(link.title[len(CATEGORY_NAMESPACE):])
rendered = [render(wikitext, base, self.linker)]
if categories:
rendered.append('<h2>Categories</h2><ul class="categories">')
for category in categories:
rendered.append('<li><a href="{}Category:{}">{}</a></li>'.format(
base,
self.linker.translate_page_title(category),
category
))
rendered.append("</ul>")
return ("".join(rendered), categories)
def transclude_templates (self, wikitext, *args, **kwargs):
wikitext = mwparserfromhell.parse(wikitext)
for inclusion in wikitext.ifilter_templates():
template_key = str(inclusion.name)
template = self.templates.get(template_key, self.templates.get(template_key[0].upper() + template_key[1:], None))
result = None
if template:
result = template(inclusion, *args, **kwargs)
else:
result = "<span class='unknown-template'>Template:{0}</span>".format(inclusion.name)
try:
wikitext.replace(inclusion, result) #self.transclude_templates(result))
except ValueError: pass
return wikitext
def render (wikitext, base="", linker=None):
rendered = []
if not linker:
linker = Linker()
for node in wikitext.ifilter(False):
# node types:
# https://mwparserfromhell.readthedocs.io/en/latest/api/mwparserfromhell.nodes.html#module-mwparserfromhell.nodes.text
node_type = type(node)
if node_type is Wikilink:
image_name = linker.translate_image_title(node.title)
if image_name:
rendered.append('<img src="{}{}" />'.format(
base,
image_name,
render(mwparserfromhell.parse(node.text), base, linker)
))
else:
url = linker.translate_interwiki_title(node.title)
if not url:
url = "{}{}".format(base, linker.translate_page_title(node.title))
rendered.append('<a href="{}">{}</a>'.format(
url,
render(node.text if node.text else node.title, base, linker)
))
elif node_type is ExternalLink:
rendered.append('<a href="{}">{}</a>'.format(
node.url,
render(node.title if node.title else node.url)
))
elif node_type is Tag:
rendered.append("<{}>{}</{}>".format(
render(node.tag),
render(node.contents, base, linker),
render(node.tag)
))
elif node_type is Heading:
rendered.append('<h{} id="{}">{}</h{}>'.format(
node.level,
reformat_page_title(node.title),
render(node.title, base, linker),
node.level
))
elif node_type is Text:
rendered.append(node.value)
return "".join(rendered).strip().replace("\n\n", "<br /><br />")
class Linker():
def __init__ (self, file_namespaces=FILE_NAMESPACES, interwiki_namespaces=INTERWIKI_NAMESPACES):
self.file_namespaces = file_namespaces
self.interwiki_namespaces = interwiki_namespaces
def translate_interwiki_title (self, page_title):
for namespace, url in self.interwiki_namespaces.items():
if page_title.startswith(namespace):
return url.format(page_title[len(namespace):])
def translate_page_title (self, page_title):
if page_title.startswith(CATEGORY_LINK_NAMESPACE):
page_title = page_title[1:]
return reformat_page_title(page_title)
def translate_image_title (self, page_title):
for namespace in self.file_namespaces:
if page_title.startswith(namespace):
return reformat_page_title(page_title[len(namespace):])
def reformat_page_title (page_title):
if not page_title:
return ""
return "{}{}".format(page_title[0].upper(), page_title[1:].replace(' ', '_'))
class Template():
def __init__ (self, wikicode):
self.wikicode = mwparserfromhell.parse(wikicode)
for tag in self.wikicode.ifilter_tags():
if tag.tag == "noinclude":
self.wikicode.remove(tag)
def __call__ (self, inclusion, *args, **kwargs):
parsed_wikicode = mwparserfromhell.parse(self.wikicode)
for argument in parsed_wikicode.ifilter_arguments():
value = argument.default if argument.default else argument.name
if inclusion.has(argument.name):
value = inclusion.get(argument.name)
try:
parsed_wikicode.replace(argument, value)
except ValueError: pass
return parsed_wikicode


@@ -68,6 +68,7 @@ TOPICS_DUMP = "threads.sql"
# Categories we are not interested in archiving.
# `id_cat` in (1, 2)
DO_NOT_ARCHIVE_CATEGORIES = [
7, # Links
12, # Epsilon: ?????
6, # Sigma: Higher Access
8 # Omega: Garbage
@@ -76,6 +77,7 @@ DO_NOT_ARCHIVE_CATEGORIES = [
# Boards we are not interested in archiving.
# `id_board` in (1, 2)
DO_NOT_ARCHIVE_BOARDS = [
24, 94, 118, 121, # Links
40, # Exclusive Board
65, # Requests for Moderatorship
66, # Requests for Membership+
@@ -84,7 +86,10 @@ DO_NOT_ARCHIVE_BOARDS = [
22, # Admins Only Board
89, # Test Board
86, # Omega Archives
51, 37, 79, 26, 47, 44, 99, 93, 119, 96,
51, 37, 79, 26, 47, 44, 45, 99, 93, 119, 96,
62, 60, 80, 84, # Submit-A-Glitch Archives
3, 4, 5, 57, 58, 59, 38, 54, 63, 64,
68, 69, 70, 81, 82, 83,
28, # The Dumpster Out Back
123 # ?????
]


@@ -8,10 +8,11 @@ setup(
description='Tools for exporting and creating archives of Glitch City Labs data',
author='Adrian Kuschelyagi Malacoda',
packages=['epilogue'],
install_requires=['pysqlite3 >= 0.4.3', 'chevron >= 0.13.1'],
install_requires=['pysqlite3 >= 0.4.3', 'chevron >= 0.13.1', 'bbcode >= 1.1.0', 'mwparserfromhell >= 0.5.4', 'flask >= 1.1.2'],
entry_points={
'console_scripts': [
'epilogue = epilogue:main'
'epilogue = epilogue:main',
'gclredirector = epilogue.redirector:main'
]
}
)


@@ -1,12 +1,21 @@
 body { font-family: monospace; }
 ul.boards { margin-left: 0; padding-left: 0; }
 .board { margin-bottom: 5px; }
 .category, .board { list-style-type: none;}
 .category .name, .board .name { font-weight: bold; }
-.board .board { margin-left: 10px; }
+.board .board { margin-left: 15px; }
 #threads { width: 100%; }
 .label { font-weight: bold }
 article { border-top: 1px solid black; }
 section { margin-top: 15px; margin-bottom: 15px; }
+.next { float: right; }
+.pagination { margin-bottom: 10px; }
+.pagination ul { list-style-type: none; margin-left: 0; padding-left: 0; display: inline; }
+.pagination li { display: inline; }
+.page { padding-top: 15px; }
+.page table { width: 100%; }


@@ -1,4 +1,5 @@
 {{>header}}
+{{>forums_notice}}
 {{#categories}}
 <h2 class="category-name">{{name}}</h2>
 {{>child_boards}}


@@ -0,0 +1,9 @@
+{{>header}}
+{{>wiki_notice}}
+<h2>{{pagename}}</h2>
+<ul>
+{{#pages}}
+<li><a href="{{url}}">{{title}}</a></li>
+{{/pages}}
+</ul>
+{{>footer}}

templates/index.mustache Normal file

@@ -0,0 +1,11 @@
+{{>header}}
+Welcome to the <b>Glitch City Laboratories Archives</b>.
+<p>Glitch City Laboratories was a Pok&eacute;mon glitch website that existed from March 2006 to September 2020 (<a href="forums/board-2/thread-9114/page-0.html">announcement of closure</a>). This is an <b>archive</b> of content from the website prior to its closure.</p>
+<p>Further development and discussion is happening at <b><a href="https://discord.com/invite/EA7jxJ6">Glitch City Research Institute</a></b>, the successor community.</p>
+<p>The <b><a href="https://glitchcity.wiki/">Glitch City Wiki</a></b> is the continuation of the Glitch City Laboratories wiki.</p>
+<h2>Archives</h2>
+<ul>
+<li><a href="forums">Forums</a> (<a href="forums.tar.gz">.tar.gz</a>) (<a href="forums.sql.gz">.sql.gz</a>) (<a href="forums.sqlite.gz">.sqlite.gz</a>)</li>
+<li><a href="wiki">Wiki</a> (<a href="wiki.tar.gz">.tar.gz</a>) (<a href="wiki.xml.gz">.xml.gz</a>)</li>
+</ul>
+{{>footer}}


@@ -1,4 +0,0 @@
-<html>
-<head><meta http-equiv="refresh" content="0; url=page-0.html" /></head>
-<body><p><a href="page-0.html">Redirect</a></p></body>
-</html>

templates/page.mustache Normal file

@@ -0,0 +1,7 @@
+{{>header}}
+{{>wiki_notice}}
+<h2>{{page.title}}</h2>
+<article class="page">
+{{{text}}}
+</article>
+{{>footer}}


@@ -1,7 +1,9 @@
 <ul class="boards">
 {{#children}}
 <li class="board">
-<div class="name"><a href="board-{{id_board}}">{{name}}</a></div>
-<div class="description">{{description}}</div>
+<div class="name"><a href="board-{{id}}">{{name}}</a></div>
+<div class="description">{{{description}}}</div>
 {{>child_boards}}
 </li>
 {{/children}}
 </ul>


@@ -0,0 +1,5 @@
+<div class="notice">
+<p>Glitch City Laboratories closed on 1 September 2020 (<a href="{{base}}board-2/thread-9114/page-0.html">announcement</a>). This is an <b>archived</b> copy of a thread from Glitch City Laboratories Forums.</p>
+<p>You can join <a href="https://discord.com/invite/EA7jxJ6">Glitch City Research Institute</a> to ask questions or discuss current developments.</p>
+<p>You may also download the archive of this forum in <a href="{{base}}../forums.tar.gz">.tar.gz</a>, <a href="{{base}}../forums.sql.gz">.sql.gz</a>, or <a href="{{base}}../forums.sqlite.gz">.sqlite.gz</a> formats.</p>
+</div>


@@ -2,6 +2,7 @@
 <head>
 <title>Glitch City Laboratories Archives{{title}}</title>
 <link href="{{base}}style.css" rel="stylesheet" type="text/css" />
+<meta charset="UTF-8" />
 </head>
 <body>
 <h1><a href="{{base}}">Glitch City Laboratories Archives</a></h1>


@@ -1,4 +1,9 @@
 <div class="pagination">
 <a class="prev" href="page-{{prev}}.html">Previous Page</a>
+<ul>
+{{#page_links}}
+<li><a href="{{link}}">{{label}}</a></li>
+{{/page_links}}
+</ul>
 <a class="next" href="page-{{next}}.html">Next Page</a>
 </div>


@@ -0,0 +1,6 @@
+<div class="notice">
+<p>Glitch City Laboratories closed on 1 September 2020 (<a href="{{base}}../forums/board-2/thread-9114/page-0.html">announcement</a>). This is an <b>archived</b> copy of an article from Glitch City Laboratories wiki.</p>
+<p><b>A live version of this article is available at the <a href="https://glitchcity.wiki/">Glitch City Wiki</a> <a href="https://glitchcity.wiki/{{pagename}}">here</a>.</b></p>
+<p>You can join <a href="https://discord.com/invite/EA7jxJ6">Glitch City Research Institute</a> to ask questions or discuss current developments.</p>
+<p>You may also download the archive of the wiki in <a href="{{base}}../wiki.tar.gz">.tar.gz</a> or <a href="{{base}}../wiki.xml.gz">.xml.gz</a> formats.</p>
+</div>


@@ -1,9 +1,10 @@
 {{>header}}
+{{>forums_notice}}
 <h2><a href="../">{{board.name}}</a></h2>
 <h3>{{thread.subject}} - Page {{next}}</h3>
 {{>pagination}}
 {{#posts}}
-<article id="post-{{id_msg}}">
+<article id="msg{{id}}">
 <header>
 <h4>{{subject}}</h4>
 <div><span class="label">Posted by:</span> {{poster_name}}</div>


@@ -0,0 +1,4 @@
+<html>
+<head><meta http-equiv="refresh" content="0; url={{target}}" /></head>
+<body><p><a href="{{target}}">Redirect</a></p></body>
+</html>


@@ -1,16 +1,19 @@
 {{>header}}
+{{>forums_notice}}
 <h2>{{board.name}}</h2>
 <table id="threads">
 <tr>
 <th>Title</th>
 <th>Poster</th>
 <th>Date</th>
 <th>Replies</th>
 </tr>
 {{#threads}}
 <tr>
-<td class="thread-subject"><a href="thread-{{id_topic}}">{{subject}}</a></td>
+<td class="thread-subject"><a href="thread-{{id}}">{{subject}}</a></td>
 <td class="thread-poster">{{poster_name}}</td>
 <td class="thread-date">{{datetime}}</td>
 <td class="replies">{{num_replies}}</td>
 </tr>
 {{/threads}}
 </table>

wiki/wiki_pages Normal file

File diff suppressed because it is too large