Compare commits

...

49 Commits

SHA1 Message Date
eff22ff325 Bolden names of successor websites. 2020-09-20 05:48:15 -05:00
6ab90d5fff Add link to GCW on the archive index page. 2020-09-20 05:47:35 -05:00
d98354046b Prefix image paths with images subdirectory (which does not yet exist) 2020-09-20 05:45:49 -05:00
f1ae73b737 Correctly parse out fragment from html archive link and append it in the correct place. 2020-09-20 05:40:57 -05:00
d062ca6787 Add anchors to headings. 2020-09-20 05:31:55 -05:00
a9adf51453 Add GCW link to archived wiki pages, for the live version of said archived article. 2020-09-20 05:28:37 -05:00
7157757d43 redirector: handle case where thread id can have an .html extension, because of course it can... 2020-09-03 04:12:54 -05:00
5859ee0408 Add notice to the top of archived pages that this is an archive. 2020-09-03 03:34:22 -05:00
b33ea016d5 convert all f-strings to old-style format strings for python 3.5 compatibility (since that is what's available on the server) 2020-09-01 02:46:37 -05:00
ab29250b74 add "added submissions" boards from Submit-A-Glitch to exclusion filter 2020-09-01 02:25:32 -05:00
167a03be3c fix syntax error 2020-09-01 01:31:39 -05:00
f65361e06b Implement forums redirector and default archive urls (since it's unlikely these will be changed). Also add redirectors for the index/main pages since those don't get handled by default. 2020-08-30 16:51:55 -05:00
c37cf4fc44 Implement thread index for mapping thread ids back to board ids, for use with the redirector.
The archive domain (archives.glitchcity.info) will host this file and the redirector will pull and unpack it when it starts up.
2020-08-30 16:50:21 -05:00
ef3f3dd60c Add pagination to forums archives. 2020-08-28 14:19:09 -05:00
0e3f1274cc Exclude links and submit-a-glitch archives from the archive. 2020-08-28 02:29:30 -05:00
1b7e3ce08b Since the wiki linker code is modularized the redirector can just import it 2020-08-27 02:24:24 -05:00
646b840be4 Use '+' as the substitution for '/' rather than '%2F' since nginx seems to want to normalize the %2F back into / which would defeat the purpose. 2020-08-27 02:11:56 -05:00
a382e6d4fd Extract more archive generator specific functionality from linker (e.g. the .html suffix, / -> %2F replacement) 2020-08-27 02:02:43 -05:00
ade44491d4 Extract archive generator specific linker functionality into ArchiveLinker subclass. 2020-08-27 01:52:17 -05:00
2e73ecd59f Begin work on redirector webapp. The logic for redirecting wiki pages -seems- consistent with how the archives are generated, but... nginx is normalizing the %2Fs into slashes and is therefore unable to access any files with that escape sequence (e.g. Gold%2FSilver). Might need to find another character to escape / with.
+ might work; it's semantically acceptable (e.g. "Gold+Silver" in place of "Gold/Silver"), although this character is sometimes interpreted equivalently to a space. Regardless, nginx seems to be happy with it, so might go with it.

May also need to test on a web host e.g. my old tripod account to see if assumptions hold up there too.
2020-08-24 00:24:45 -05:00
2aa1a7cf47 improved handling of categories and refactored Linker stuff into a class, also use ./ as default base path so Category: links will work correctly 2020-08-23 06:40:23 -05:00
77b160a35b Add all dex types and make wiki render aware of them, so that dex subpages will be rendered as GlitchDex/RB:000 while phrases like G/S will be rendered as G%2FS.
This unfortunately means the redirector will need to have special logic to be aware of dexes too, but it could probably be as simple as "if directory name contains 'Dex' then treat it as a directory name"
2020-08-23 06:02:28 -05:00
ea7e1294b1 Escape / with %2F in wiki URLs. For now this includes things which actually should be subdirectories, such as the various dexes. 2020-08-23 05:01:11 -05:00
df25b09eb7 improvements to template substitution, begin parsing out and dumping category links 2020-08-23 04:48:38 -05:00
43a36ba730 (Attempt to) strip out noinclude tags. 2020-08-21 02:52:43 -05:00
d19ca39838 Add a (very rudimentary) template transcluder. Some work will have to be done before it is ready. 2020-08-21 02:39:50 -05:00
38cdb811b0 Specify that charset is UTF-8. On the server side we are specifying the charset as UTF-8 so the encoding is already correct on the archives.glitchcity.info site, but the intent is for this to be portable to any web host. 2020-08-17 01:57:27 -05:00
54fa852897 Implement rudimentary wikitext renderer and buff deploy_archives script to be able to deploy the wiki archive, as well as sql/sqlite/xml data dumps. 2020-08-17 01:51:09 -05:00
0b1320a9da Add archives index page, and deploy wiki archives. 2020-08-16 21:11:59 -05:00
6f05f05410 Re-enable archival of wiki by default. 2020-08-16 18:53:10 -05:00
31bdd806ad Allow "forums" as a synonym of "forum" 2020-08-16 18:52:45 -05:00
943563a8db Use new field names. 2020-08-16 18:52:32 -05:00
23f4789599 Fix broken utf-8 encoding and unescape html entities. 2020-08-16 18:52:05 -05:00
4045473e65 Gzip forums.tar to save space and time. Remove verbose output to save time. 2020-08-16 18:51:29 -05:00
161dd19d36 Increase spacing between board elements & increase padding of sub-boards 2020-08-16 18:50:59 -05:00
f709ce4810 split archiving wiki and forum into separate commands, only run the forum one by default for now 2020-08-16 04:54:27 -05:00
bf4a5f2b5d Order categories and boards correctly; correctly organize them by nesting level 2020-08-16 04:28:07 -05:00
dc0191a04a Rudimentary support for building wiki archives. The content is dumped to html but the wikitext isn't parsed yet.
mwparserfromhell is used for parsing wikitext but it has no support for rendering to HTML so we'll have to build it manually.
2020-08-11 10:44:06 -05:00
3cb08e2d2f Add bbcode library and some bbcode parsing. SMF's bbcode is somewhat different to what this library expects so some of it does not come out correctly. 2020-08-10 02:00:00 -05:00
0116646dd9 Add script to deploy the generated html archive to a server. 2020-08-10 01:59:48 -05:00
f3b0319d5a Add rudimentary script for generating the forum archive. 2020-08-10 01:21:33 -05:00
7026bc2b34 Add mysql2sqlite submodule and script for making a sqlite database out of the mysql dumps. This will be used to create html archives of the forum. 2020-08-09 04:01:19 -05:00
51e9782c7e refine email regex to not include whitespace 2020-08-04 10:10:48 -05:00
73dae54cf1 Filter out sensitive information (emails, IP addresses) from messages table. 2020-08-04 02:29:42 -05:00
eac65f6bda Add dump_forum_data script: This connects to a GCL database and creates SQL dumps that can be distributed publicly.
Specifically, it creates these scripts:
- structure.sql: Database structure for all tables.
- misc_data.sql: Misc. data that may be of interest, including legacy GCLF features such as the shop and the arcade, and The Orange Glove content.
- categories.sql: Category data for public categories.
- boards.sql: Board data for public boards.
- threads.sql: Thread (topic) and message data. This is the meat of the forums.

What's NOT included:
- Personal/Private messages
- Member accounts
- Administrative stuff like error logs
- Private categories and boards
- Garbage category (Lab Omega)
2020-08-03 05:02:43 -05:00
d4002ec592 Add additional categories for gens 1 - 4. 2020-07-30 04:16:26 -05:00
191e1ebd13 Add find_pages script to determine through the MediaWiki API which pages need to be looked at. 2020-07-30 03:46:02 -05:00
1a2dbbe65b Fix up PM export script 2020-07-27 02:57:19 -05:00
6716f274db Initial stab at PM export script. It just prints out all PMs to/from current logged-in user. 2020-07-27 01:48:59 -05:00
30 changed files with 6877 additions and 1 deletion

1
.gitignore vendored Normal file

@@ -0,0 +1 @@
/**/__pycache__

3
.gitmodules vendored Normal file

@@ -0,0 +1,3 @@
[submodule "mysql2sqlite"]
path = mysql2sqlite
url = https://github.com/dumblob/mysql2sqlite.git

README.md

@@ -1,2 +1,31 @@
# Glitch City Laboratories (2003 - 2020) - Epilogue
This repository contains the tickets, scripts, and documentation for the end of Glitch City Laboratories.
## Installation
`pip install -e .`
## Standalone Epilogue Scripts
#### `deploy_archives`
Run this once the archives have been built to tar them up and scp them to the server.
#### Wiki Data (`wiki` directory)
##### `wiki_pages`
Not a script, just a listing of all the pages in the wiki (as of the 27 July 2020 lockdown). Use this and Special:Export to create an XML dump of wiki pages and place it in the `wiki` directory.
##### `find_pages`
Run this locally (it uses the MediaWiki HTTP API). Finds all pages in categories related to Pokemon generations 1 - 4 that have been edited since 31 March 2020.
#### Forum Data (`forum` directory)
##### `dump_forum_data`
Run this on a server with access to a Glitch City Laboratories database. It will dump the table schemas and data into several .sql files. Place these in the `forum` directory.
##### `create_sqlite_database`
Run this locally, after the `dump_forum_data` script has been run on the Glitch City Laboratories server and the dumps have been copied to the `forum` directory (the same directory as this script). A file called `forum.sqlite` will be generated from the MariaDB dumps. This SQLite database is required for generating the static archive later.
Make sure the `mysql2sqlite` submodule is initialized (`git submodule update --init`) and install `mawk` for the best results.
## Epilogue Package
## Misc Scripts
### `get_pms.php`
This is installed on Glitch City Laboratories and allows any logged-in user to export their PM inbox as an HTML document.

20
deploy_archives Executable file

@@ -0,0 +1,20 @@
#!/bin/sh -x
HOSTNAME=glitchcity.info
ARCHIVE_PATH=/var/www/html/gclarchives
cd archives
scp index.html style.css $HOSTNAME:$ARCHIVE_PATH
if [ -d "forums" ]; then
    cat ../forum/structure.sql ../forum/categories.sql ../forum/boards.sql ../forum/threads.sql ../forum/misc_data.sql > forums.sql
    cp ../forum/forum.sqlite forums.sqlite # forum or forums?
    tar -cf forums.tar forums && gzip -f forums.tar forums.sqlite forums.sql
    scp forums.sql.gz forums.sqlite.gz forums.tar.gz $HOSTNAME:$ARCHIVE_PATH
    ssh $HOSTNAME "cd $ARCHIVE_PATH; tar -xf forums.tar.gz"
fi
if [ -d "wiki" ]; then
    tar -cf wiki.tar wiki && gzip -f wiki.tar wiki.xml
    scp wiki.xml.gz wiki.tar.gz $HOSTNAME:$ARCHIVE_PATH
    ssh $HOSTNAME "cd $ARCHIVE_PATH; tar -xf wiki.tar.gz"
fi

44
epilogue/__init__.py Normal file

@@ -0,0 +1,44 @@
import os
from .forum import Forum
from .wiki import Wiki
from .archive_generator import ArchiveGenerator
import sys
import shutil
BASEDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
FORUM_DATABASE = os.path.join(BASEDIR, "forum", "forum.sqlite")
WIKI_DIRECTORY = os.path.join(BASEDIR, "wiki")
TEMPLATES_DIR = os.path.join(BASEDIR, "templates")
STATIC_DIR = os.path.join(BASEDIR, "static")
ARCHIVE_GENERATOR = ArchiveGenerator(TEMPLATES_DIR, STATIC_DIR)
ARCHIVES_BASEDIR = "archives"
FORUM_ARCHIVES = os.path.join(ARCHIVES_BASEDIR, "forums")
WIKI_ARCHIVES = os.path.join(ARCHIVES_BASEDIR, "wiki")
DEFAULT_ARGUMENTS = ["wiki", "forum"]
def main():
    args = sys.argv[1:]
    if not args:
        args = DEFAULT_ARGUMENTS
    ARCHIVE_GENERATOR.generate_index(ARCHIVES_BASEDIR)
    if "forum" in args or "forums" in args:
        ARCHIVE_GENERATOR.generate_forum(Forum(FORUM_DATABASE), FORUM_ARCHIVES)
    if "wiki" in args:
        archive_wiki()

def archive_wiki():
    wiki = None
    for entry in os.listdir(WIKI_DIRECTORY):
        if entry.endswith(".xml"):
            wiki = Wiki(os.path.join(WIKI_DIRECTORY, entry))
    if wiki:
        shutil.copyfile(wiki.xml_path, os.path.join(ARCHIVES_BASEDIR, "wiki.xml"))
        ARCHIVE_GENERATOR.generate_wiki(wiki, WIKI_ARCHIVES)

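A minimal sketch of driving the archiver from Python rather than via the `epilogue` console script (assumes the data files described in the README are in place; `main` and its argument handling are as defined above):

    import sys
    import epilogue

    sys.argv = ["epilogue", "forums"]  # "forums" is accepted as a synonym of "forum"
    epilogue.main()                    # writes archives/index.html and archives/forums/
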
epilogue/archive_generator.py Normal file

@@ -0,0 +1,262 @@
import os
import logging
import shutil
import math
import json
import gzip
from itertools import chain
from traceback import print_exc
import chevron
import bbcode
import html
from .forum import DEFAULT_POSTS_PER_PAGE
from .wiki import Template, Renderer, Linker, NAMESPACES as WIKI_NAMESPACES
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("ArchiveGenerator")
DEX_LANGUAGES = ["", "DE", "ES", "FR", "IT", "JP", "KO"]
DEX_TYPES = [
    "GlitchDex", "AttackDex", "DexDex", "AreaDex", "TrainerDex", "FieldMoveDex", "ItemDex", "FamilyDex", "DecDex", "DayDex",
    "MDIGlitchDex", "MetascriptDex", "TMHMDex", "StatDex", "PosterDex", "TypeDex", "UnownDex", "DollDex", "DefaultNameDex",
    "BattleTypeDex", "BadgeDescriptionDex", "FacingDex"
]
DEXES = list(chain.from_iterable([["{}{}".format(dex_type, language) for dex_type in DEX_TYPES] for language in DEX_LANGUAGES]))
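# DEXES thus holds every dex/language combination: the plain names ("GlitchDex",
# "AttackDex", ...) plus suffixed variants such as "GlitchDexDE" and "AttackDexFR".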
FORUM_THREAD_INDEX = "thread_index.json.gz"
IMAGE_DIRECTORY = "images"
class ArchiveLinker(Linker):
    def __init__ (self, directory_names=[]):
        super().__init__()
        self.directory_names = directory_names
        self.image_directory = IMAGE_DIRECTORY
        self.replacements = {
            "/": "+",
            #":": ""
        }
    def translate_page_title (self, page_title):
        page_title = super().translate_page_title(page_title)
        fragment = ""
        if "#" in page_title:
            fragment = page_title[page_title.find("#"):]
            page_title = page_title[:-len(fragment)]
        directory_name = ""
        for name in self.directory_names:
            if page_title.startswith("{}/".format(name)):
                directory_name = name
                page_title = page_title[len(directory_name) + 1:]
                break
        for key, value in self.replacements.items():
            page_title = page_title.replace(key, value)
        return "{}{}{}.html{}".format(directory_name, '/' if directory_name else '', page_title, fragment)
    def translate_image_title (self, page_title):
        image_title = super().translate_image_title(page_title)
        if not image_title:
            return
        return "{}/{}".format(self.image_directory, image_title)
def prepare_thread (thread):
    thread.subject = html.unescape(thread.subject)
    return thread

def prepare_post (post):
    post = prepare_thread(post)
    parser = bbcode.Parser()
    post.body = html.unescape(parser.format(post.body))
    return post
class ArchiveGenerator():
    def __init__ (self, template_dir, static_dir):
        self.template_dir = template_dir
        self.static_dir = static_dir
    def generate_index (self, out_dir):
        logger.info("Generating index page at %s", out_dir)
        try:
            os.makedirs(out_dir)
        except FileExistsError: pass
        shutil.copyfile(os.path.join(self.static_dir, "style.css"), os.path.join(out_dir, "style.css"))
        renderer = TemplateRenderer(self.template_dir, out_dir)
        renderer.render_template_to_file("index", "index.html", {})
    def generate_wiki (self, wiki, out_dir):
        logger.info("Archiving wiki to %s", out_dir)
        try:
            os.makedirs(out_dir)
        except FileExistsError: pass
        shutil.copyfile(os.path.join(self.static_dir, "style.css"), os.path.join(out_dir, "style.css"))
        renderer = TemplateRenderer(self.template_dir, out_dir)
        renderer.render_template_to_file("redirect", "index.html", {
            "target": "Main_Page.html"
        })
        categories = {}
        templates = dict([(page.title.split(":")[1], Template(page.get_latest().text)) for page in wiki.get_pages() if page.namespace == WIKI_NAMESPACES['TEMPLATE']])
        linker = ArchiveLinker(directory_names=DEXES)
        wikitext_renderer = Renderer(templates, linker)
        for page in wiki.get_pages():
            try:
                if page.namespace != WIKI_NAMESPACES['MAIN']:
                    continue
                page_out = linker.translate_page_title(page.title)
                base = "./"
                if "/" in page_out:
                    base = "../" * page_out.count("/")
                try:
                    os.makedirs(os.path.dirname(os.path.join(out_dir, page_out)))
                except FileExistsError: pass
                if page.redirect:
                    logger.info("Archiving redirect page (%s -> %s) to %s", page.title, page.redirect, page_out)
                    renderer.render_template_to_file("redirect", page_out, {
                        "target": "{}{}".format(base, linker.translate_page_title(page.redirect))
                    })
                else:
                    logger.info("Archiving page %s to %s", page.title, page_out)
                    (rendered, page_categories) = wikitext_renderer.render(page.get_latest().text, base, page=page)
                    for category in page_categories:
                        if not category in categories:
                            categories[category] = []
                        categories[category].append({
                            "url": page_out,
                            "title": page.title
                        })
                    renderer.render_template_to_file("page", page_out, {
                        "title": " - {}".format(page.title),
                        "pagename": page.title,
                        "page": page,
                        "base": base,
                        "text": rendered
                    })
            except Exception as e:
                logger.error("Error encountered when archiving %s: %s", page.title, e)
                print_exc()
                if isinstance(e, ValueError):
                    raise e
        for category, pages in categories.items():
            category_out = "Category:{}".format(linker.translate_page_title(category))
            logger.info("Archiving category %s to %s", category, category_out)
            try:
                renderer.render_template_to_file("category", category_out, {
                    "title": " - {}".format(category),
                    "pagename": "Category:{}".format(category),
                    "category": category,
                    "pages": pages
                })
            except Exception as e:
                logger.error("Error encountered when archiving %s: %s", category, e)
                print_exc()
    def generate_forum (self, forum, out_dir):
        logger.info("Archiving forum to %s", out_dir)
        try:
            os.makedirs(out_dir)
        except FileExistsError: pass
        shutil.copyfile(os.path.join(self.static_dir, "style.css"), os.path.join(out_dir, "style.css"))
        renderer = TemplateRenderer(self.template_dir, out_dir)
        renderer.render_template_to_file("boards", "index.html", {
            "categories": forum.get_board_tree()
        })
        threads = []
        for board in forum.get_boards():
            forum_threads = forum.get_threads_in_board(board)
            threads = threads + forum_threads
            self.generate_forum_board(forum, board, forum_threads, out_dir)
        self.generate_thread_index(threads, os.path.join(out_dir, FORUM_THREAD_INDEX))
    def generate_forum_board (self, forum, board, threads, out_dir):
        board_out_dir = os.path.join(out_dir, "board-{}".format(board.id))
        logger.info("Archiving board %s to %s", board.name, board_out_dir)
        try:
            os.makedirs(board_out_dir)
        except FileExistsError: pass
        renderer = TemplateRenderer(self.template_dir, board_out_dir)
        threads = [prepare_thread(thread) for thread in threads]
        renderer.render_template_to_file("threads", "index.html", {
            "title": " - {}".format(board.name),
            "base": "../",
            "board": board,
            "threads": threads
        })
        for thread in threads:
            self.generate_forum_thread(forum, board, thread, board_out_dir)
    def generate_forum_thread (self, forum, board, thread, out_dir):
        thread_out_dir = os.path.join(out_dir, "thread-{}".format(thread.id))
        logger.info("Archiving thread %s to %s", thread.subject, thread_out_dir)
        try:
            os.makedirs(thread_out_dir)
        except FileExistsError: pass
        renderer = TemplateRenderer(self.template_dir, thread_out_dir)
        renderer.render_template_to_file("redirect", "index.html", {
            "target": "page-0.html"
        })
        total_pages = math.ceil((thread.num_replies + 1) / DEFAULT_POSTS_PER_PAGE)
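        # A thread with N replies has N + 1 posts (the opening post counts too),
        # so e.g. 30 replies -> ceil(31 / 15) = 3 pages: page-0 .. page-2.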
        page_links = [{"label": page + 1, "link": "page-{}.html".format(page)} for page in range(total_pages)]
        page = 0
        while True:
            posts = [prepare_post(post) for post in forum.get_posts_in_thread(thread, page)]
            if len(posts) < 1:
                break
            logger.info("Archiving page %s of thread %s", page, thread.subject)
            renderer.render_template_to_file("posts", "page-{}.html".format(page), {
                "title": " - {} - Page {}".format(thread.subject, page + 1),
                "base": "../../",
                "board": board,
                "thread": thread,
                "page": page,
                "next": page + 1,
                "page_links": page_links,
                "prev": page - 1,
                "posts": posts
            })
            page = page + 1
    def generate_thread_index (self, threads, out_path):
        # with open(out_path, "wb") as out:
        #     pickle.dump({thread.id: {"parent": thread.parent} for thread in threads}, out, protocol=4)
        threads = {thread.id: {"parent": thread.parent} for thread in threads}
        with gzip.open(out_path, "w") as out:
            out.write(json.dumps(threads).encode())
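        # The result is gzipped JSON mapping thread id -> {"parent": board id},
        # e.g. {"9114": {"parent": 2}}; the redirector fetches and unpacks it at startup.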
class TemplateRenderer():
    def __init__ (self, template_dir, out_dir):
        self.template_dir = template_dir
        self.partials_dir = os.path.join(template_dir, "partials")
        self.out_dir = out_dir
        self.extension = "mustache"
    def open_template (self, name):
        return open(os.path.join(self.template_dir, "{}.{}".format(name, self.extension)))
    def render_template_to_file (self, template_name, out_file, data={}):
        with self.open_template(template_name) as template:
            with open(os.path.join(self.out_dir, out_file), "w") as out:
                out.write(chevron.render(template, data, self.partials_dir, self.extension))

104
epilogue/forum.py Normal file

@@ -0,0 +1,104 @@
import sqlite3
from datetime import datetime
PREFIX = "smf_"
GET_BOARDS = """
SELECT * FROM `{}boards`
ORDER BY `board_order` ASC
""".format(PREFIX)
GET_CATEGORIES = """
SELECT * FROM `{}categories`
ORDER BY `cat_order` ASC
""".format(PREFIX)
GET_THREADS = """
SELECT * FROM `{}topics` AS `topics`, `{}messages` AS `messages`
WHERE `topics`.`id_board`=? AND `messages`.`id_msg`=`topics`.`id_first_msg`
ORDER BY `id_last_msg` DESC
LIMIT ? OFFSET ?
""".format(PREFIX, PREFIX)
GET_POSTS = """
SELECT * FROM `{}messages`
WHERE `id_topic`=?
ORDER BY `poster_time` ASC
LIMIT ? OFFSET ?
""".format(PREFIX)
DEFAULT_POSTS_PER_PAGE = 15
DEFAULT_THREADS_PER_PAGE = 2000
def fix_encoding (string):
    # The dumps arrive as UTF-8 bytes that were mis-decoded as latin1; round-tripping
    # through latin1 recovers the original text (cf. the "Fix broken utf-8 encoding" commit).
    return string.encode("latin1", errors="ignore").decode(errors="ignore")
class Forum():
    def __init__ (self, db_path):
        self.connection = sqlite3.connect(db_path)
        self.connection.row_factory = sqlite3.Row
    def get_board_tree (self):
        categories = self.get_categories()
        boards = self.get_boards()
        for category in categories:
            category.children = [child for child in boards if child.category == category.id and child.child_level == 0]
        for board in boards:
            board.children = [child for child in boards if child.parent_board == board.id]
        return categories
    def get_categories (self):
        cursor = self.connection.cursor()
        cursor.execute(GET_CATEGORIES)
        return [Category(category) for category in cursor.fetchall()]
    def get_boards (self):
        cursor = self.connection.cursor()
        cursor.execute(GET_BOARDS)
        return [Board(board) for board in cursor.fetchall()]
    def get_threads_in_board (self, board, page=0, per_page=DEFAULT_THREADS_PER_PAGE):
        # Accept either a Board object or a raw board id
        # (AttributeError, not ValueError, is what a raw id raises here).
        try:
            board = board.id
        except AttributeError: pass
        cursor = self.connection.cursor()
        cursor.execute(GET_THREADS, (board, per_page, page * per_page))
        return [Thread(thread) for thread in cursor.fetchall()]
    def get_posts_in_thread (self, thread, page=0, per_page=DEFAULT_POSTS_PER_PAGE):
        # Accept either a Thread object or a raw topic id.
        try:
            thread = thread.id
        except AttributeError: pass
        cursor = self.connection.cursor()
        cursor.execute(GET_POSTS, (thread, per_page, page * per_page))
        return [Post(post) for post in cursor.fetchall()]
class Category():
    def __init__ (self, row):
        self.id = row['id_cat']
        self.name = fix_encoding(row['name'])
        self.children = []

class Board():
    def __init__ (self, row):
        self.id = row['id_board']
        self.category = row['id_cat']
        self.parent_board = row['id_parent']
        self.child_level = row['child_level']
        self.name = fix_encoding(row['name'])
        self.description = fix_encoding(row['description'])
        self.children = []

class Thread():
    def __init__ (self, row):
        self.id = row['id_topic']
        self.parent = row['id_board']
        self.datetime = datetime.fromtimestamp(row['poster_time'])
        self.subject = fix_encoding(row['subject'])
        self.poster_name = fix_encoding(row['poster_name'])
        self.num_replies = row['num_replies']

class Post():
    def __init__ (self, row):
        self.id = row['id_msg']
        self.parent = row['id_topic']
        self.datetime = datetime.fromtimestamp(row['poster_time'])
        self.subject = fix_encoding(row['subject'])
        self.body = fix_encoding(row['body'])
        self.poster_name = fix_encoding(row['poster_name'])

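A minimal usage sketch for this module (assumes `forum/forum.sqlite` has been built by `create_sqlite_database`; all names are from the file above):

    from epilogue.forum import Forum

    forum = Forum("forum/forum.sqlite")
    for board in forum.get_boards():
        threads = forum.get_threads_in_board(board)  # accepts a Board or a raw id
        print("{}: {} threads".format(board.name, len(threads)))
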
98
epilogue/redirector.py Normal file

@@ -0,0 +1,98 @@
import argparse
import gzip
import urllib.request
import json
from .archive_generator import ArchiveLinker, DEXES, FORUM_THREAD_INDEX
from flask import Flask, redirect, request
app = Flask(__name__)
DEFAULT_ARCHIVES_DOMAIN = "https://archives.glitchcity.info/"
DEFAULT_FORUMS_ARCHIVE = "{}forums".format(DEFAULT_ARCHIVES_DOMAIN)
DEFAULT_WIKI_ARCHIVE = "{}wiki".format(DEFAULT_ARCHIVES_DOMAIN)
## Wiki redirector
@app.route("/wiki/")
def redirect_wiki_main ():
    return redirect_wiki("Main Page")

@app.route("/wiki/<path:path>")
def redirect_wiki (path):
    return redirect(make_wiki_url(path))

def make_wiki_url (path):
    if path.endswith("/"):
        path = path[:-1]
    return app.args.wiki_archive + app.wiki_linker.translate_page_title(path)
## Forum redirector
@app.route('/forums/')
def redirect_forums_index ():
    return redirect_forums("")

@app.route('/forums/<path:path>')
def redirect_forums (path):
    return redirect(make_forum_url(request))

def make_forum_url (request):
    thread_id = request.args.get("topic", None)
    board_id = request.args.get("board", None)
    post_id = None
    if thread_id:
        thread_id = strip_extension(thread_id)
        if "." in thread_id:
            (thread_id, post_id) = thread_id.split(".")
            post_id = post_id[len("msg"):]
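        # SMF links look like index.php?topic=9114.msg123456 or ?topic=9114.0; the part
        # after the dot is a post anchor or page offset. post_id is parsed above but is
        # not (yet) used when building the archive URL below.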
        if not board_id:
            board_id = app.thread_index[thread_id]['parent']
    try:
        if "." in board_id:
            board_id = board_id.split(".")[0]
    except TypeError: pass
    url = app.args.forums_archive
    if board_id:
        url = url + "board-{}".format(board_id)
    if thread_id:
        url = url + "/thread-{}".format(thread_id)
    if not url.endswith("/"):
        url = url + "/"
    return url

def strip_extension (item):
    for extension in [".html"]:
        if item.endswith(extension):
            item = item[:-len(extension)]
    return item
def read_thread_index (forums_archive):
    with urllib.request.urlopen("{}{}".format(forums_archive, FORUM_THREAD_INDEX)) as gzipped_in:
        data = gzipped_in.read()
    return json.loads(gzip.decompress(data).decode())
def main ():
    parser = argparse.ArgumentParser()
    parser.add_argument("--wiki-archive", help="URL to wiki archive", default=DEFAULT_WIKI_ARCHIVE)
    parser.add_argument("--forums-archive", help="URL to forums archive", default=DEFAULT_FORUMS_ARCHIVE)
    args = parser.parse_args()
    if not args.wiki_archive.endswith("/"):
        args.wiki_archive = args.wiki_archive + "/"
    if not args.forums_archive.endswith("/"):
        args.forums_archive = args.forums_archive + "/"
    app.args = args
    app.thread_index = read_thread_index(args.forums_archive)
    app.wiki_linker = ArchiveLinker(directory_names=DEXES)
    app.run()

228
epilogue/wiki.py Normal file

@@ -0,0 +1,228 @@
from xml.etree import ElementTree
import mwparserfromhell
from mwparserfromhell.nodes import Wikilink, Comment, ExternalLink, Heading, Tag, Template, Text
NAMESPACE = "{http://www.mediawiki.org/xml/export-0.10/}"
PAGE_TAG = "{}page".format(NAMESPACE)
ID_TAG = "{}id".format(NAMESPACE)
TITLE_TAG = "{}title".format(NAMESPACE)
REVISION_TAG = "{}revision".format(NAMESPACE)
NS_TAG = "{}ns".format(NAMESPACE)
REDIRECT_TAG = "{}redirect".format(NAMESPACE)
TEXT_TAG = "{}text".format(NAMESPACE)
FORMAT_TAG = "{}format".format(NAMESPACE)
MODEL_TAG = "{}model".format(NAMESPACE)
TIMESTAMP_TAG = "{}timestamp".format(NAMESPACE)
COMMENT_TAG = "{}comment".format(NAMESPACE)
CONTRIBUTOR_TAG = "{}contributor".format(NAMESPACE)
USERNAME_TAG = "{}username".format(NAMESPACE)
NAMESPACES = {
"MAIN": 0,
"TEMPLATE": 10
}
INTERWIKI_NAMESPACES = {
"bp:": "https://bulbapedia.bulbagarden.net/wiki/{}",
"wikipedia:": "https://en.wikipedia.org/wiki/{}"
}
FILE_NAMESPACES = ["File:", "Image:"]
CATEGORY_NAMESPACE = "Category:"
CATEGORY_LINK_NAMESPACE = ":{}".format(CATEGORY_NAMESPACE)
class Wiki():
    def __init__ (self, xml_path):
        self.xml_path = xml_path
    def get_pages (self):
        tree = ElementTree.parse(self.xml_path)
        return (Page(element) for element in tree.getroot() if element.tag == PAGE_TAG)
class Page():
    def __init__ (self, element):
        self.redirect = None
        self.revisions = []
        for child in element:
            if child.tag == ID_TAG:
                self.id = child.text
            elif child.tag == NS_TAG:
                self.namespace = int(child.text)
            elif child.tag == TITLE_TAG:
                self.title = child.text
            elif child.tag == REVISION_TAG:
                self.revisions.append(Revision(child))
            elif child.tag == REDIRECT_TAG:
                self.redirect = child.attrib['title']
    def get_latest (self):
        return self.revisions[0]
class Revision():
    def __init__ (self, element):
        for child in element:
            if child.tag == ID_TAG:
                self.id = child.text
            elif child.tag == TEXT_TAG:
                self.text = child.text
            elif child.tag == CONTRIBUTOR_TAG:
                self.contributor = Contributor(child)
            elif child.tag == TIMESTAMP_TAG:
                self.timestamp = child.text
            elif child.tag == MODEL_TAG:
                self.model = child.text
            elif child.tag == COMMENT_TAG:
                self.comment = child.text
class Contributor():
    def __init__ (self, element):
        for child in element:
            if child.tag == ID_TAG:
                self.id = child.text
            elif child.tag == USERNAME_TAG:
                self.username = child.text
class Renderer():
    def __init__ (self, templates={}, linker=None):
        self.templates = templates
        self.linker = linker if linker else Linker()
    def render (self, wikitext, base="", *args, **kwargs):
        categories = []
        wikitext = self.transclude_templates(wikitext, *args, **kwargs)
        # parse out categories
        for link in wikitext.ifilter_wikilinks():
            if not link.title.startswith(CATEGORY_NAMESPACE):
                continue
            wikitext.remove(link)
            categories.append(link.title[len(CATEGORY_NAMESPACE):])
        rendered = [render(wikitext, base, self.linker)]
        if categories:
            rendered.append('<h2>Categories</h2><ul class="categories">')
            for category in categories:
                rendered.append('<li><a href="{}Category:{}">{}</a></li>'.format(
                    base,
                    self.linker.translate_page_title(category),
                    category
                ))
            rendered.append("</ul>")
        return ("".join(rendered), categories)
    def transclude_templates (self, wikitext, *args, **kwargs):
        wikitext = mwparserfromhell.parse(wikitext)
        for inclusion in wikitext.ifilter_templates():
            template_key = str(inclusion.name)
            template = self.templates.get(template_key, self.templates.get(template_key[0].upper() + template_key[1:], None))
            result = None
            if template:
                result = template(inclusion, *args, **kwargs)
            else:
                result = "<span class='unknown-template'>Template:{0}</span>".format(inclusion.name)
            try:
                wikitext.replace(inclusion, result) #self.transclude_templates(result))
            except ValueError: pass
        return wikitext
def render (wikitext, base="", linker=None):
    rendered = []
    if not linker:
        linker = Linker()
    for node in wikitext.ifilter(False):
        # node types:
        # https://mwparserfromhell.readthedocs.io/en/latest/api/mwparserfromhell.nodes.html#module-mwparserfromhell.nodes.text
        node_type = type(node)
        if node_type is Wikilink:
            image_name = linker.translate_image_title(node.title)
            if image_name:
                # The caption (node.text) is not rendered: the original passed it as an
                # extra argument that the two-placeholder format string never used.
                rendered.append('<img src="{}{}" />'.format(
                    base,
                    image_name
                ))
            else:
                url = linker.translate_interwiki_title(node.title)
                if not url:
                    url = "{}{}".format(base, linker.translate_page_title(node.title))
                rendered.append('<a href="{}">{}</a>'.format(
                    url,
                    render(node.text if node.text else node.title, base, linker)
                ))
        elif node_type is ExternalLink:
            rendered.append('<a href="{}">{}</a>'.format(
                node.url,
                render(node.title if node.title else node.url)
            ))
        elif node_type is Tag:
            rendered.append("<{}>{}</{}>".format(
                render(node.tag),
                render(node.contents, base, linker),
                render(node.tag)
            ))
        elif node_type is Heading:
            rendered.append('<h{} id="{}">{}</h{}>'.format(
                node.level,
                reformat_page_title(node.title),
                render(node.title, base, linker),
                node.level
            ))
        elif node_type is Text:
            rendered.append(node.value)
    return "".join(rendered).strip().replace("\n\n", "<br /><br />")
class Linker():
    def __init__ (self, file_namespaces=FILE_NAMESPACES, interwiki_namespaces=INTERWIKI_NAMESPACES):
        self.file_namespaces = file_namespaces
        self.interwiki_namespaces = interwiki_namespaces
    def translate_interwiki_title (self, page_title):
        for namespace, url in self.interwiki_namespaces.items():
            if page_title.startswith(namespace):
                return url.format(page_title[len(namespace):])
    def translate_page_title (self, page_title):
        if page_title.startswith(CATEGORY_LINK_NAMESPACE):
            page_title = page_title[1:]
        return reformat_page_title(page_title)
    def translate_image_title (self, page_title):
        for namespace in self.file_namespaces:
            if page_title.startswith(namespace):
                return reformat_page_title(page_title[len(namespace):])
def reformat_page_title (page_title):
    if not page_title:
        return ""
    return "{}{}".format(page_title[0].upper(), page_title[1:].replace(' ', '_'))
class Template():
    def __init__ (self, wikicode):
        self.wikicode = mwparserfromhell.parse(wikicode)
        for tag in self.wikicode.ifilter_tags():
            if tag.tag == "noinclude":
                self.wikicode.remove(tag)
    def __call__ (self, inclusion, *args, **kwargs):
        parsed_wikicode = mwparserfromhell.parse(self.wikicode)
        for argument in parsed_wikicode.ifilter_arguments():
            value = argument.default if argument.default else argument.name
            if inclusion.has(argument.name):
                # Substitute the parameter's .value; substituting the whole Parameter
                # node would splice "name=value" into the output for named parameters.
                value = inclusion.get(argument.name).value
            try:
                parsed_wikicode.replace(argument, value)
            except ValueError: pass
        return parsed_wikicode

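A minimal sketch of how the pieces above compose, using a made-up template and page text (the `Template` and `Renderer` names are from this file):

    from epilogue.wiki import Template, Renderer

    # <noinclude> content is stripped when the template is loaded.
    templates = {"Stub": Template("This article is a stub.<noinclude>docs</noinclude>")}
    renderer = Renderer(templates)
    # Transcludes {{Stub}}, strips the category link out of the body, and returns
    # the rendered HTML along with the categories that were parsed out.
    (rendered, categories) = renderer.render("{{Stub}} [[Category:Glitches]]")
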
14
forum/create_sqlite_database Executable file

@@ -0,0 +1,14 @@
#!/bin/sh
SQLITE_OUT=forum.sqlite
AWK=awk
# `&>` is a bash-ism; use POSIX redirection since this runs under /bin/sh.
if command -v mawk > /dev/null 2>&1; then
    AWK=mawk
fi
$AWK -f ../mysql2sqlite/mysql2sqlite structure.sql | sqlite3 -echo $SQLITE_OUT
$AWK -f ../mysql2sqlite/mysql2sqlite misc_data.sql | sqlite3 -echo $SQLITE_OUT
$AWK -f ../mysql2sqlite/mysql2sqlite categories.sql | sqlite3 -echo $SQLITE_OUT
$AWK -f ../mysql2sqlite/mysql2sqlite boards.sql | sqlite3 -echo $SQLITE_OUT
$AWK -f ../mysql2sqlite/mysql2sqlite threads.sql | sqlite3 -echo $SQLITE_OUT
#cat threads.sql | sqlite3 $SQLITE_OUT

203
forum/dump_forum_data Executable file

@@ -0,0 +1,203 @@
#!/usr/bin/env python3
import argparse
import os
import re
from xml.etree import ElementTree
from subprocess import check_output, check_call
# Tables we don't want data from. These are usually populated with default data from an SMF install.
DUMP_STRUCTURE_ONLY = [
"smf_admin_info_files", "smf_ads", "smf_ads_settings", "smf_approval_queue", "smf_awards", "smf_ban_groups", "smf_ban_items",
"smf_bbcodes", "smf_board_permissions", "smf_buddies", "smf_calendar", "smf_calendar_holidays", "smf_cf_fields", "smf_cf_forms",
"smf_collapsed_categories", "smf_custom_actions", "smf_custom_fields", "smf_down_cat", "smf_down_catperm", "smf_down_comment",
"smf_down_creport", "smf_down_custom_field", "smf_down_custom_field_data", "smf_down_file", "smf_down_groupquota", "smf_down_rating",
"smf_down_report", "smf_down_userquota", "smf_global_announcements", "smf_global_announcements_boards", "smf_groupmods",
"smf_group_moderators", "smf_login", "smf_log_actions", "smf_log_activity", "smf_log_banned", "smf_log_boards", "smf_log_comments",
"smf_log_digest", "smf_log_errors", "smf_log_floodcontrol", "smf_log_group_requests", "smf_log_issues", "smf_log_karma",
"smf_log_mark_read", "smf_log_member_notices", "smf_log_notify", "smf_log_notify_projects", "smf_log_online", "smf_log_packages",
"smf_log_polls", "smf_log_projects", "smf_log_project_mark_read", "smf_log_reported", "smf_log_reported_comments",
"smf_log_scheduled_tasks", "smf_log_search_messages", "smf_log_search_results", "smf_log_search_subjects", "smf_log_search_topics",
"smf_log_spider_hits", "smf_log_spider_stats", "smf_log_subscribed", "smf_log_topics", "smf_mail_queue", "smf_membergroups", "smf_members",
"smf_moderators", "smf_openid_assoc", "smf_package_servers", "smf_permissions", "smf_permission_profiles", "smf_personal_messages",
"smf_picture_comments","smf_pm_attachments", "smf_pm_recipients", "smf_pm_rules", "smf_profile_albums", "smf_profile_comments",
"smf_profile_pictures", "", "smf_scheduled_tasks", "smf_sessions", "smf_settings", "smf_smileys", "smf_spiders", "smf_subscriptions", "smf_themes"
]
# Tables we want all the data from. Some legacy data that may be of interest is also here.
DUMP_ALL_DATA = [
# actual forum content
"smf_polls", "smf_poll_choices", "smf_message_icons", "smf_attachments",
# pre-wiki (orange glove)
"comments", "dirs", "docs", "glitchdex", "glitchdex2", "groups", "old", "staff", "statsbydex",
# felblog (old blogging system)
"smf_felblog_categories", "smf_felblog_cmnt_log", "smf_felblog_comments", "smf_felblog_content", "smf_felblog_cont_log", "smf_felblog_manager",
"smf_felblog_ratings", "smf_felblog_settings",
# arcade system
"smf_games", "smf_games_category", "smf_games_challenge", "smf_games_favorite", "smf_games_high", "smf_games_rate",
"smf_games_settings", "smf_games_tournament", "smf_games_tournament_players", "smf_games_tournament_results", "smf_games_tournament_scores",
# shop system
"smf_shop_inventory", "smf_shop_items", "smf_shop_shops",
# project management system (we had this?)
"smf_projects", "smf_projects_settings", "smf_project_developer", "smf_project_permissions", "smf_project_profiles", "smf_project_timeline",
"smf_project_trackers", "smf_project_versions", "smf_issues", "smf_issue_attachments", "smf_issue_category", "smf_issue_comments",
"smf_issue_tags",
# used for the IRC bridge
"smf_slartibartfast"
]
# Special tables we need to filter.
CATEGORIES = "smf_categories"
BOARDS = "smf_boards"
TOPICS = "smf_topics"
MESSAGES = "smf_messages"
# Dump filenames
STRUCTURE_DUMP = "structure.sql"
MISC_DUMP = "misc_data.sql"
CATEGORIES_DUMP = "categories.sql"
BOARDS_DUMP = "boards.sql"
TOPICS_DUMP = "threads.sql"
# Categories we are not interested in archiving.
# `id_cat` in (1, 2)
DO_NOT_ARCHIVE_CATEGORIES = [
7, # Links
12, # Epsilon: ?????
6, # Sigma: Higher Access
8 # Omega: Garbage
]
# Boards we are not interested in archiving.
# `id_board` in (1, 2)
DO_NOT_ARCHIVE_BOARDS = [
24, 94, 118, 121, # Links
40, # Exclusive Board
65, # Requests for Moderatorship
66, # Requests for Membership+
67, # Requests for Distinguished Membership
23, # M.A.S.K. HQ (Staff Board)
22, # Admins Only Board
89, # Test Board
86, # Omega Archives
51, 37, 79, 26, 47, 44, 45, 99, 93, 119, 96,
62, 60, 80, 84, # Submit-A-Glitch Archives
3, 4, 5, 57, 58, 59, 38, 54, 63, 64,
68, 69, 70, 81, 82, 83,
28, # The Dumpster Out Back
123 # ?????
]
# Regexes for sensitive information
EMAIL_REGEX = re.compile(r"'[^'\s]+@[^'\s]+'")
IP_REGEX = re.compile(r"'\d+\.\d+\.\d+\.\d+'")
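# These masks are applied to the topics/messages dump at the bottom of this script,
# e.g. 'someone@example.com' -> '*****@*****' and '203.0.113.7' -> '***.***.***.***'.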
class Database():
    def __init__(self, host, database, username, password):
        self.host = host
        self.database = database
        self.username = username
        self.password = password
    def dump(self, tables, filename, *args):
        command = ["mysqldump"] + list(args) + self.auth()
        if filename:
            command.append("--result-file={}".format(filename))
        command.append(self.database)
        command = command + tables
        print(">> {}".format(format_command(command)))
        if filename:
            check_call(command)
            return filename
        else:
            return check_output(command).strip().decode()
    def query(self, query):
        command = ["mysql"] + self.auth() + [
            "--xml",
            self.database,
            "-e", query
        ]
        print(">> {}".format(format_command(command)))
        result = check_output(command).strip()
        return [row_from_xml(element) for element in ElementTree.fromstring(result)]
    def auth(self):
        return [
            "--user={}".format(self.username),
            "--password={}".format(self.password),
            "--host={}".format(self.host)
        ]

def format_command(command):
    return "{}".format([item for item in command if not item.startswith("--password")])

def row_from_xml(element):
    row = {}
    for child in element:
        row[child.attrib['name']] = child.text
    return row
parser = argparse.ArgumentParser(description="Forum scraper")
parser.add_argument(
    "--host",
    dest="host",
    default="127.0.0.1",
    help="Database host"
)
parser.add_argument(
    "--db",
    dest="db",
    default="glitchcity",
    help="Database name"
)
parser.add_argument(
    "--username",
    dest="username",
    default="glitchcity",
    help="Database username"
)
parser.add_argument(
    "--password",
    dest="password",
    required=True,
    help="Database password"
)
args = parser.parse_args()
database = Database(args.host, args.db, args.username, args.password)
# Select which topics we DON'T want, based on the board ids we don't want
do_not_archive_thread_ids = [row['id_topic'] for row in database.query("SELECT id_topic FROM smf_topics WHERE id_board IN ({})".format(",".join([str(id) for id in DO_NOT_ARCHIVE_BOARDS])))]
if not os.path.exists(STRUCTURE_DUMP):
    database.dump(DUMP_STRUCTURE_ONLY + DUMP_ALL_DATA + [CATEGORIES, BOARDS, TOPICS, MESSAGES], STRUCTURE_DUMP, "--no-data")
if not os.path.exists(MISC_DUMP):
    database.dump(DUMP_ALL_DATA, MISC_DUMP, "--no-create-info")
category_filter = ",".join([str(id) for id in DO_NOT_ARCHIVE_CATEGORIES])
board_filter = ",".join([str(id) for id in DO_NOT_ARCHIVE_BOARDS])
thread_filter = ",".join([str(id) for id in do_not_archive_thread_ids])
if not os.path.exists(CATEGORIES_DUMP):
    database.dump([CATEGORIES], CATEGORIES_DUMP, "--where=NOT id_cat in ({})".format(category_filter), "--no-create-info")
if not os.path.exists(BOARDS_DUMP):
    database.dump([BOARDS], BOARDS_DUMP, "--where=NOT id_board in ({})".format(board_filter), "--no-create-info")
with open(TOPICS_DUMP, "w", encoding="utf-8") as topics_dump:
    dump_content = database.dump([TOPICS, MESSAGES], None, "--where=NOT id_topic in ({})".format(thread_filter), "--no-create-info")
    dump_content = EMAIL_REGEX.sub("'*****@*****'", dump_content)
    dump_content = IP_REGEX.sub("'***.***.***.***'", dump_content)
    topics_dump.write(dump_content)

69
get_pms.php Normal file

@@ -0,0 +1,69 @@
<?php
//include("smf_api.php");
include("SSI.php");
global $smcFunc, $user_info, $db_prefix, $db_name;
// SELECT * FROM `smf_personal_messages` AS `pms`, `smf_members` AS `senders`, `smf_pm_recipients` AS `recipients` WHERE `pms`.`id_member_from` = `senders`.`id_member` AND `pms`.`id_pm` = `recipients`.`id_pm` AND (`recipients`.`id_member` = 1 OR `pms`.`id_member_from` = 1);
$request = $smcFunc['db_query']('', '
SELECT `pms`.`id_pm`, `pms`.`subject`, `pms`.`body`, `pms`.`msgtime`,
`senders`.`member_name` AS `sender_username`,
`senders`.`real_name` AS `sender_display_name`,
`recipient_members`.`member_name` AS `recipient_username`,
`recipient_members`.`real_name` AS `recipient_display_name`
FROM {db_prefix}personal_messages AS `pms`,
{db_prefix}members AS `senders`,
{db_prefix}members AS `recipient_members`,
{db_prefix}pm_recipients AS `recipients`
WHERE `pms`.`id_member_from` = `senders`.`id_member`
AND `recipient_members`.`id_member` = `recipients`.`id_member`
AND `pms`.`id_pm` = `recipients`.`id_pm`
AND (`recipients`.`id_member` = {int:id_member} OR `pms`.`id_member_from` = {int:id_member})',
array(
'id_member' => $user_info['id'],
)
);
function format_name ($result, $type) {
    $username = $result["${type}_username"];
    $display_name = $result["${type}_display_name"];
    $name = $display_name;
    if ($display_name != $username) {
        $name = "$name ($username)";
    }
    return $name;
}
echo "<html>
<head>
<title>Personal Messages sent to/from ${user_info['name']}</title>
<style type='text/css'>
.label { font-weight: bold }
h2 { font-size: 17px }
article { border-top: 1px solid black; }
section { margin-top: 15px; margin-bottom: 15px; }
</style>
</head>
<body>
<h1>Personal Messages sent to/from ${user_info['name']}</h1>";
while ($row = $smcFunc['db_fetch_assoc']($request)) {
    $to = format_name($row, "recipient");
    $from = format_name($row, "sender");
    $date = strftime("%c", $row['msgtime']);
    $body = parse_bbc($row['body'], true, "pm" . $row['id_pm']);
    echo "<article>
    <header>
        <h2><a href='#${row['id_pm']}' id='${row['id_pm']}'>${row['subject']}</a></h2>
        <div><span class='label'>From:</span> $from</div>
        <div><span class='label'>To:</span> $to</div>
        <div><span class='label'>Date:</span> $date</div>
    </header>
    <section>$body</section>
    </article>";
    flush();
}
$smcFunc['db_free_result']($request);
echo "</body></html>";

1
mysql2sqlite Submodule

Submodule mysql2sqlite added at 2a759df790

18
setup.py Normal file

@@ -0,0 +1,18 @@
#!/usr/bin/env python
from setuptools import setup  # setuptools, not distutils: install_requires/entry_points need it
setup(
    name='Glitch City Labs - Epilogue',
    version='0.0.1',
    description='Tools for exporting and creating archives of Glitch City Labs data',
    author='Adrian Kuschelyagi Malacoda',
    packages=['epilogue'],
    install_requires=['pysqlite3 >= 0.4.3', 'chevron >= 0.13.1', 'bbcode >= 1.1.0', 'mwparserfromhell >= 0.5.4', 'flask >= 1.1.2'],
    entry_points={
        'console_scripts': [
            'epilogue = epilogue:main',
            'gclredirector = epilogue.redirector:main'
        ]
    }
)

21
static/style.css Normal file

@@ -0,0 +1,21 @@
body { font-family: monospace; }
ul.boards { margin-left: 0; padding-left: 0; }
.board { margin-bottom: 5px; }
.category, .board { list-style-type: none;}
.category .name, .board .name { font-weight: bold; }
.board .board { margin-left: 15px; }
#threads { width: 100%; }
.label { font-weight: bold }
article { border-top: 1px solid black; }
section { margin-top: 15px; margin-bottom: 15px; }
.next { float: right; }
.pagination { margin-bottom: 10px; }
.pagination ul { list-style-type: none; margin-left: 0; padding-left: 0; display: inline; }
.pagination li { display: inline; }
.page { padding-top: 15px; }
.page table { width: 100%; }

templates/boards.mustache Normal file

@@ -0,0 +1,7 @@
{{>header}}
{{>forums_notice}}
{{#categories}}
<h2 class="category-name">{{name}}</h2>
{{>child_boards}}
{{/categories}}
{{>footer}}

templates/category.mustache Normal file

@@ -0,0 +1,9 @@
{{>header}}
{{>wiki_notice}}
<h2>{{pagename}}</h2>
<ul>
{{#pages}}
<li><a href="{{url}}">{{title}}</a></li>
{{/pages}}
</ul>
{{>footer}}

11
templates/index.mustache Normal file

@@ -0,0 +1,11 @@
{{>header}}
Welcome to the <b>Glitch City Laboratories Archives</b>.
<p>Glitch City Laboratories was a Pok&eacute;mon glitch website that existed from March 2006 to September 2020 (<a href="forums/board-2/thread-9114/page-0.html">announcement of closure</a>). This is an <b>archive</b> of content from the website prior to its closure.</p>
<p>Further development and discussion is happening at <b><a href="https://discord.com/invite/EA7jxJ6">Glitch City Research Institute</a></b>, the successor community.</p>
<p>The <b><a href="https://glitchcity.wiki/">Glitch City Wiki</a></b> is the continuation of the Glitch City Laboratories wiki.</p>
<h2>Archives</h2>
<ul>
<li><a href="forums">Forums</a> (<a href="forums.tar.gz">.tar.gz</a>) (<a href="forums.sql.gz">.sql.gz</a>) (<a href="forums.sqlite.gz">.sqlite.gz</a>)</li>
<li><a href="wiki">Wiki</a> (<a href="wiki.tar.gz">.tar.gz</a>) (<a href="wiki.xml.gz">.xml.gz</a>)</li>
</ul>
{{>footer}}

7
templates/page.mustache Normal file

@@ -0,0 +1,7 @@
{{>header}}
{{>wiki_notice}}
<h2>{{page.title}}</h2>
<article class="page">
{{{text}}}
</article>
{{>footer}}

templates/partials/child_boards.mustache Normal file

@@ -0,0 +1,9 @@
<ul class="boards">
{{#children}}
<li class="board">
<div class="name"><a href="board-{{id}}">{{name}}</a></div>
<div class="description">{{{description}}}</div>
{{>child_boards}}
</li>
{{/children}}
</ul>

templates/partials/footer.mustache Normal file

@@ -0,0 +1,2 @@
</body>
</html>

templates/partials/forums_notice.mustache Normal file

@@ -0,0 +1,5 @@
<div class="notice">
<p>Glitch City Laboratories closed on 1 September 2020 (<a href="{{base}}board-2/thread-9114/page-0.html">announcement</a>). This is an <b>archived</b> copy of a thread from Glitch City Laboratories Forums.</p>
<p>You can join <a href="https://discord.com/invite/EA7jxJ6">Glitch City Research Institute</a> to ask questions or discuss current developments.</p>
<p>You may also download the archive of this forum in <a href="{{base}}../forums.tar.gz">.tar.gz</a>, <a href="{{base}}../forums.sql.gz">.sql.gz</a>, or <a href="{{base}}../forums.sqlite.gz">.sqlite.gz</a> formats.</p>
</div>

templates/partials/header.mustache Normal file

@@ -0,0 +1,8 @@
<html>
<head>
<title>Glitch City Laboratories Archives{{title}}</title>
<link href="{{base}}style.css" rel="stylesheet" type="text/css" />
<meta charset="UTF-8" />
</head>
<body>
<h1><a href="{{base}}">Glitch City Laboratories Archives</a></h1>

templates/partials/pagination.mustache Normal file

@@ -0,0 +1,9 @@
<div class="pagination">
<a class="prev" href="page-{{prev}}.html">Previous Page</a>
<ul>
{{#page_links}}
<li><a href="{{link}}">{{label}}</a></li>
{{/page_links}}
</ul>
<a class="next" href="page-{{next}}.html">Next Page</a>
</div>

templates/partials/wiki_notice.mustache Normal file

@@ -0,0 +1,6 @@
<div class="notice">
<p>Glitch City Laboratories closed on 1 September 2020 (<a href="{{base}}../forums/board-2/thread-9114/page-0.html">announcement</a>). This is an <b>archived</b> copy of an article from Glitch City Laboratories wiki.</p>
<p><b>A live version of this article is available at the <a href="https://glitchcity.wiki/">Glitch City Wiki</a> <a href="https://glitchcity.wiki/{{pagename}}">here</a>.</b></p>
<p>You can join <a href="https://discord.com/invite/EA7jxJ6">Glitch City Research Institute</a> to ask questions or discuss current developments.</p>
<p>You may also download the archive of the wiki in <a href="{{base}}../wiki.tar.gz">.tar.gz</a> or <a href="{{base}}../wiki.xml.gz">.xml.gz</a> formats.</p>
</div>

17
templates/posts.mustache Normal file

@@ -0,0 +1,17 @@
{{>header}}
{{>forums_notice}}
<h2><a href="../">{{board.name}}</a></h2>
<h3>{{thread.subject}} - Page {{next}}</h3>
{{>pagination}}
{{#posts}}
<article id="msg{{id}}">
<header>
<h4>{{subject}}</h4>
<div><span class="label">Posted by:</span> {{poster_name}}</div>
<div><span class="label">Date:</span> {{datetime}}</div>
</header>
<section>{{{body}}}</section>
</article>
{{/posts}}
{{>pagination}}
{{>footer}}

templates/redirect.mustache Normal file

@@ -0,0 +1,4 @@
<html>
<head><meta http-equiv="refresh" content="0; url={{target}}" /></head>
<body><p><a href="{{target}}">Redirect</a></p></body>
</html>

templates/threads.mustache Normal file

@@ -0,0 +1,20 @@
{{>header}}
{{>forums_notice}}
<h2>{{board.name}}</h2>
<table id="threads">
<tr>
<th>Title</th>
<th>Poster</th>
<th>Date</th>
<th>Replies</th>
</tr>
{{#threads}}
<tr>
<td class="thread-subject"><a href="thread-{{id}}">{{subject}}</a></td>
<td class="thread-poster">{{poster_name}}</td>
<td class="thread-date">{{datetime}}</td>
<td class="replies">{{num_replies}}</td>
</tr>
{{/threads}}
</table>
{{>footer}}

54
wiki/find_pages Executable file

@@ -0,0 +1,54 @@
#!/usr/bin/env python3
from urllib.request import urlopen
from datetime import date
import json
GCL_URL = "https://glitchcity.info/"
API_ENDPOINT = "/w/api.php?action=query&generator=categorymembers&gcmtitle=Category:{}&prop=info&gcmlimit=100&format=json"
CATEGORIES = [
"Generation I glitches", "Generation II glitches", "Generation III glitches", "Generation IV glitches",
"Generation I glitch Pokémon", "Generation II glitch Pokémon", "Generation III glitch Pokémon", "Generation IV glitch Pokémon",
"Pokémon Red and Blue glitch moves", "Pokémon Yellow glitch moves", "Generation II glitch moves",
"Generation I glitch items", "Generation II glitch items", "Generation IV glitch items",
"Generation I glitch Trainers", "Generation II glitch Trainers"
]
FILTER_MODIFIED_SINCE_THEN = date(2020, 3, 31)
def get_pages_for_category(url, category):
    category = category.replace(" ", "_").replace("é", "%C3%A9")
    pages = {}
    continue_param = None
    while True:
        api_url = url + API_ENDPOINT.format(category)
        if continue_param:
            api_url = api_url + "&gcmcontinue=" + continue_param
        with urlopen(api_url) as result:
            result_object = json.loads(result.read())
        pages.update(result_object['query']['pages'])
        if not "continue" in result_object:
            break
        continue_param = result_object['continue']['gcmcontinue']
    return pages
def filter_page(page):
    # 'touched' is an ISO timestamp like "2020-08-01T12:34:56Z"; compare the date part only.
    touched = date(*[int(value) for value in page['touched'].split("T")[0].split("-")])
    return touched >= FILTER_MODIFIED_SINCE_THEN
all_pages = {}
for category in CATEGORIES:
    category_pages = get_pages_for_category(GCL_URL, category)
    print("--> Found {} total pages in category {}".format(len(category_pages), category))
    all_pages.update(category_pages)
print("----> {} total pages to consider".format(len(all_pages)))
filtered_pages = [page for page in all_pages.values() if filter_page(page)]
print("----> {} filtered pages".format(len(filtered_pages)))
for page in filtered_pages:
    print(page['title'])

5593
wiki/wiki_pages Normal file

File diff suppressed because it is too large.