Compare commits: 1a2dbbe65b...master
47 commits (newest to oldest):
eff22ff325, 6ab90d5fff, d98354046b, f1ae73b737, d062ca6787, a9adf51453,
7157757d43, 5859ee0408, b33ea016d5, ab29250b74, 167a03be3c, f65361e06b,
c37cf4fc44, ef3f3dd60c, 0e3f1274cc, 1b7e3ce08b, 646b840be4, a382e6d4fd,
ade44491d4, 2e73ecd59f, 2aa1a7cf47, 77b160a35b, ea7e1294b1, df25b09eb7,
43a36ba730, d19ca39838, 38cdb811b0, 54fa852897, 0b1320a9da, 6f05f05410,
31bdd806ad, 943563a8db, 23f4789599, 4045473e65, 161dd19d36, f709ce4810,
bf4a5f2b5d, dc0191a04a, 3cb08e2d2f, 0116646dd9, f3b0319d5a, 7026bc2b34,
51e9782c7e, 73dae54cf1, eac65f6bda, d4002ec592, 191e1ebd13

.gitignore (vendored, new file, 1 line)
@@ -0,0 +1 @@
/**/__pycache__

.gitmodules (vendored, new file, 3 lines)
@@ -0,0 +1,3 @@
[submodule "mysql2sqlite"]
	path = mysql2sqlite
	url = https://github.com/dumblob/mysql2sqlite.git

README.md (changed, 31 lines)
@@ -1,2 +1,31 @@
# Glitch City Laboratories (2003 - 2020) - Epilogue

This repository contains the tickets, scripts, and documentation for the end of Glitch City Laboratories.

## Installation

`pip install -e .`

## Standalone Epilogue Scripts

#### `deploy_archives`

Run this once the archives have been built to tar them up and scp them to the server.

#### Wiki Data (`wiki` directory)

##### `wiki_pages`

Not a script, just a listing of all the pages in the wiki (as of the 27 July 2020 lockdown). Use this and Special:Export to create an XML dump of wiki pages and place it in the `wiki` directory.

##### `find_pages`

Run this locally (it uses the MediaWiki HTTP API). It finds all pages in categories related to Pokémon generations 1-4 that have been edited since 31 March 2020.

#### Forum Data (`forum` directory)

##### `dump_forum_data`

Run this on a server with access to a Glitch City Laboratories database. It will dump the table schemas and data into several .sql files. Place these in the `forum` directory.

##### `create_sqlite_database`

Run this locally, after the `dump_forum_data` script has been run on the Glitch City Laboratories server and the dumps have been copied to the `forum` directory (the same directory as this script). A file called `forum.sqlite` will be generated from the MariaDB dumps. This sqlite database is required for generating the static archive later.

Make sure the `mysql2sqlite` submodule is initialized, and install `mawk` for the best results.

## Epilogue Package

## Misc Scripts

### `get_pms.php`

This is installed on Glitch City Laboratories and allows any logged-in user to export their PM inbox to an HTML file.

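The scripts above form a pipeline: export the wiki pages and forum dumps, build `forum.sqlite`, generate the static archives, then run `deploy_archives`. As a minimal sketch (not part of this repository), the generation step can also be driven from Python through the package's entry point, assuming the dumps are already in place:

```python
# Hypothetical driver, equivalent to running the `epilogue` console script
# with no arguments (which archives both the wiki and the forum).
# Assumes forum/forum.sqlite was built by create_sqlite_database and a
# Special:Export XML dump has been placed in wiki/.
from epilogue import main

main()
```
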
deploy_archives (new executable file, 20 lines)
@@ -0,0 +1,20 @@
#!/bin/sh -x
HOSTNAME=glitchcity.info
ARCHIVE_PATH=/var/www/html/gclarchives

cd archives
scp index.html style.css $HOSTNAME:$ARCHIVE_PATH

if [ -d "forums" ]; then
	cat ../forum/structure.sql ../forum/categories.sql ../forum/boards.sql ../forum/threads.sql ../forum/misc_data.sql > forums.sql
	cp ../forum/forum.sqlite forums.sqlite # forum or forums?
	tar -cf forums.tar forums && gzip -f forums.tar forums.sqlite forums.sql
	scp forums.sql.gz forums.sqlite.gz forums.tar.gz $HOSTNAME:$ARCHIVE_PATH
	ssh $HOSTNAME "cd $ARCHIVE_PATH; tar -xf forums.tar.gz"
fi

if [ -d "wiki" ]; then
	tar -cf wiki.tar wiki && gzip -f wiki.tar wiki.xml
	scp wiki.xml.gz wiki.tar.gz $HOSTNAME:$ARCHIVE_PATH
	ssh $HOSTNAME "cd $ARCHIVE_PATH; tar -xf wiki.tar.gz"
fi

epilogue/__init__.py (new file, 44 lines)
@@ -0,0 +1,44 @@
import os
from .forum import Forum
from .wiki import Wiki
from .archive_generator import ArchiveGenerator

import sys
import shutil

BASEDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
FORUM_DATABASE = os.path.join(BASEDIR, "forum", "forum.sqlite")
WIKI_DIRECTORY = os.path.join(BASEDIR, "wiki")
TEMPLATES_DIR = os.path.join(BASEDIR, "templates")
STATIC_DIR = os.path.join(BASEDIR, "static")

ARCHIVE_GENERATOR = ArchiveGenerator(TEMPLATES_DIR, STATIC_DIR)

ARCHIVES_BASEDIR = "archives"
FORUM_ARCHIVES = os.path.join(ARCHIVES_BASEDIR, "forums")
WIKI_ARCHIVES = os.path.join(ARCHIVES_BASEDIR, "wiki")

DEFAULT_ARGUMENTS = ["wiki", "forum"]

def main():
    args = sys.argv[1:]
    if not args:
        args = DEFAULT_ARGUMENTS

    ARCHIVE_GENERATOR.generate_index(ARCHIVES_BASEDIR)

    if "forum" in args or "forums" in args:
        ARCHIVE_GENERATOR.generate_forum(Forum(FORUM_DATABASE), FORUM_ARCHIVES)

    if "wiki" in args:
        archive_wiki()

def archive_wiki():
    wiki = None
    for entry in os.listdir(WIKI_DIRECTORY):
        if entry.endswith(".xml"):
            wiki = Wiki(os.path.join(WIKI_DIRECTORY, entry))

    if wiki:
        shutil.copyfile(wiki.xml_path, os.path.join(ARCHIVES_BASEDIR, "wiki.xml"))
        ARCHIVE_GENERATOR.generate_wiki(wiki, WIKI_ARCHIVES)

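As a usage sketch (illustrative, not part of the commit), the module-level objects above can also be driven directly, for example to regenerate only the forum archive:

```python
# Minimal sketch; assumes the sqlite dump already exists at the default path.
from epilogue import ARCHIVE_GENERATOR, ARCHIVES_BASEDIR, FORUM_ARCHIVES, FORUM_DATABASE
from epilogue.forum import Forum

ARCHIVE_GENERATOR.generate_index(ARCHIVES_BASEDIR)
ARCHIVE_GENERATOR.generate_forum(Forum(FORUM_DATABASE), FORUM_ARCHIVES)
```
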
epilogue/archive_generator.py (new file, 262 lines)
@@ -0,0 +1,262 @@
import os
import logging
import shutil
import math
import json
import gzip

from itertools import chain
from traceback import print_exc

import chevron
import bbcode
import html

from .forum import DEFAULT_POSTS_PER_PAGE
from .wiki import Template, Renderer, Linker, NAMESPACES as WIKI_NAMESPACES

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("ArchiveGenerator")

DEX_LANGUAGES = ["", "DE", "ES", "FR", "IT", "JP", "KO"]
DEX_TYPES = [
    "GlitchDex", "AttackDex", "DexDex", "AreaDex", "TrainerDex", "FieldMoveDex", "ItemDex", "FamilyDex", "DecDex", "DayDex",
    "MDIGlitchDex", "MetascriptDex", "TMHMDex", "StatDex", "PosterDex", "TypeDex", "UnownDex", "DollDex", "DefaultNameDex",
    "BattleTypeDe", "BadgeDescriptionDex", "FacingDex"
]
DEXES = list(chain.from_iterable([["{}{}".format(dex_type, language) for dex_type in DEX_TYPES] for language in DEX_LANGUAGES]))

FORUM_THREAD_INDEX = "thread_index.json.gz"
IMAGE_DIRECTORY = "images"

class ArchiveLinker(Linker):
    def __init__ (self, directory_names=[]):
        super().__init__()
        self.directory_names = directory_names
        self.image_directory = IMAGE_DIRECTORY
        self.replacements = {
            "/": "+",
            #":": ""
        }

    def translate_page_title (self, page_title):
        page_title = super().translate_page_title(page_title)
        fragment = ""

        if "#" in page_title:
            fragment = page_title[page_title.find("#"):]
            page_title = page_title[:-len(fragment)]

        directory_name = ""
        for name in self.directory_names:
            if page_title.startswith("{}/".format(name)):
                directory_name = name
                page_title = page_title[len(directory_name) + 1:]
                break

        for key, value in self.replacements.items():
            page_title = page_title.replace(key, value)

        return "{}{}{}.html{}".format(directory_name, '/' if directory_name else '', page_title, fragment)

    def translate_image_title (self, page_title):
        image_title = super().translate_image_title(page_title)
        if not image_title:
            return

        return "{}/{}".format(self.image_directory, image_title)

def prepare_thread (thread):
    thread.subject = html.unescape(thread.subject)
    return thread

def prepare_post (post):
    post = prepare_thread(post)
    parser = bbcode.Parser()
    post.body = html.unescape(parser.format(post.body))
    return post

class ArchiveGenerator():
    def __init__ (self, template_dir, static_dir):
        self.template_dir = template_dir
        self.static_dir = static_dir

    def generate_index (self, out_dir):
        logger.info("Generating index page at %s", out_dir)
        try:
            os.makedirs(out_dir)
        except FileExistsError: pass

        shutil.copyfile(os.path.join(self.static_dir, "style.css"), os.path.join(out_dir, "style.css"))
        renderer = TemplateRenderer(self.template_dir, out_dir)
        renderer.render_template_to_file("index", "index.html", {})

    def generate_wiki (self, wiki, out_dir):
        logger.info("Archiving wiki to %s", out_dir)
        try:
            os.makedirs(out_dir)
        except FileExistsError: pass

        shutil.copyfile(os.path.join(self.static_dir, "style.css"), os.path.join(out_dir, "style.css"))
        renderer = TemplateRenderer(self.template_dir, out_dir)
        renderer.render_template_to_file("redirect", "index.html", {
            "target": "Main_Page.html"
        })

        categories = {}
        templates = dict([(page.title.split(":")[1], Template(page.get_latest().text)) for page in wiki.get_pages() if page.namespace == WIKI_NAMESPACES['TEMPLATE']])
        linker = ArchiveLinker(directory_names=DEXES)
        wikitext_renderer = Renderer(templates, linker)
        for page in wiki.get_pages():
            try:
                if page.namespace != WIKI_NAMESPACES['MAIN']:
                    continue

                page_out = linker.translate_page_title(page.title)
                base = "./"
                if "/" in page_out:
                    base = "../" * page_out.count("/")
                try:
                    os.makedirs(os.path.dirname(os.path.join(out_dir, page_out)))
                except FileExistsError: pass

                if page.redirect:
                    logger.info("Archiving redirect page (%s -> %s) to %s", page.title, page.redirect, page_out)
                    renderer.render_template_to_file("redirect", page_out, {
                        "target": "{}{}".format(base, linker.translate_page_title(page.redirect))
                    })
                else:
                    logger.info("Archiving page %s to %s", page.title, page_out)
                    (rendered, page_categories) = wikitext_renderer.render(page.get_latest().text, base, page=page)

                    for category in page_categories:
                        if not category in categories:
                            categories[category] = []

                        categories[category].append({
                            "url": page_out,
                            "title": page.title
                        })

                    renderer.render_template_to_file("page", page_out, {
                        "title": " - {}".format(page.title),
                        "pagename": page.title,
                        "page": page,
                        "base": base,
                        "text": rendered
                    })
            except Exception as e:
                logger.error("Error encountered when archiving %s: %s", page.title, e)
                print_exc()
                if isinstance(e, ValueError):
                    raise e

        for category, pages in categories.items():
            category_out = "Category:{}".format(linker.translate_page_title(category))
            logger.info("Archiving category %s to %s", category, category_out)

            try:
                renderer.render_template_to_file("category", category_out, {
                    "title": " - {}".format(category),
                    "pagename": "Category:{}".format(category),
                    "category": category,
                    "pages": pages
                })
            except Exception as e:
                logger.error("Error encountered when archiving %s: %s", category, e)
                print_exc()

    def generate_forum (self, forum, out_dir):
        logger.info("Archiving forum to %s", out_dir)
        try:
            os.makedirs(out_dir)
        except FileExistsError: pass

        shutil.copyfile(os.path.join(self.static_dir, "style.css"), os.path.join(out_dir, "style.css"))
        renderer = TemplateRenderer(self.template_dir, out_dir)
        renderer.render_template_to_file("boards", "index.html", {
            "categories": forum.get_board_tree()
        })

        threads = []
        for board in forum.get_boards():
            forum_threads = forum.get_threads_in_board(board)
            threads = threads + forum_threads
            self.generate_forum_board(forum, board, forum_threads, out_dir)

        self.generate_thread_index(threads, os.path.join(out_dir, FORUM_THREAD_INDEX))

    def generate_forum_board (self, forum, board, threads, out_dir):
        board_out_dir = os.path.join(out_dir, "board-{}".format(board.id))
        logger.info("Archiving board %s to %s", board.name, board_out_dir)
        try:
            os.makedirs(board_out_dir)
        except FileExistsError: pass

        renderer = TemplateRenderer(self.template_dir, board_out_dir)
        threads = [prepare_thread(thread) for thread in threads]
        renderer.render_template_to_file("threads", "index.html", {
            "title": " - {}".format(board.name),
            "base": "../",
            "board": board,
            "threads": threads
        })

        for thread in threads:
            self.generate_forum_thread(forum, board, thread, board_out_dir)

    def generate_forum_thread (self, forum, board, thread, out_dir):
        thread_out_dir = os.path.join(out_dir, "thread-{}".format(thread.id))
        logger.info("Archiving thread %s to %s", thread.subject, thread_out_dir)
        try:
            os.makedirs(thread_out_dir)
        except FileExistsError: pass

        renderer = TemplateRenderer(self.template_dir, thread_out_dir)
        renderer.render_template_to_file("redirect", "index.html", {
            "target": "page-0.html"
        })

        total_pages = math.ceil((thread.num_replies + 1) / DEFAULT_POSTS_PER_PAGE)
        page_links = [{"label": page + 1, "link": "page-{}.html".format(page)} for page in range(total_pages)]
        page = 0
        while True:
            posts = [prepare_post(post) for post in forum.get_posts_in_thread(thread, page)]
            if len(posts) < 1:
                break

            logger.info("Archiving page %s of thread %s", page, thread.subject)
            renderer.render_template_to_file("posts", "page-{}.html".format(page), {
                "title": " - {} - Page {}".format(thread.subject, page + 1),
                "base": "../../",
                "board": board,
                "thread": thread,
                "page": page,
                "next": page + 1,
                "page_links": page_links,
                "prev": page - 1,
                "posts": posts
            })
            page = page + 1

    def generate_thread_index (self, threads, out_path):
        # with open(out_path, "wb") as out:
        #     pickle.dump({thread.id: {"parent": thread.parent} for thread in threads}, out, protocol=4)
        threads = {thread.id: {"parent": thread.parent} for thread in threads}
        with gzip.open(out_path, "w") as out:
            out.write(json.dumps(threads).encode())

class TemplateRenderer():
    def __init__ (self, template_dir, out_dir):
        self.template_dir = template_dir
        self.partials_dir = os.path.join(template_dir, "partials")
        self.out_dir = out_dir
        self.extension = "mustache"

    def open_template (self, name):
        return open(os.path.join(self.template_dir, "{}.{}".format(name, self.extension)))

    def render_template_to_file (self, template_name, out_file, data={}):
        with self.open_template(template_name) as template:
            with open(os.path.join(self.out_dir, out_file), "w") as out:
                out.write(chevron.render(template, data, self.partials_dir, self.extension))

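Two of the helpers above are easy to sanity-check in isolation; the inputs below are made up for illustration:

```python
# Illustrative inputs, not taken from the archive.
import bbcode, html
from epilogue.archive_generator import ArchiveLinker, DEXES

# BBCode body -> HTML plus entity unescaping, as prepare_post does internally.
parser = bbcode.Parser()
print(html.unescape(parser.format("[b]Hall of Fame[/b] data corruption")))
# -> <strong>Hall of Fame</strong> data corruption

# Page titles -> static file names, including the Dex sub-directory split.
linker = ArchiveLinker(directory_names=DEXES)
print(linker.translate_page_title("GlitchDex/RB:234"))
# -> GlitchDex/RB:234.html (Dex pages keep their own sub-directory)
print(linker.translate_page_title("Pokémon merge glitch#Methods"))
# -> Pokémon_merge_glitch.html#Methods (spaces to underscores, fragment kept)
```
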
epilogue/forum.py (new file, 104 lines)
@@ -0,0 +1,104 @@
import sqlite3
from datetime import datetime

PREFIX = "smf_"
GET_BOARDS = """
SELECT * FROM `{}boards`
ORDER BY `board_order` ASC
""".format(PREFIX)
GET_CATEGORIES = """
SELECT * FROM `{}categories`
ORDER BY `cat_order` ASC
""".format(PREFIX)
GET_THREADS = """
SELECT * FROM `{}topics` AS `topics`, `{}messages` AS `messages`
WHERE `topics`.`id_board`=? AND `messages`.`id_msg`=`topics`.`id_first_msg`
ORDER BY `id_last_msg` DESC
LIMIT ? OFFSET ?
""".format(PREFIX, PREFIX)
GET_POSTS = """
SELECT * FROM `{}messages`
WHERE `id_topic`=?
ORDER BY `poster_time` ASC
LIMIT ? OFFSET ?
""".format(PREFIX)

DEFAULT_POSTS_PER_PAGE = 15
DEFAULT_THREADS_PER_PAGE = 2000

def fix_encoding (string):
    return string.encode("latin1", errors="ignore").decode(errors="ignore")

class Forum():
    def __init__ (self, db_path):
        self.connection = sqlite3.connect(db_path)
        self.connection.row_factory = sqlite3.Row

    def get_board_tree (self):
        categories = self.get_categories()
        boards = self.get_boards()
        for category in categories:
            category.children = [child for child in boards if child.category == category.id and child.child_level == 0]
        for board in boards:
            board.children = [child for child in boards if child.parent_board == board.id]
        return categories

    def get_categories (self):
        cursor = self.connection.cursor()
        cursor.execute(GET_CATEGORIES)
        return [Category(category) for category in cursor.fetchall()]

    def get_boards (self):
        cursor = self.connection.cursor()
        cursor.execute(GET_BOARDS)
        return [Board(board) for board in cursor.fetchall()]

    def get_threads_in_board (self, board, page=0, per_page=DEFAULT_THREADS_PER_PAGE):
        try:
            board = board.id
        except AttributeError: pass  # board may already be a bare id
        cursor = self.connection.cursor()
        cursor.execute(GET_THREADS, (board, per_page, page * per_page))
        return [Thread(thread) for thread in cursor.fetchall()]

    def get_posts_in_thread (self, thread, page=0, per_page=DEFAULT_POSTS_PER_PAGE):
        try:
            thread = thread.id
        except AttributeError: pass  # thread may already be a bare id
        cursor = self.connection.cursor()
        cursor.execute(GET_POSTS, (thread, per_page, page * per_page))
        return [Post(post) for post in cursor.fetchall()]

class Category():
    def __init__ (self, row):
        self.id = row['id_cat']
        self.name = fix_encoding(row['name'])
        self.children = []

class Board():
    def __init__ (self, row):
        self.id = row['id_board']
        self.category = row['id_cat']
        self.parent_board = row['id_parent']
        self.child_level = row['child_level']
        self.name = fix_encoding(row['name'])
        self.description = fix_encoding(row['description'])
        self.children = []

class Thread():
    def __init__ (self, row):
        self.id = row['id_topic']
        self.parent = row['id_board']
        self.datetime = datetime.fromtimestamp(row['poster_time'])
        self.subject = fix_encoding(row['subject'])
        self.poster_name = fix_encoding(row['poster_name'])
        self.num_replies = row['num_replies']

class Post():
    def __init__ (self, row):
        self.id = row['id_msg']
        self.parent = row['id_topic']
        self.datetime = datetime.fromtimestamp(row['poster_time'])
        self.subject = fix_encoding(row['subject'])
        self.body = fix_encoding(row['body'])
        self.poster_name = fix_encoding(row['poster_name'])

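A sketch (not part of the commit) of walking the data the way `ArchiveGenerator.generate_forum` does, assuming `forum/forum.sqlite` exists:

```python
# Boards -> threads -> pages of posts, using the LIMIT/OFFSET pagination above.
from epilogue.forum import Forum

forum = Forum("forum/forum.sqlite")  # assumed path of the converted dump
for board in forum.get_boards():
    for thread in forum.get_threads_in_board(board):
        page = 0
        while True:
            posts = forum.get_posts_in_thread(thread, page)  # 15 posts per page
            if not posts:
                break
            print(board.name, "/", thread.subject, "page", page, ":", len(posts), "posts")
            page += 1
```
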
epilogue/redirector.py (new file, 98 lines)
@@ -0,0 +1,98 @@
import argparse
import gzip
import urllib.request
import json

from .archive_generator import ArchiveLinker, DEXES, FORUM_THREAD_INDEX

from flask import Flask, redirect, request
app = Flask(__name__)

DEFAULT_ARCHIVES_DOMAIN = "https://archives.glitchcity.info/"
DEFAULT_FORUMS_ARCHIVE = "{}forums".format(DEFAULT_ARCHIVES_DOMAIN)
DEFAULT_WIKI_ARCHIVE = "{}wiki".format(DEFAULT_ARCHIVES_DOMAIN)

## Wiki redirector
@app.route("/wiki/")
def redirect_wiki_main ():
    return redirect_wiki("Main Page")

@app.route("/wiki/<path:path>")
def redirect_wiki (path):
    return redirect(make_wiki_url(path))

def make_wiki_url (path):
    if path.endswith("/"):
        path = path[:-1]

    return app.args.wiki_archive + app.wiki_linker.translate_page_title(path)

## Forum redirector
@app.route('/forums/')
def redirect_forums_index ():
    return redirect_forums("")

@app.route('/forums/<path:path>')
def redirect_forums (path):
    return redirect(make_forum_url(request))

def make_forum_url (request):
    thread_id = request.args.get("topic", None)
    board_id = request.args.get("board", None)
    post_id = None

    if thread_id:
        thread_id = strip_extension(thread_id)

        if "." in thread_id:
            (thread_id, post_id) = thread_id.split(".")
            post_id = post_id[len("msg"):]

        if not board_id:
            board_id = app.thread_index[thread_id]['parent']

    try:
        if "." in board_id:
            board_id = board_id.split(".")[0]
    except TypeError: pass

    url = app.args.forums_archive

    if board_id:
        url = url + "board-{}".format(board_id)

    if thread_id:
        url = url + "/thread-{}".format(thread_id)

    if not url.endswith("/"):
        url = url + "/"

    return url

def strip_extension (item):
    for extension in [".html"]:
        if item.endswith(extension):
            item = item[:-len(extension)]
    return item

def read_thread_index (forums_archive):
    with urllib.request.urlopen("{}{}".format(forums_archive, FORUM_THREAD_INDEX)) as gzipped_in:
        data = gzipped_in.read()
        return json.loads(gzip.decompress(data).decode())

def main ():
    parser = argparse.ArgumentParser()
    parser.add_argument("--wiki-archive", help="URL to wiki archive", default=DEFAULT_WIKI_ARCHIVE)
    parser.add_argument("--forums-archive", help="URL to forums archive", default=DEFAULT_FORUMS_ARCHIVE)
    args = parser.parse_args()

    if not args.wiki_archive.endswith("/"):
        args.wiki_archive = args.wiki_archive + "/"

    if not args.forums_archive.endswith("/"):
        args.forums_archive = args.forums_archive + "/"

    app.args = args
    app.thread_index = read_thread_index(args.forums_archive)
    app.wiki_linker = ArchiveLinker(directory_names=DEXES)
    app.run()

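For example, an old SMF permalink such as `/forums/index.php?topic=9114.msg206062` (the message id here is made up; 9114 is the closure announcement thread, whose parent board is 2) resolves as follows, written out without Flask:

```python
# Standalone walk-through of the same string handling as make_forum_url.
from epilogue.redirector import strip_extension

topic = strip_extension("9114.msg206062")  # no .html suffix here, unchanged
thread_id, post_id = topic.split(".")
post_id = post_id[len("msg"):]             # "206062" (parsed but unused in the URL)
board_id = 2                               # app.thread_index["9114"]["parent"]

url = "https://archives.glitchcity.info/forums/"
url += "board-{}".format(board_id)
url += "/thread-{}".format(thread_id)
url += "/"
print(url)  # https://archives.glitchcity.info/forums/board-2/thread-9114/
```
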
epilogue/wiki.py (new file, 228 lines)
@@ -0,0 +1,228 @@
from xml.etree import ElementTree

import mwparserfromhell
from mwparserfromhell.nodes import Wikilink, Comment, ExternalLink, Heading, Tag, Template, Text

NAMESPACE = "{http://www.mediawiki.org/xml/export-0.10/}"
PAGE_TAG = "{}page".format(NAMESPACE)
ID_TAG = "{}id".format(NAMESPACE)
TITLE_TAG = "{}title".format(NAMESPACE)
REVISION_TAG = "{}revision".format(NAMESPACE)
NS_TAG = "{}ns".format(NAMESPACE)
REDIRECT_TAG = "{}redirect".format(NAMESPACE)

TEXT_TAG = "{}text".format(NAMESPACE)
FORMAT_TAG = "{}format".format(NAMESPACE)
MODEL_TAG = "{}model".format(NAMESPACE)
TIMESTAMP_TAG = "{}timestamp".format(NAMESPACE)
COMMENT_TAG = "{}comment".format(NAMESPACE)
CONTRIBUTOR_TAG = "{}contributor".format(NAMESPACE)

USERNAME_TAG = "{}username".format(NAMESPACE)

NAMESPACES = {
    "MAIN": 0,
    "TEMPLATE": 10
}

INTERWIKI_NAMESPACES = {
    "bp:": "https://bulbapedia.bulbagarden.net/wiki/{}",
    "wikipedia:": "https://en.wikipedia.org/wiki/{}"
}

FILE_NAMESPACES = ["File:", "Image:"]
CATEGORY_NAMESPACE = "Category:"
CATEGORY_LINK_NAMESPACE = ":{}".format(CATEGORY_NAMESPACE)

class Wiki():
    def __init__ (self, xml_path):
        self.xml_path = xml_path

    def get_pages (self):
        tree = ElementTree.parse(self.xml_path)
        return (Page(element) for element in tree.getroot() if element.tag == PAGE_TAG)

class Page():
    def __init__ (self, element):
        self.redirect = None
        self.revisions = []
        for child in element:
            if child.tag == ID_TAG:
                self.id = child.text
            elif child.tag == NS_TAG:
                self.namespace = int(child.text)
            elif child.tag == TITLE_TAG:
                self.title = child.text
            elif child.tag == REVISION_TAG:
                self.revisions.append(Revision(child))
            elif child.tag == REDIRECT_TAG:
                self.redirect = child.attrib['title']

    def get_latest (self):
        return self.revisions[0]

class Revision():
    def __init__ (self, element):
        for child in element:
            if child.tag == ID_TAG:
                self.id = child.text
            elif child.tag == TEXT_TAG:
                self.text = child.text
            elif child.tag == CONTRIBUTOR_TAG:
                self.contributor = Contributor(child)
            elif child.tag == TIMESTAMP_TAG:
                self.timestamp = child.text
            elif child.tag == MODEL_TAG:
                self.model = child.text
            elif child.tag == COMMENT_TAG:
                self.comment = child.text

class Contributor():
    def __init__ (self, element):
        for child in element:
            if child.tag == ID_TAG:
                self.id = child.text
            elif child.tag == USERNAME_TAG:
                self.username = child.text

class Renderer():
    def __init__ (self, templates={}, linker=None):
        self.templates = templates
        self.linker = linker if linker else Linker()

    def render (self, wikitext, base="", *args, **kwargs):
        categories = []
        wikitext = self.transclude_templates(wikitext, *args, **kwargs)

        # parse out categories
        for link in wikitext.ifilter_wikilinks():
            if not link.title.startswith(CATEGORY_NAMESPACE):
                continue

            wikitext.remove(link)
            categories.append(link.title[len(CATEGORY_NAMESPACE):])

        rendered = [render(wikitext, base, self.linker)]
        if categories:
            rendered.append('<h2>Categories</h2><ul class="categories">')
            for category in categories:
                rendered.append('<li><a href="{}Category:{}">{}</a></li>'.format(
                    base,
                    self.linker.translate_page_title(category),
                    category
                ))
            rendered.append("</ul>")

        return ("".join(rendered), categories)

    def transclude_templates (self, wikitext, *args, **kwargs):
        wikitext = mwparserfromhell.parse(wikitext)
        for inclusion in wikitext.ifilter_templates():
            template_key = str(inclusion.name)
            template = self.templates.get(template_key, self.templates.get(template_key[0].upper() + template_key[1:], None))
            result = None
            if template:
                result = template(inclusion, *args, **kwargs)
            else:
                result = "<span class='unknown-template'>Template:{0}</span>".format(inclusion.name)

            try:
                wikitext.replace(inclusion, result) #self.transclude_templates(result))
            except ValueError: pass
        return wikitext

def render (wikitext, base="", linker=None):
    rendered = []

    if not linker:
        linker = Linker()

    for node in wikitext.ifilter(False):
        # node types:
        # https://mwparserfromhell.readthedocs.io/en/latest/api/mwparserfromhell.nodes.html#module-mwparserfromhell.nodes.text
        node_type = type(node)
        if node_type is Wikilink:
            image_name = linker.translate_image_title(node.title)
            if image_name:
                rendered.append('<img src="{}{}" />'.format(
                    base,
                    image_name
                ))
            else:
                url = linker.translate_interwiki_title(node.title)
                if not url:
                    url = "{}{}".format(base, linker.translate_page_title(node.title))

                rendered.append('<a href="{}">{}</a>'.format(
                    url,
                    render(node.text if node.text else node.title, base, linker)
                ))
        elif node_type is ExternalLink:
            rendered.append('<a href="{}">{}</a>'.format(
                node.url,
                render(node.title if node.title else node.url)
            ))
        elif node_type is Tag:
            rendered.append("<{}>{}</{}>".format(
                render(node.tag),
                render(node.contents, base, linker),
                render(node.tag)
            ))
        elif node_type is Heading:
            rendered.append('<h{} id="{}">{}</h{}>'.format(
                node.level,
                reformat_page_title(node.title),
                render(node.title, base, linker),
                node.level
            ))
        elif node_type is Text:
            rendered.append(node.value)

    return "".join(rendered).strip().replace("\n\n", "<br /><br />")

class Linker():
    def __init__ (self, file_namespaces=FILE_NAMESPACES, interwiki_namespaces=INTERWIKI_NAMESPACES):
        self.file_namespaces = file_namespaces
        self.interwiki_namespaces = interwiki_namespaces

    def translate_interwiki_title (self, page_title):
        for namespace, url in self.interwiki_namespaces.items():
            if page_title.startswith(namespace):
                return url.format(page_title[len(namespace):])

    def translate_page_title (self, page_title):
        if page_title.startswith(CATEGORY_LINK_NAMESPACE):
            page_title = page_title[1:]

        return reformat_page_title(page_title)

    def translate_image_title (self, page_title):
        for namespace in self.file_namespaces:
            if page_title.startswith(namespace):
                return reformat_page_title(page_title[len(namespace):])

def reformat_page_title (page_title):
    if not page_title:
        return ""

    return "{}{}".format(page_title[0].upper(), page_title[1:].replace(' ', '_'))

class Template():
    def __init__ (self, wikicode):
        self.wikicode = mwparserfromhell.parse(wikicode)
        for tag in self.wikicode.ifilter_tags():
            if tag.tag == "noinclude":
                self.wikicode.remove(tag)

    def __call__ (self, inclusion, *args, **kwargs):
        parsed_wikicode = mwparserfromhell.parse(self.wikicode)
        for argument in parsed_wikicode.ifilter_arguments():
            value = argument.default if argument.default else argument.name
            if inclusion.has(argument.name):
                value = inclusion.get(argument.name)

            try:
                parsed_wikicode.replace(argument, value)
            except ValueError: pass
        return parsed_wikicode

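A self-contained sketch of the input these classes expect: a MediaWiki Special:Export dump in the `export-0.10` schema. The page below is made up for illustration:

```python
from epilogue.wiki import Wiki

# Made-up one-page export; the real input is a full Special:Export dump.
xml = """<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.10/">
  <page>
    <title>Glitch City</title>
    <ns>0</ns>
    <id>1</id>
    <revision>
      <id>42</id>
      <timestamp>2020-07-27T00:00:00Z</timestamp>
      <text>'''Glitch City''' is a corrupted map area.</text>
    </revision>
  </page>
</mediawiki>"""

with open("sample.xml", "w") as out:  # Wiki reads from a file path
    out.write(xml)

for page in Wiki("sample.xml").get_pages():
    # namespace 0 == NAMESPACES["MAIN"]; get_latest() is the first <revision>
    print(page.title, page.namespace, page.get_latest().text)
```
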
forum/create_sqlite_database (new executable file, 14 lines)
@@ -0,0 +1,14 @@
#!/bin/sh
SQLITE_OUT=forum.sqlite
AWK=awk

if command -v mawk > /dev/null 2>&1; then
	AWK=mawk
fi

$AWK -f ../mysql2sqlite/mysql2sqlite structure.sql | sqlite3 -echo $SQLITE_OUT
$AWK -f ../mysql2sqlite/mysql2sqlite misc_data.sql | sqlite3 -echo $SQLITE_OUT
$AWK -f ../mysql2sqlite/mysql2sqlite categories.sql | sqlite3 -echo $SQLITE_OUT
$AWK -f ../mysql2sqlite/mysql2sqlite boards.sql | sqlite3 -echo $SQLITE_OUT
$AWK -f ../mysql2sqlite/mysql2sqlite threads.sql | sqlite3 -echo $SQLITE_OUT
#cat threads.sql | sqlite3 $SQLITE_OUT

forum/dump_forum_data (new executable file, 203 lines)
@@ -0,0 +1,203 @@
#!/usr/bin/env python3
import argparse
import os
import re

from xml.etree import ElementTree
from subprocess import check_output, check_call

# Tables we don't want data from. These are usually populated with default data from an SMF install.
DUMP_STRUCTURE_ONLY = [
    "smf_admin_info_files", "smf_ads", "smf_ads_settings", "smf_approval_queue", "smf_awards", "smf_ban_groups", "smf_ban_items",
    "smf_bbcodes", "smf_board_permissions", "smf_buddies", "smf_calendar", "smf_calendar_holidays", "smf_cf_fields", "smf_cf_forms",
    "smf_collapsed_categories", "smf_custom_actions", "smf_custom_fields", "smf_down_cat", "smf_down_catperm", "smf_down_comment",
    "smf_down_creport", "smf_down_custom_field", "smf_down_custom_field_data", "smf_down_file", "smf_down_groupquota", "smf_down_rating",
    "smf_down_report", "smf_down_userquota", "smf_global_announcements", "smf_global_announcements_boards", "smf_groupmods",
    "smf_group_moderators", "smf_login", "smf_log_actions", "smf_log_activity", "smf_log_banned", "smf_log_boards", "smf_log_comments",
    "smf_log_digest", "smf_log_errors", "smf_log_floodcontrol", "smf_log_group_requests", "smf_log_issues", "smf_log_karma",
    "smf_log_mark_read", "smf_log_member_notices", "smf_log_notify", "smf_log_notify_projects", "smf_log_online", "smf_log_packages",
    "smf_log_polls", "smf_log_projects", "smf_log_project_mark_read", "smf_log_reported", "smf_log_reported_comments",
    "smf_log_scheduled_tasks", "smf_log_search_messages", "smf_log_search_results", "smf_log_search_subjects", "smf_log_search_topics",
    "smf_log_spider_hits", "smf_log_spider_stats", "smf_log_subscribed", "smf_log_topics", "smf_mail_queue", "smf_membergroups", "smf_members",
    "smf_moderators", "smf_openid_assoc", "smf_package_servers", "smf_permissions", "smf_permission_profiles", "smf_personal_messages",
    "smf_picture_comments", "smf_pm_attachments", "smf_pm_recipients", "smf_pm_rules", "smf_profile_albums", "smf_profile_comments",
    "smf_profile_pictures", "smf_scheduled_tasks", "smf_sessions", "smf_settings", "smf_smileys", "smf_spiders", "smf_subscriptions", "smf_themes"
]

# Tables we want all the data from. Some legacy data that may be of interest is also here.
DUMP_ALL_DATA = [
    # actual forum content
    "smf_polls", "smf_poll_choices", "smf_message_icons", "smf_attachments",

    # pre-wiki (orange glove)
    "comments", "dirs", "docs", "glitchdex", "glitchdex2", "groups", "old", "staff", "statsbydex",

    # felblog (old blogging system)
    "smf_felblog_categories", "smf_felblog_cmnt_log", "smf_felblog_comments", "smf_felblog_content", "smf_felblog_cont_log", "smf_felblog_manager",
    "smf_felblog_ratings", "smf_felblog_settings",

    # arcade system
    "smf_games", "smf_games_category", "smf_games_challenge", "smf_games_favorite", "smf_games_high", "smf_games_rate",
    "smf_games_settings", "smf_games_tournament", "smf_games_tournament_players", "smf_games_tournament_results", "smf_games_tournament_scores",

    # shop system
    "smf_shop_inventory", "smf_shop_items", "smf_shop_shops",

    # project management system (we had this?)
    "smf_projects", "smf_projects_settings", "smf_project_developer", "smf_project_permissions", "smf_project_profiles", "smf_project_timeline",
    "smf_project_trackers", "smf_project_versions", "smf_issues", "smf_issue_attachments", "smf_issue_category", "smf_issue_comments",
    "smf_issue_tags",

    # used for the IRC bridge
    "smf_slartibartfast"
]

# Special tables we need to filter.
CATEGORIES = "smf_categories"
BOARDS = "smf_boards"
TOPICS = "smf_topics"
MESSAGES = "smf_messages"

# Dump filenames
STRUCTURE_DUMP = "structure.sql"
MISC_DUMP = "misc_data.sql"
CATEGORIES_DUMP = "categories.sql"
BOARDS_DUMP = "boards.sql"
TOPICS_DUMP = "threads.sql"

# Categories we are not interested in archiving.
# `id_cat` in (1, 2)
DO_NOT_ARCHIVE_CATEGORIES = [
    7, # Links
    12, # Epsilon: ?????
    6, # Sigma: Higher Access
    8 # Omega: Garbage
]

# Boards we are not interested in archiving.
# `id_board` in (1, 2)
DO_NOT_ARCHIVE_BOARDS = [
    24, 94, 118, 121, # Links
    40, # Exclusive Board
    65, # Requests for Moderatorship
    66, # Requests for Membership+
    67, # Requests for Distinguished Membership
    23, # M.A.S.K. HQ (Staff Board)
    22, # Admins Only Board
    89, # Test Board
    86, # Omega Archives
    51, 37, 79, 26, 47, 44, 45, 99, 93, 119, 96,
    62, 60, 80, 84, # Submit-A-Glitch Archives
    3, 4, 5, 57, 58, 59, 38, 54, 63, 64,
    68, 69, 70, 81, 82, 83,
    28, # The Dumpster Out Back
    123 # ?????
]

# Regexes for sensitive information
EMAIL_REGEX = re.compile(r"'[^'\s]+@[^'\s]+'")
IP_REGEX = re.compile(r"'\d+\.\d+\.\d+\.\d+'")

class Database():
    def __init__(self, host, database, username, password):
        self.host = host
        self.database = database
        self.username = username
        self.password = password

    def dump(self, tables, filename, *args):
        command = ["mysqldump"] + list(args) + self.auth()

        if filename:
            command.append("--result-file={}".format(filename))

        command.append(self.database)
        command = command + tables

        print(">> {}".format(format_command(command)))
        if filename:
            check_call(command)
            return filename
        else:
            return check_output(command).strip().decode()

    def query(self, query):
        command = ["mysql"] + self.auth() + [
            "--xml",
            self.database,
            "-e", query
        ]

        print(">> {}".format(format_command(command)))
        result = check_output(command).strip()
        return [row_from_xml(element) for element in ElementTree.fromstring(result)]

    def auth(self):
        return [
            "--user={}".format(self.username),
            "--password={}".format(self.password),
            "--host={}".format(self.host)
        ]

def format_command(command):
    return "{}".format([item for item in command if not item.startswith("--password")])

def row_from_xml(element):
    row = {}
    for child in element:
        row[child.attrib['name']] = child.text
    return row

parser = argparse.ArgumentParser(description="Forum scraper")
parser.add_argument(
    "--host",
    dest="host",
    default="127.0.0.1",
    help="Database host"
)
parser.add_argument(
    "--db",
    dest="db",
    default="glitchcity",
    help="Database name"
)
parser.add_argument(
    "--username",
    dest="username",
    default="glitchcity",
    help="Database username"
)
parser.add_argument(
    "--password",
    dest="password",
    required=True,
    help="Database password"
)
args = parser.parse_args()

database = Database(args.host, args.db, args.username, args.password)

# Select which topics we DON'T want, based on the board ids we don't want
do_not_archive_thread_ids = [row['id_topic'] for row in database.query("SELECT id_topic FROM smf_topics WHERE id_board IN ({})".format(",".join([str(id) for id in DO_NOT_ARCHIVE_BOARDS])))]

if not os.path.exists(STRUCTURE_DUMP):
    database.dump(DUMP_STRUCTURE_ONLY + DUMP_ALL_DATA + [CATEGORIES, BOARDS, TOPICS, MESSAGES], STRUCTURE_DUMP, "--no-data")

if not os.path.exists(MISC_DUMP):
    database.dump(DUMP_ALL_DATA, MISC_DUMP, "--no-create-info")

category_filter = ",".join([str(id) for id in DO_NOT_ARCHIVE_CATEGORIES])
board_filter = ",".join([str(id) for id in DO_NOT_ARCHIVE_BOARDS])
thread_filter = ",".join([str(id) for id in do_not_archive_thread_ids])

if not os.path.exists(CATEGORIES_DUMP):
    database.dump([CATEGORIES], CATEGORIES_DUMP, "--where=NOT id_cat in ({})".format(category_filter), "--no-create-info")

if not os.path.exists(BOARDS_DUMP):
    database.dump([BOARDS], BOARDS_DUMP, "--where=NOT id_board in ({})".format(board_filter), "--no-create-info")

with open(TOPICS_DUMP, "w", encoding="utf-8") as topics_dump:
    dump_content = database.dump([TOPICS, MESSAGES], None, "--where=NOT id_topic in ({})".format(thread_filter), "--no-create-info")
    dump_content = EMAIL_REGEX.sub("'*****@*****'", dump_content)
    dump_content = IP_REGEX.sub("'***.***.***.***'", dump_content)
    topics_dump.write(dump_content)

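For illustration, here is the scrubbing pass applied to a single (made-up) dumped row; the same two substitutions run over the whole topics/messages dump before it is written to `threads.sql`:

```python
import re

EMAIL_REGEX = re.compile(r"'[^'\s]+@[^'\s]+'")
IP_REGEX = re.compile(r"'\d+\.\d+\.\d+\.\d+'")

# Made-up row; the real dump contains quoted e-mail and IP columns like these.
line = "INSERT INTO smf_messages VALUES (1,'user@example.com','192.0.2.1');"
line = EMAIL_REGEX.sub("'*****@*****'", line)
line = IP_REGEX.sub("'***.***.***.***'", line)
print(line)
# INSERT INTO smf_messages VALUES (1,'*****@*****','***.***.***.***');
```
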
mysql2sqlite (submodule)
Submodule mysql2sqlite added at 2a759df790

setup.py (new file, 18 lines)
@@ -0,0 +1,18 @@
#!/usr/bin/env python

from distutils.core import setup

setup(
    name='Glitch City Labs - Epilogue',
    version='0.0.1',
    description='Tools for exporting and creating archives of Glitch City Labs data',
    author='Adrian Kuschelyagi Malacoda',
    packages=['epilogue'],
    install_requires=['pysqlite3 >= 0.4.3', 'chevron >= 0.13.1', 'bbcode >= 1.1.0', 'mwparserfromhell >= 0.5.4', 'flask >= 1.1.2'],
    entry_points={
        'console_scripts': [
            'epilogue = epilogue:main',
            'gclredirector = epilogue.redirector:main'
        ]
    }
)

static/style.css (new file, 21 lines)
@@ -0,0 +1,21 @@
body { font-family: monospace; }

ul.boards { margin-left: 0; padding-left: 0; }
.board { margin-bottom: 5px; }
.category, .board { list-style-type: none; }
.category .name, .board .name { font-weight: bold; }
.board .board { margin-left: 15px; }

#threads { width: 100%; }

.label { font-weight: bold }
article { border-top: 1px solid black; }
section { margin-top: 15px; margin-bottom: 15px; }

.next { float: right; }
.pagination { margin-bottom: 10px; }
.pagination ul { list-style-type: none; margin-left: 0; padding-left: 0; display: inline; }
.pagination li { display: inline; }

.page { padding-top: 15px; }
.page table { width: 100%; }

templates/boards.mustache (new file, 7 lines)
@@ -0,0 +1,7 @@
{{>header}}
{{>forums_notice}}
{{#categories}}
<h2 class="category-name">{{name}}</h2>
{{>child_boards}}
{{/categories}}
{{>footer}}

templates/category.mustache (new file, 9 lines)
@@ -0,0 +1,9 @@
{{>header}}
{{>wiki_notice}}
<h2>{{pagename}}</h2>
<ul>
{{#pages}}
<li><a href="{{url}}">{{title}}</a></li>
{{/pages}}
</ul>
{{>footer}}

templates/index.mustache (new file, 11 lines)
@@ -0,0 +1,11 @@
{{>header}}
Welcome to the <b>Glitch City Laboratories Archives</b>.
<p>Glitch City Laboratories was a Pokémon glitch website that existed from March 2006 to September 2020 (<a href="forums/board-2/thread-9114/page-0.html">announcement of closure</a>). This is an <b>archive</b> of content from the website prior to its closure.</p>
<p>Further development and discussion is happening at <b><a href="https://discord.com/invite/EA7jxJ6">Glitch City Research Institute</a></b>, the successor community.</p>
<p>The <b><a href="https://glitchcity.wiki/">Glitch City Wiki</a></b> is the continuation of the Glitch City Laboratories wiki.</p>
<h2>Archives</h2>
<ul>
<li><a href="forums">Forums</a> (<a href="forums.tar.gz">.tar.gz</a>) (<a href="forums.sql.gz">.sql.gz</a>) (<a href="forums.sqlite.gz">.sqlite.gz</a>)</li>
<li><a href="wiki">Wiki</a> (<a href="wiki.tar.gz">.tar.gz</a>) (<a href="wiki.xml.gz">.xml.gz</a>)</li>
</ul>
{{>footer}}

templates/page.mustache (new file, 7 lines)
@@ -0,0 +1,7 @@
{{>header}}
{{>wiki_notice}}
<h2>{{page.title}}</h2>
<article class="page">
{{{text}}}
</article>
{{>footer}}

templates/partials/child_boards.mustache (new file, 9 lines)
@@ -0,0 +1,9 @@
<ul class="boards">
{{#children}}
<li class="board">
<div class="name"><a href="board-{{id}}">{{name}}</a></div>
<div class="description">{{{description}}}</div>
{{>child_boards}}
</li>
{{/children}}
</ul>

templates/partials/footer.mustache (new file, 2 lines)
@@ -0,0 +1,2 @@
</body>
</html>

templates/partials/forums_notice.mustache (new file, 5 lines)
@@ -0,0 +1,5 @@
<div class="notice">
<p>Glitch City Laboratories closed on 1 September 2020 (<a href="{{base}}board-2/thread-9114/page-0.html">announcement</a>). This is an <b>archived</b> copy of a thread from Glitch City Laboratories Forums.</p>
<p>You can join <a href="https://discord.com/invite/EA7jxJ6">Glitch City Research Institute</a> to ask questions or discuss current developments.</p>
<p>You may also download the archive of this forum in <a href="{{base}}../forums.tar.gz">.tar.gz</a>, <a href="{{base}}../forums.sql.gz">.sql.gz</a>, or <a href="{{base}}../forums.sqlite.gz">.sqlite.gz</a> formats.</p>
</div>

templates/partials/header.mustache (new file, 8 lines)
@@ -0,0 +1,8 @@
<html>
<head>
<title>Glitch City Laboratories Archives{{title}}</title>
<link href="{{base}}style.css" rel="stylesheet" type="text/css" />
<meta charset="UTF-8" />
</head>
<body>
<h1><a href="{{base}}">Glitch City Laboratories Archives</a></h1>

templates/partials/pagination.mustache (new file, 9 lines)
@@ -0,0 +1,9 @@
<div class="pagination">
<a class="prev" href="page-{{prev}}.html">Previous Page</a>
<ul>
{{#page_links}}
<li><a href="{{link}}">{{label}}</a></li>
{{/page_links}}
</ul>
<a class="next" href="page-{{next}}.html">Next Page</a>
</div>

templates/partials/wiki_notice.mustache (new file, 6 lines)
@@ -0,0 +1,6 @@
<div class="notice">
<p>Glitch City Laboratories closed on 1 September 2020 (<a href="{{base}}../forums/board-2/thread-9114/page-0.html">announcement</a>). This is an <b>archived</b> copy of an article from the Glitch City Laboratories wiki.</p>
<p><b>A live version of this article is available at the <a href="https://glitchcity.wiki/">Glitch City Wiki</a> <a href="https://glitchcity.wiki/{{pagename}}">here</a>.</b></p>
<p>You can join <a href="https://discord.com/invite/EA7jxJ6">Glitch City Research Institute</a> to ask questions or discuss current developments.</p>
<p>You may also download the archive of the wiki in <a href="{{base}}../wiki.tar.gz">.tar.gz</a> or <a href="{{base}}../wiki.xml.gz">.xml.gz</a> formats.</p>
</div>

templates/posts.mustache (new file, 17 lines)
@@ -0,0 +1,17 @@
{{>header}}
{{>forums_notice}}
<h2><a href="../">{{board.name}}</a></h2>
<h3>{{thread.subject}} - Page {{next}}</h3>
{{>pagination}}
{{#posts}}
<article id="msg{{id}}">
<header>
<h4>{{subject}}</h4>
<div><span class="label">Posted by:</span> {{poster_name}}</div>
<div><span class="label">Date:</span> {{datetime}}</div>
</header>
<section>{{{body}}}</section>
</article>
{{/posts}}
{{>pagination}}
{{>footer}}

templates/redirect.mustache (new file, 4 lines)
@@ -0,0 +1,4 @@
<html>
<head><meta http-equiv="refresh" content="0; url={{target}}" /></head>
<body><p><a href="{{target}}">Redirect</a></p></body>
</html>

templates/threads.mustache (new file, 20 lines)
@@ -0,0 +1,20 @@
{{>header}}
{{>forums_notice}}
<h2>{{board.name}}</h2>
<table id="threads">
<tr>
<th>Title</th>
<th>Poster</th>
<th>Date</th>
<th>Replies</th>
</tr>
{{#threads}}
<tr>
<td class="thread-subject"><a href="thread-{{id}}">{{subject}}</a></td>
<td class="thread-poster">{{poster_name}}</td>
<td class="thread-date">{{datetime}}</td>
<td class="replies">{{num_replies}}</td>
</tr>
{{/threads}}
</table>
{{>footer}}

wiki/find_pages (new executable file, 54 lines)
@@ -0,0 +1,54 @@
#!/usr/bin/env python3
from urllib.request import urlopen
from datetime import date
import json

GCL_URL = "https://glitchcity.info/"
API_ENDPOINT = "/w/api.php?action=query&generator=categorymembers&gcmtitle=Category:{}&prop=info&gcmlimit=100&format=json"

CATEGORIES = [
    "Generation I glitches", "Generation II glitches", "Generation III glitches", "Generation IV glitches",
    "Generation I glitch Pokémon", "Generation II glitch Pokémon", "Generation III glitch Pokémon", "Generation IV glitch Pokémon",
    "Pokémon Red and Blue glitch moves", "Pokémon Yellow glitch moves", "Generation II glitch moves",
    "Generation I glitch items", "Generation II glitch items", "Generation IV glitch items",
    "Generation I glitch Trainers", "Generation II glitch Trainers"
]

FILTER_MODIFIED_SINCE_THEN = date(2020, 3, 31)

def get_pages_for_category(url, category):
    category = category.replace(" ", "_").replace("é", "%C3%A9")
    pages = {}
    continue_param = None
    while True:
        api_url = url + API_ENDPOINT.format(category)

        if continue_param:
            api_url = api_url + "&gcmcontinue=" + continue_param

        with urlopen(api_url) as result:
            result_object = json.loads(result.read())
            pages.update(result_object['query']['pages'])

        if not "continue" in result_object:
            break

        continue_param = result_object['continue']['gcmcontinue']
    return pages

def filter_page(page):
    touched = date(*[int(value) for value in page['touched'].split("T")[0].split("-")])
    return touched >= FILTER_MODIFIED_SINCE_THEN

all_pages = {}
for category in CATEGORIES:
    category_pages = get_pages_for_category(GCL_URL, category)
    print("--> Found {} total pages in category {}".format(len(category_pages), category))
    all_pages.update(category_pages)

print("----> {} total pages to consider".format(len(all_pages)))
filtered_pages = [page for page in all_pages.values() if filter_page(page)]
print("----> {} filtered pages".format(len(filtered_pages)))

for page in filtered_pages:
    print(page['title'])

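For illustration, `filter_page` only compares the date part of the API's `touched` timestamp (the value below is made up):

```python
from datetime import date

touched = "2020-07-27T16:45:00Z"  # made-up timestamp in the API's format
parsed = date(*[int(value) for value in touched.split("T")[0].split("-")])
print(parsed >= date(2020, 3, 31))  # True: the page would be kept
```
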
wiki/wiki_pages (new file, 5593 lines)
(File diff suppressed because it is too large.)