Add rudimentary script for generating the forum archive.

This commit is contained in:
Adrian Kuschelyagi Malacoda 2020-08-10 01:21:33 -05:00
parent 7026bc2b34
commit f3b0319d5a
15 changed files with 295 additions and 1 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/**/__pycache__

View File

@ -1,2 +1,27 @@
# Glitch City Laboratories (2003 - 2020) - Epilogue
This repository contains the tickets, scripts, and documentation for the end of Glitch City Laboratories.
## Installation
`pip install -e .`
## Standalone Epilogue Scripts
Run these before running commands from the epilogue package.
#### Wiki Data
##### `find_data`
Run this locally (it uses the MediaWiki HTTP API). Finds all pages in categories related to Pokemon generations 1 - 4 that have been edited since 31 March 2020.
#### Forum Data (`forum` directory)
##### `dump_forum_data`
Run this on a server with access to a Glitch City Laboratories database. It will dump the table schemas and data into several .sql files. Place these in the `forum` directory.
##### `create_sqlite_database`
Run this locally, after the `dump_forum_data` script has been run on the Glitch City Laboratories server and the dumps copied to the `forum` directory (same directory as this script). A file called `forum.sqlite` will be generated from the MariaDB dumps. This sqlite database is required for generating the static archive later.
Make sure the `mysql2sqlite` submodule is initialized and install `mawk` for the best results.
## Epilogue Package
## Misc Scripts
### `get_pms.php`
This is installed on Glitch City Laboratories and allows any logged-in user to export their PM inbox as an HTML file.

17
epilogue/__init__.py Normal file
View File

@ -0,0 +1,17 @@
import os
from .forum import Forum
from .archive_generator import ArchiveGenerator
# Repository root: the directory that contains this package's directory.
_PACKAGE_DIR = os.path.dirname(os.path.abspath(__file__))
BASEDIR = os.path.dirname(_PACKAGE_DIR)

# Inputs, all located relative to the repository root.
FORUM_DATABASE = os.path.join(BASEDIR, "forum", "forum.sqlite")
TEMPLATES_DIR = os.path.join(BASEDIR, "templates")
STATIC_DIR = os.path.join(BASEDIR, "static")

# Outputs. These are relative paths, so they resolve against the current
# working directory rather than against BASEDIR.
ARCHIVES_BASEDIR = "archives"
FORUM_ARCHIVES, WIKI_ARCHIVES = (
    os.path.join(ARCHIVES_BASEDIR, section) for section in ("forums", "wiki")
)
def main():
    """Console entry point: render the forum database into the static forum archive.

    Opens the forum database at FORUM_DATABASE and writes the generated
    archive to FORUM_ARCHIVES using the templates and static assets that
    ship with the repository.
    """
    board_db = Forum(FORUM_DATABASE)
    archiver = ArchiveGenerator(TEMPLATES_DIR, STATIC_DIR)
    archiver.generate_forum(board_db, FORUM_ARCHIVES)

View File

@ -0,0 +1,102 @@
import os
import chevron
import logging
import shutil
from datetime import datetime
# Configure logging when this module is imported so that INFO-level progress
# messages emitted during archive generation are shown.
logging.basicConfig(level=logging.INFO)
# Logger used by the archive generator defined in this module.
logger = logging.getLogger("ArchiveGenerator")
def format_datetime (timestamp):
    """Convert a POSIX timestamp into a ``datetime`` object.

    No timezone is passed to ``datetime.fromtimestamp``, so the result is a
    naive datetime expressed in the local time of the machine running the
    script.
    """
    moment = datetime.fromtimestamp(timestamp)
    return moment
def prepare_thread (thread):
    """Return a plain-dict copy of a thread row, ready for the templates.

    The copy carries every field of the input plus a ``datetime`` entry
    derived from its ``poster_time`` timestamp. The input is not modified.
    """
    prepared = dict(thread)
    prepared.update(datetime=format_datetime(prepared['poster_time']))
    return prepared
def prepare_post (post):
    """Return a template-ready copy of a post row.

    Posts get exactly the same treatment as threads: a dict copy with an
    added ``datetime`` entry.
    """
    prepared_post = prepare_thread(post)
    return prepared_post
class ArchiveGenerator():
    """Writes a static HTML archive of a forum to disk.

    Layout produced below the output directory::

        index.html                                   board index
        style.css                                    copied from the static directory
        board-<id_board>/index.html                  thread list of one board
        board-<id_board>/thread-<id_topic>/index.html    redirect to page-0.html
        board-<id_board>/thread-<id_topic>/page-<n>.html one page of posts
    """

    def __init__ (self, template_dir, static_dir):
        """Remember where the mustache templates and static assets live."""
        self.template_dir = template_dir
        self.static_dir = static_dir

    def generate_forum (self, forum, out_dir):
        """Generate the whole forum archive into ``out_dir``.

        ``forum`` must provide ``get_board_tree``, ``get_boards``,
        ``get_threads_in_board`` and ``get_posts_in_thread``.
        """
        logger.info("Archiving forum to %s", out_dir)
        # exist_ok lets the archive be regenerated over a previous run.
        os.makedirs(out_dir, exist_ok=True)

        shutil.copyfile(os.path.join(self.static_dir, "style.css"), os.path.join(out_dir, "style.css"))

        renderer = TemplateRenderer(self.template_dir, out_dir)
        renderer.render_template_to_file("boards", "index.html", {
            "categories": forum.get_board_tree()
        })

        for board in forum.get_boards():
            self.generate_forum_board(forum, board, out_dir)

    def generate_forum_board (self, forum, board, out_dir):
        """Generate the thread index of ``board`` and every thread inside it."""
        board_out_dir = os.path.join(out_dir, "board-{}".format(board['id_board']))
        logger.info("Archiving board %s to %s", board['name'], board_out_dir)
        os.makedirs(board_out_dir, exist_ok=True)

        renderer = TemplateRenderer(self.template_dir, board_out_dir)
        threads = [prepare_thread(thread) for thread in self._fetch_all_threads(forum, board)]
        renderer.render_template_to_file("threads", "index.html", {
            "title": " - {}".format(board['name']),
            "base": "../",
            "board": board,
            "threads": threads
        })

        for thread in threads:
            self.generate_forum_thread(forum, board, thread, board_out_dir)

    def generate_forum_thread (self, forum, board, thread, out_dir):
        """Generate one HTML file per page of posts in ``thread``.

        Pages are numbered from 0 in file names; the page title shows the
        1-based number. ``index.html`` is a redirect to the first page.
        """
        thread_out_dir = os.path.join(out_dir, "thread-{}".format(thread['id_topic']))
        logger.info("Archiving thread %s to %s", thread['subject'], thread_out_dir)
        os.makedirs(thread_out_dir, exist_ok=True)

        renderer = TemplateRenderer(self.template_dir, thread_out_dir)
        renderer.render_template_to_file("page-0-redirect", "index.html")

        page = 0
        while True:
            posts = [prepare_post(post) for post in forum.get_posts_in_thread(thread, page)]
            if not posts:
                # An empty page means the previous one was the last.
                break

            logger.info("Archiving page %s of thread %s", page, thread['subject'])
            # NOTE(review): "prev" is -1 on the first page and "next" always
            # points one past the current page, so the pagination links on the
            # first and last page lead to files that are never written.
            # Fixing this needs a matching change in the pagination template.
            renderer.render_template_to_file("posts", "page-{}.html".format(page), {
                "title": " - {} - Page {}".format(thread['subject'], page + 1),
                "base": "../../",
                "board": board,
                "thread": thread,
                "page": page,
                "next": page + 1,
                "prev": page - 1,
                "posts": posts
            })
            page = page + 1

    @staticmethod
    def _fetch_all_threads (forum, board):
        """Collect every thread row of ``board`` by paging until a page is empty.

        Asking the forum for only its first page would silently drop threads
        on boards larger than the forum's page size.
        """
        rows = []
        page = 0
        while True:
            batch = forum.get_threads_in_board(board, page)
            if not batch:
                break
            rows.extend(batch)
            page += 1
        return rows
class TemplateRenderer():
    """Renders mustache templates from one directory into files in another.

    Templates are ``<template_dir>/<name>.mustache``; partials are looked up
    in ``<template_dir>/partials``.
    """

    def __init__ (self, template_dir, out_dir):
        """Remember the template directory and the directory to write into."""
        self.template_dir = template_dir
        self.partials_dir = os.path.join(template_dir, "partials")
        self.out_dir = out_dir
        self.extension = "mustache"

    def open_template (self, name):
        """Open the template called ``name`` for reading; the caller closes it.

        The file is read as UTF-8 explicitly so the result does not depend on
        the locale of the machine generating the archive.
        """
        template_path = os.path.join(self.template_dir, "{}.{}".format(name, self.extension))
        return open(template_path, encoding="utf-8")

    def render_template_to_file (self, template_name, out_file, data=None):
        """Render ``template_name`` with ``data`` and write it to ``out_file``.

        ``out_file`` is relative to the renderer's output directory. ``data``
        defaults to an empty context; a fresh dict is created per call rather
        than sharing one mutable default between calls.
        """
        if data is None:
            data = {}

        # Render before opening the destination so that a failing render does
        # not leave a truncated, empty page behind.
        with self.open_template(template_name) as template:
            rendered = chevron.render(template, data, self.partials_dir, self.extension)

        # Forum content is not limited to ASCII; write UTF-8 regardless of the
        # platform's default encoding.
        with open(os.path.join(self.out_dir, out_file), "w", encoding="utf-8") as out:
            out.write(rendered)

57
epilogue/forum.py Normal file
View File

@ -0,0 +1,57 @@
import sqlite3
# Prefix shared by every forum table name in the database.
PREFIX = "smf_"

# All boards / all categories, unfiltered.
GET_BOARDS = "SELECT * FROM `{prefix}boards`".format(prefix=PREFIX)
GET_CATEGORIES = "SELECT * FROM `{prefix}categories`".format(prefix=PREFIX)

# One page of topics in a board, each joined with its first message, most
# recently active first. Parameters: board id, page size, row offset.
GET_THREADS = """
SELECT * FROM `{prefix}topics` AS `topics`, `{prefix}messages` AS `messages`
WHERE `topics`.`id_board`=? AND `messages`.`id_msg`=`topics`.`id_first_msg`
ORDER BY `id_last_msg` DESC
LIMIT ? OFFSET ?
""".format(prefix=PREFIX)

# One page of messages in a topic, oldest first.
# Parameters: topic id, page size, row offset.
GET_POSTS = """
SELECT * FROM `{prefix}messages`
WHERE `id_topic`=?
ORDER BY `poster_time` ASC
LIMIT ? OFFSET ?
""".format(prefix=PREFIX)
class Forum():
    """Query helper over the forum's SQLite database.

    Rows are returned as ``sqlite3.Row`` objects, so columns can be read by
    name (``row['id_board']``).
    """

    def __init__ (self, db_path):
        """Open the SQLite database at ``db_path``."""
        self.connection = sqlite3.connect(db_path)
        self.connection.row_factory = sqlite3.Row

    def get_board_tree (self):
        """Return the categories as dicts, each with its boards nested below it.

        Every category dict gets a ``children`` list holding its top-level
        boards, and every board dict gets a ``children`` list holding its
        direct sub-boards. A board is top-level when its ``id_parent`` does
        not refer to another existing board; sub-boards therefore appear only
        beneath their parent, not a second time directly under the category.
        """
        categories = [dict(category) for category in self.get_categories()]
        boards = [dict(board) for board in self.get_boards()]
        board_ids = {board['id_board'] for board in boards}

        def is_root (board):
            # A self-referencing parent is treated as "no parent" so the board
            # neither disappears nor becomes its own child.
            parent = board['id_parent']
            return parent == board['id_board'] or parent not in board_ids

        # Group sub-boards by parent id in one pass.
        children_by_parent = {}
        for board in boards:
            if not is_root(board):
                children_by_parent.setdefault(board['id_parent'], []).append(board)

        for board in boards:
            board['children'] = children_by_parent.get(board['id_board'], [])

        for category in categories:
            category['children'] = [
                board for board in boards
                if board['id_cat'] == category['id_cat'] and is_root(board)
            ]

        return categories

    def get_categories (self):
        """Return every row of the categories table."""
        cursor = self.connection.cursor()
        cursor.execute(GET_CATEGORIES)
        return cursor.fetchall()

    def get_boards (self):
        """Return every row of the boards table."""
        cursor = self.connection.cursor()
        cursor.execute(GET_BOARDS)
        return cursor.fetchall()

    def get_threads_in_board (self, board, page=0, per_page=2000):
        """Return one page of threads in a board, most recently active first.

        ``board`` is either a board row/dict (its ``id_board`` is used) or a
        bare board id. ``page`` is zero-based.
        """
        board = self._resolve_id(board, 'id_board')
        cursor = self.connection.cursor()
        cursor.execute(GET_THREADS, (board, per_page, page * per_page))
        return cursor.fetchall()

    def get_posts_in_thread (self, thread, page=0, per_page=15):
        """Return one page of posts in a thread, oldest first.

        ``thread`` is either a thread row/dict (its ``id_topic`` is used) or a
        bare topic id. ``page`` is zero-based.
        """
        thread = self._resolve_id(thread, 'id_topic')
        cursor = self.connection.cursor()
        cursor.execute(GET_POSTS, (thread, per_page, page * per_page))
        return cursor.fetchall()

    @staticmethod
    def _resolve_id (value, key):
        """Return ``value[key]`` for a row/dict, or ``value`` itself for a bare id.

        Subscripting an int or str with a column name raises ``TypeError``,
        which is what marks ``value`` as already being the id. A row or dict
        that lacks ``key`` still raises, so that mistake is not hidden.
        """
        try:
            return value[key]
        except (TypeError, ValueError):
            return value

17
setup.py Normal file
View File

@ -0,0 +1,17 @@
#!/usr/bin/env python
# Packaging script for the `epilogue` package. Declares the package, its
# dependencies and an `epilogue` console command that calls `epilogue.main`.
#
# NOTE(review): `install_requires` and `entry_points` are setuptools options;
# confirm that `distutils.core.setup` honours them (it may only warn about
# unknown options), otherwise the dependencies and the console script are not
# installed. distutils is also deprecated in recent Python versions, so the
# import probably needs to move to setuptools.
# NOTE(review): `pysqlite3` is listed as a requirement, while the code visible
# in this change imports the standard-library `sqlite3` -- confirm it is needed.
from distutils.core import setup

setup(
    name='Glitch City Labs - Epilogue',
    version='0.0.1',
    description='Tools for exporting and creating archives of Glitch City Labs data',
    author='Adrian Kuschelyagi Malacoda',
    packages=['epilogue'],
    install_requires=['pysqlite3 >= 0.4.3', 'chevron >= 0.13.1'],
    entry_points={
        'console_scripts': [
            'epilogue = epilogue:main'
        ]
    }
)

12
static/style.css Normal file
View File

@ -0,0 +1,12 @@
/* Base typography for every archive page. */
body {
    font-family: monospace;
}

/* Board index: categories and (nested) boards are rendered as unbulleted lists. */
.category,
.board {
    list-style-type: none;
}

.category .name,
.board .name {
    font-weight: bold;
}

/* Indent a board that is nested inside another board. */
.board .board {
    margin-left: 10px;
}

/* Thread list table. */
#threads {
    width: 100%;
}

/* Field labels in a post header. */
.label {
    font-weight: bold;
}

/* Posts: a rule above each post and vertical spacing around its body. */
article {
    border-top: 1px solid black;
}

section {
    margin-top: 15px;
    margin-bottom: 15px;
}

/* Pagination: push the "next" link to the right edge. */
.next {
    float: right;
}

View File

@ -0,0 +1,6 @@
{{! Forum index page: one heading per entry in `categories`, followed by that category's boards rendered through the child_boards partial. }}
{{>header}}
{{#categories}}
<h2 class="category-name">{{name}}</h2>
{{>child_boards}}
{{/categories}}
{{>footer}}

View File

@ -0,0 +1,4 @@
{{! Redirect stub: immediately forwards the browser to page-0.html in the same directory, with a plain link as a fallback. Takes no template data. }}
<html>
<head><meta http-equiv="refresh" content="0; url=page-0.html" /></head>
<body><p><a href="page-0.html">Redirect</a></p></body>
</html>

View File

@ -0,0 +1,7 @@
{{! Recursive partial: renders each entry of `children` as a list item linking to its board-<id_board> directory, then includes itself to render that entry's own `children`. }}
{{#children}}
<li class="board">
<div class="name"><a href="board-{{id_board}}">{{name}}</a></div>
<div class="description">{{description}}</div>
{{>child_boards}}
</li>
{{/children}}

View File

@ -0,0 +1,2 @@
{{! Page footer: closes the body and html elements. Presumably paired with the header partial, which opens them. }}
</body>
</html>

View File

@ -0,0 +1,7 @@
{{! Page header. `title` is appended to the site name in the page title; `base` is prefixed to the stylesheet URL and used as the target of the site-name link. Both render as empty when absent from the context. }}
<html>
<head>
<title>Glitch City Laboratories Archives{{title}}</title>
<link href="{{base}}style.css" rel="stylesheet" type="text/css" />
</head>
<body>
<h1><a href="{{base}}">Glitch City Laboratories Archives</a></h1>

View File

@ -0,0 +1,4 @@
{{! Previous/next page links built from the `prev` and `next` values in the context. NOTE(review): both links are always emitted, with no check that the target page exists; confirm how the first and last page of a thread should behave. }}
<div class="pagination">
<a class="prev" href="page-{{prev}}.html">Previous Page</a>
<a class="next" href="page-{{next}}.html">Next Page</a>
</div>

16
templates/posts.mustache Normal file
View File

@ -0,0 +1,16 @@
{{! One page of a thread: board link, thread subject, then one article per entry in `posts`, with pagination above and below. The heading prints `next` as the page number. `body` uses triple braces, so it is inserted without HTML escaping. }}
{{>header}}
<h2><a href="../">{{board.name}}</a></h2>
<h3>{{thread.subject}} - Page {{next}}</h3>
{{>pagination}}
{{#posts}}
<article id="post-{{id_msg}}">
<header>
<h4>{{subject}}</h4>
<div><span class="label">Posted by:</span> {{poster_name}}</div>
<div><span class="label">Date:</span> {{datetime}}</div>
</header>
<section>{{{body}}}</section>
</article>
{{/posts}}
{{>pagination}}
{{>footer}}

View File

@ -0,0 +1,17 @@
{{! Thread list of one board: a table with one row per entry in `threads`, showing subject (linked to the thread-<id_topic> directory), poster name and date. }}
{{>header}}
<h2>{{board.name}}</h2>
<table id="threads">
<tr>
<th>Title</th>
<th>Poster</th>
<th>Date</th>
</tr>
{{#threads}}
<tr>
<td class="thread-subject"><a href="thread-{{id_topic}}">{{subject}}</a></td>
<td class="thread-poster">{{poster_name}}</td>
<td class="thread-date">{{datetime}}</td>
</tr>
{{/threads}}
</table>
{{>footer}}
{{>footer}}