Add rudimentary script for generating the forum archive.

This commit is contained in:
Adrian Kuschelyagi Malacoda 2020-08-10 01:21:33 -05:00
parent 7026bc2b34
commit f3b0319d5a
15 changed files with 295 additions and 1 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/**/__pycache__

View File

@ -1,2 +1,27 @@
# Glitch City Laboratories (2003 - 2020) - Epilogue
This repository contains the tickets, scripts, and documentation for the end of Glitch City Laboratories.
## Installation
`pip install -e .`
## Standalone Epilogue Scripts
Run these before running commands from the epilogue package.
#### Wiki Data
##### `find_data`
Run this locally (it uses the MediaWiki HTTP API). Finds all pages in categories related to Pokemon generations 1 - 4 that have been edited since 31 March 2020.
#### Forum Data (`forum` directory)
##### `dump_forum_data`
Run this on a server with access to a Glitch City Laboratories database. It will dump the table schemas and data into several .sql files. Place these in the `forum` directory.
##### `create_sqlite_database`
Run this locally, after the `dump_forum_data` script has been run on the Glitch City Laboratories server and the dumps copied to the `forum` directory (same directory as this script). A file called `forum.sqlite` will be generated from the MariaDB dumps. This sqlite database is required for generating the static archive later.
Make sure the `mysql2sqlite` submodule is initialized and install `mawk` for the best results.
## Epilogue Package
## Misc Scripts
### `get_pms.php`
This is installed on Glitch City Laboratories and allows any logged-in user to export their PM inbox as HTML.

17
epilogue/__init__.py Normal file
View File

@ -0,0 +1,17 @@
import os
from .forum import Forum
from .archive_generator import ArchiveGenerator
# Repository root: the parent of the directory that contains this file.
BASEDIR = os.path.abspath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir)
)

# Inputs, all located relative to the repository root.
FORUM_DATABASE = os.path.join(BASEDIR, "forum", "forum.sqlite")
TEMPLATES_DIR = os.path.join(BASEDIR, "templates")
STATIC_DIR = os.path.join(BASEDIR, "static")

# Outputs. These are relative paths, so they resolve against the current
# working directory at run time rather than against BASEDIR.
ARCHIVES_BASEDIR = "archives"
FORUM_ARCHIVES = os.path.join(ARCHIVES_BASEDIR, "forums")
WIKI_ARCHIVES = os.path.join(ARCHIVES_BASEDIR, "wiki")
def main():
    """Console entry point: render the forum database into FORUM_ARCHIVES."""
    # Open the database first, then hand it to a generator that knows where
    # the templates and static assets live.
    source = Forum(FORUM_DATABASE)
    ArchiveGenerator(TEMPLATES_DIR, STATIC_DIR).generate_forum(source, FORUM_ARCHIVES)

View File

@ -0,0 +1,102 @@
import os
import chevron
import logging
import shutil
from datetime import datetime
# Configure the root logger when this module is imported so that INFO-level
# progress messages are emitted.
logging.basicConfig(level=logging.INFO)
# Logger used by the archive-generation code in this module.
logger = logging.getLogger("ArchiveGenerator")
def format_datetime(timestamp):
    """Convert a POSIX timestamp into a ``datetime`` object.

    Despite the name, no string formatting happens here: the result is a
    ``datetime`` produced by ``datetime.fromtimestamp`` without a timezone
    argument.
    """
    converted = datetime.fromtimestamp(timestamp)
    return converted
def prepare_thread(thread):
    """Return a dict copy of ``thread`` with an added ``datetime`` entry.

    The input (any mapping-like row accepted by ``dict()``) is left untouched;
    ``datetime`` is derived from the row's ``poster_time`` value.
    """
    prepared = dict(thread)
    prepared.update(datetime=format_datetime(prepared['poster_time']))
    return prepared
def prepare_post(post):
    """Return a template-ready dict for ``post``.

    Posts receive exactly the same treatment as threads, so this simply
    delegates to ``prepare_thread``.
    """
    prepared_post = prepare_thread(post)
    return prepared_post
class ArchiveGenerator():
    """Writes a static HTML archive of a forum to disk.

    Output layout, relative to the ``out_dir`` passed to ``generate_forum``:
    ``index.html`` and ``style.css`` at the top level, one
    ``board-<id_board>`` directory per board, and inside each board one
    ``thread-<id_topic>`` directory per thread containing ``index.html``
    (a redirect) and ``page-<n>.html`` files.
    """

    def __init__(self, template_dir, static_dir):
        """
        :param template_dir: directory holding the ``.mustache`` templates.
        :param static_dir: directory holding ``style.css``.
        """
        self.template_dir = template_dir
        self.static_dir = static_dir

    def generate_forum(self, forum, out_dir):
        """Render the board index and then every board of ``forum``.

        :param forum: object providing ``get_board_tree``, ``get_boards``,
            ``get_threads_in_board`` and ``get_posts_in_thread``.
        :param out_dir: directory to write into; created if missing.
        """
        logger.info("Archiving forum to %s", out_dir)
        # exist_ok=True tolerates an existing directory but still raises if
        # the path exists as a regular file.
        os.makedirs(out_dir, exist_ok=True)

        shutil.copyfile(os.path.join(self.static_dir, "style.css"), os.path.join(out_dir, "style.css"))

        renderer = TemplateRenderer(self.template_dir, out_dir)
        renderer.render_template_to_file("boards", "index.html", {
            "categories": forum.get_board_tree()
        })

        for board in forum.get_boards():
            self.generate_forum_board(forum, board, out_dir)

    def generate_forum_board(self, forum, board, out_dir):
        """Render the thread listing of ``board`` and then each of its threads.

        NOTE: only the first page returned by ``forum.get_threads_in_board``
        (called with its default paging arguments) is archived.
        """
        board_out_dir = os.path.join(out_dir, "board-{}".format(board['id_board']))
        logger.info("Archiving board %s to %s", board['name'], board_out_dir)
        os.makedirs(board_out_dir, exist_ok=True)

        renderer = TemplateRenderer(self.template_dir, board_out_dir)
        threads = [prepare_thread(thread) for thread in forum.get_threads_in_board(board)]
        renderer.render_template_to_file("threads", "index.html", {
            "title": " - {}".format(board['name']),
            "base": "../",
            "board": board,
            "threads": threads
        })

        for thread in threads:
            self.generate_forum_thread(forum, board, thread, board_out_dir)

    def generate_forum_thread(self, forum, board, thread, out_dir):
        """Render every page of ``thread`` into its own directory.

        Pages are numbered from 0 in file names (``page-0.html``) and from 1
        in titles. Rendering stops at the first page that has no posts.
        """
        thread_out_dir = os.path.join(out_dir, "thread-{}".format(thread['id_topic']))
        logger.info("Archiving thread %s to %s", thread['subject'], thread_out_dir)
        os.makedirs(thread_out_dir, exist_ok=True)

        renderer = TemplateRenderer(self.template_dir, thread_out_dir)
        # index.html only redirects to the first page.
        renderer.render_template_to_file("page-0-redirect", "index.html")

        page = 0
        while True:
            posts = [prepare_post(post) for post in forum.get_posts_in_thread(thread, page)]
            if not posts:
                break

            logger.info("Archiving page %s of thread %s", page, thread['subject'])
            renderer.render_template_to_file("posts", "page-{}.html".format(page), {
                "title": " - {} - Page {}".format(thread['subject'], page + 1),
                "base": "../../",
                "board": board,
                "thread": thread,
                "page": page,
                # NOTE: "prev" is -1 on the first page and "next" is passed
                # even for the last page; the values are handed to the
                # template unguarded.
                "next": page + 1,
                "prev": page - 1,
                "posts": posts
            })
            page = page + 1
class TemplateRenderer():
    """Renders Mustache templates from one directory into files in another."""

    def __init__(self, template_dir, out_dir):
        """
        :param template_dir: directory containing the templates; partials are
            looked up in its ``partials`` subdirectory.
        :param out_dir: directory that rendered files are written to.
        """
        self.template_dir = template_dir
        self.partials_dir = os.path.join(template_dir, "partials")
        self.out_dir = out_dir
        self.extension = "mustache"

    def open_template(self, name):
        """Open ``<template_dir>/<name>.mustache`` for reading.

        The caller is responsible for closing the returned file object.
        """
        template_path = os.path.join(self.template_dir, "{}.{}".format(name, self.extension))
        # Explicit encoding: do not depend on the platform's locale default.
        return open(template_path, encoding="utf-8")

    def render_template_to_file(self, template_name, out_file, data=None):
        """Render ``template_name`` with ``data`` and write it to ``out_file``.

        :param template_name: template name without directory or extension.
        :param out_file: file name relative to ``out_dir``; overwritten if it
            already exists.
        :param data: mapping handed to the template; defaults to an empty dict.
        """
        # A fresh dict per call avoids sharing one mutable default object
        # between calls.
        if data is None:
            data = {}

        out_path = os.path.join(self.out_dir, out_file)
        with self.open_template(template_name) as template, \
                open(out_path, "w", encoding="utf-8") as out:
            out.write(chevron.render(template, data, self.partials_dir, self.extension))

57
epilogue/forum.py Normal file
View File

@ -0,0 +1,57 @@
import sqlite3
# Prefix prepended to every table name referenced by the queries below.
PREFIX = "smf_"

# Unfiltered listings of all boards and all categories.
GET_BOARDS = "SELECT * FROM `{prefix}boards`".format(prefix=PREFIX)
GET_CATEGORIES = "SELECT * FROM `{prefix}categories`".format(prefix=PREFIX)

# One page of the topics in a board, each joined with its first message.
# Parameters: board id, page size, row offset.
GET_THREADS = """
SELECT * FROM `{prefix}topics` AS `topics`, `{prefix}messages` AS `messages`
WHERE `topics`.`id_board`=? AND `messages`.`id_msg`=`topics`.`id_first_msg`
ORDER BY `id_last_msg` DESC
LIMIT ? OFFSET ?
""".format(prefix=PREFIX)

# One page of the messages in a topic, oldest first.
# Parameters: topic id, page size, row offset.
GET_POSTS = """
SELECT * FROM `{prefix}messages`
WHERE `id_topic`=?
ORDER BY `poster_time` ASC
LIMIT ? OFFSET ?
""".format(prefix=PREFIX)
class Forum():
    """Accessor for forum data stored in a SQLite database.

    Query results are returned as lists of ``sqlite3.Row`` objects, which can
    be indexed by column name.
    """

    def __init__(self, db_path):
        """Open ``db_path`` and make rows addressable by column name."""
        self.connection = sqlite3.connect(db_path)
        self.connection.row_factory = sqlite3.Row

    @staticmethod
    def _resolve_id(value, key):
        """Return ``value[key]`` for a row/dict, or ``value`` itself for a bare id."""
        try:
            return value[key]
        except (TypeError, ValueError):
            # Subscripting a plain id (e.g. an int) raises TypeError; treat
            # the value as the id itself.
            return value

    def _fetch_all(self, query, parameters=()):
        """Run ``query`` and return all rows, closing the cursor afterwards."""
        cursor = self.connection.cursor()
        try:
            cursor.execute(query, parameters)
            return cursor.fetchall()
        finally:
            cursor.close()

    def get_board_tree(self):
        """Return the categories as dicts with their boards nested under them.

        Every category and every board gets a ``children`` list. A board's
        ``children`` are the boards whose ``id_parent`` equals its
        ``id_board``. A category's ``children`` are the boards in that
        category whose parent is not another known board, so nested boards
        are only reachable through their parent.
        """
        categories = [dict(category) for category in self.get_categories()]
        boards = [dict(board) for board in self.get_boards()]
        board_ids = {board['id_board'] for board in boards}

        for board in boards:
            # Use a distinct loop name: reusing ``board`` inside the
            # comprehension would compare each board with itself.
            board['children'] = [
                child for child in boards
                if child is not board and child['id_parent'] == board['id_board']
            ]

        for category in categories:
            category['children'] = [
                board for board in boards
                if board['id_cat'] == category['id_cat']
                and (board['id_parent'] not in board_ids
                     or board['id_parent'] == board['id_board'])
            ]

        return categories

    def get_categories(self):
        """Return every row of the categories table."""
        return self._fetch_all(GET_CATEGORIES)

    def get_boards(self):
        """Return every row of the boards table."""
        return self._fetch_all(GET_BOARDS)

    def get_threads_in_board(self, board, page=0, per_page=2000):
        """Return one page of threads in ``board``.

        :param board: a board row/dict (its ``id_board`` is used) or a bare
            board id.
        :param page: zero-based page index.
        :param per_page: maximum number of rows per page.
        """
        board_id = self._resolve_id(board, 'id_board')
        return self._fetch_all(GET_THREADS, (board_id, per_page, page * per_page))

    def get_posts_in_thread(self, thread, page=0, per_page=15):
        """Return one page of posts in ``thread``.

        :param thread: a thread row/dict (its ``id_topic`` is used) or a bare
            topic id.
        :param page: zero-based page index.
        :param per_page: maximum number of rows per page.
        """
        topic_id = self._resolve_id(thread, 'id_topic')
        return self._fetch_all(GET_POSTS, (topic_id, per_page, page * per_page))

17
setup.py Normal file
View File

@ -0,0 +1,17 @@
#!/usr/bin/env python
"""Packaging script for the epilogue tools."""

# ``install_requires`` and ``entry_points`` are setuptools options; plain
# distutils does not implement them, and distutils is no longer part of the
# standard library from Python 3.12 on. Prefer setuptools and keep distutils
# only as a fallback for environments without it.
try:
    from setuptools import setup
except ImportError:
    from distutils.core import setup

setup(
    name='Glitch City Labs - Epilogue',
    version='0.0.1',
    description='Tools for exporting and creating archives of Glitch City Labs data',
    author='Adrian Kuschelyagi Malacoda',
    packages=['epilogue'],
    install_requires=['pysqlite3 >= 0.4.3', 'chevron >= 0.13.1'],
    entry_points={
        # Installs an ``epilogue`` command that calls ``epilogue.main``.
        'console_scripts': [
            'epilogue = epilogue:main'
        ]
    }
)

12
static/style.css Normal file
View File

@ -0,0 +1,12 @@
/* Base typography for every archive page. */
body { font-family: monospace; }
/* Board listings: no list bullets, bold names. */
.category, .board { list-style-type: none;}
.category .name, .board .name { font-weight: bold; }
/* Indent a board nested inside another board. */
.board .board { margin-left: 10px; }
/* Thread listing table spans the full page width. */
#threads { width: 100%; }
/* Field labels such as "Posted by:" and "Date:". */
.label { font-weight: bold }
/* Each post is an <article>; a top rule separates consecutive posts. */
article { border-top: 1px solid black; }
/* Vertical breathing room around the post body. */
section { margin-top: 15px; margin-bottom: 15px; }
/* Push the "Next Page" link to the right-hand side. */
.next { float: right; }

View File

@ -0,0 +1,6 @@
{{>header}}
{{#categories}}
<h2 class="category-name">{{name}}</h2>
{{>child_boards}}
{{/categories}}
{{>footer}}

View File

@ -0,0 +1,4 @@
<html>
<head><meta http-equiv="refresh" content="0; url=page-0.html" /></head>
<body><p><a href="page-0.html">Redirect</a></p></body>
</html>

View File

@ -0,0 +1,7 @@
{{#children}}
<li class="board">
<div class="name"><a href="board-{{id_board}}">{{name}}</a></div>
<div class="description">{{description}}</div>
{{>child_boards}}
</li>
{{/children}}

View File

@ -0,0 +1,2 @@
</body>
</html>

View File

@ -0,0 +1,7 @@
<html>
<head>
<title>Glitch City Laboratories Archives{{title}}</title>
<link href="{{base}}style.css" rel="stylesheet" type="text/css" />
</head>
<body>
<h1><a href="{{base}}">Glitch City Laboratories Archives</a></h1>

View File

@ -0,0 +1,4 @@
<div class="pagination">
<a class="prev" href="page-{{prev}}.html">Previous Page</a>
<a class="next" href="page-{{next}}.html">Next Page</a>
</div>

16
templates/posts.mustache Normal file
View File

@ -0,0 +1,16 @@
{{>header}}
<h2><a href="../">{{board.name}}</a></h2>
<h3>{{thread.subject}} - Page {{next}}</h3>
{{>pagination}}
{{#posts}}
<article id="post-{{id_msg}}">
<header>
<h4>{{subject}}</h4>
<div><span class="label">Posted by:</span> {{poster_name}}</div>
<div><span class="label">Date:</span> {{datetime}}</div>
</header>
<section>{{{body}}}</section>
</article>
{{/posts}}
{{>pagination}}
{{>footer}}

View File

@ -0,0 +1,17 @@
{{>header}}
<h2>{{board.name}}</h2>
<table id="threads">
<tr>
<th>Title</th>
<th>Poster</th>
<th>Date</th>
</tr>
{{#threads}}
<tr>
<td class="thread-subject"><a href="thread-{{id_topic}}">{{subject}}</a></td>
<td class="thread-poster">{{poster_name}}</td>
<td class="thread-date">{{datetime}}</td>
</tr>
{{/threads}}
</table>
{{>footer}}