From f3b0319d5ad771ba2e0c1e6dedb3eaedb959b5d7 Mon Sep 17 00:00:00 2001 From: Adrian Malacoda Date: Mon, 10 Aug 2020 01:21:33 -0500 Subject: [PATCH] Add rudimentary script for generating the forum archive. --- .gitignore | 1 + README.md | 27 +++++- epilogue/__init__.py | 17 ++++ epilogue/archive_generator.py | 102 +++++++++++++++++++++++ epilogue/forum.py | 57 +++++++++++++ setup.py | 17 ++++ static/style.css | 12 +++ templates/boards.mustache | 6 ++ templates/page-0-redirect.mustache | 4 + templates/partials/child_boards.mustache | 7 ++ templates/partials/footer.mustache | 2 + templates/partials/header.mustache | 7 ++ templates/partials/pagination.mustache | 4 + templates/posts.mustache | 16 ++++ templates/threads.mustache | 17 ++++ 15 files changed, 295 insertions(+), 1 deletion(-) create mode 100644 .gitignore create mode 100644 epilogue/__init__.py create mode 100644 epilogue/archive_generator.py create mode 100644 epilogue/forum.py create mode 100644 setup.py create mode 100644 static/style.css create mode 100644 templates/boards.mustache create mode 100644 templates/page-0-redirect.mustache create mode 100644 templates/partials/child_boards.mustache create mode 100644 templates/partials/footer.mustache create mode 100644 templates/partials/header.mustache create mode 100644 templates/partials/pagination.mustache create mode 100644 templates/posts.mustache create mode 100644 templates/threads.mustache diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bfe54bd --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/**/__pycache__ diff --git a/README.md b/README.md index a20841e..6dc3deb 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,27 @@ # Glitch City Laboratories (2003 - 2020) - Epilogue -This repository contains the tickets, scripts, and documentation for the end of Glitch City Laboratories. \ No newline at end of file +This repository contains the tickets, scripts, and documentation for the end of Glitch City Laboratories. 
+ +## Installation +`pip install -e .` + +## Standalone Epilogue Scripts +Run these before running commands from the epilogue package. + +#### Wiki Data +##### `find_data` +Run this locally (it uses the MediaWiki HTTP API). Finds all pages in categories related to Pokémon generations 1 - 4 that have been edited since 31 March 2020. + +#### Forum Data (`forum` directory) +##### `dump_forum_data` +Run this on a server with access to a Glitch City Laboratories database. It will dump the table schemas and data into several .sql files. Place these in the `forum` directory. + +##### `create_sqlite_database` +Run this locally, after the `dump_forum_data` script has been run on the Glitch City Laboratories server and the dumps copied to the `forum` directory (same directory as this script). A file called `forum.sqlite` will be generated from the MariaDB dumps. This SQLite database is required for generating the static archive later. + +Make sure the `mysql2sqlite` submodule is initialized and install `mawk` for the best results. + +## Epilogue Package + +## Misc Scripts +### `get_pms.php` +This is installed on Glitch City Laboratories and allows any logged-in user to export their PM inbox as an HTML file. 
\ No newline at end of file
diff --git a/epilogue/__init__.py b/epilogue/__init__.py
new file mode 100644
index 0000000..2f6d025
--- /dev/null
+++ b/epilogue/__init__.py
@@ -0,0 +1,18 @@
+import os
+from .forum import Forum
+from .archive_generator import ArchiveGenerator
+
+BASEDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+FORUM_DATABASE = os.path.join(BASEDIR, "forum", "forum.sqlite")
+TEMPLATES_DIR = os.path.join(BASEDIR, "templates")
+STATIC_DIR = os.path.join(BASEDIR, "static")
+
+ARCHIVES_BASEDIR = "archives"
+FORUM_ARCHIVES = os.path.join(ARCHIVES_BASEDIR, "forums")
+WIKI_ARCHIVES = os.path.join(ARCHIVES_BASEDIR, "wiki")
+
+def main():
+    """Console entry point: render the forum database into static HTML under FORUM_ARCHIVES."""
+    forum = Forum(FORUM_DATABASE)
+    generator = ArchiveGenerator(TEMPLATES_DIR, STATIC_DIR)
+    generator.generate_forum(forum, FORUM_ARCHIVES)
\ No newline at end of file
diff --git a/epilogue/archive_generator.py b/epilogue/archive_generator.py
new file mode 100644
index 0000000..a116a58
--- /dev/null
+++ b/epilogue/archive_generator.py
@@ -0,0 +1,122 @@
+import os
+import chevron
+import logging
+import shutil
+from datetime import datetime
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("ArchiveGenerator")
+
+def format_datetime (timestamp):
+    """Convert a Unix timestamp into a naive datetime in the local timezone."""
+    return datetime.fromtimestamp(timestamp)
+
+def prepare_thread (thread):
+    """Copy a thread row into a dict and add a `datetime` key derived from `poster_time`."""
+    thread = dict(thread)
+    thread['datetime'] = format_datetime(thread['poster_time'])
+    return thread
+
+def prepare_post (post):
+    """Copy a post row into a dict; posts get the same `datetime` treatment as threads."""
+    return prepare_thread(post)
+
+class ArchiveGenerator():
+    """Renders forum data into a tree of static HTML files using mustache templates."""
+
+    def __init__ (self, template_dir, static_dir):
+        self.template_dir = template_dir
+        self.static_dir = static_dir
+
+    def generate_forum (self, forum, out_dir):
+        """Write the stylesheet and board index into out_dir, then archive every board."""
+        logger.info("Archiving forum to %s", out_dir)
+        # exist_ok lets the generator be re-run over an existing archive.
+        os.makedirs(out_dir, exist_ok=True)
+
+        shutil.copyfile(os.path.join(self.static_dir, "style.css"), os.path.join(out_dir, "style.css"))
+        renderer = TemplateRenderer(self.template_dir, out_dir)
+        renderer.render_template_to_file("boards", "index.html", {
+            "categories": forum.get_board_tree()
+        })
+
+        for board in forum.get_boards():
+            self.generate_forum_board(forum, board, out_dir)
+
+    def generate_forum_board (self, forum, board, out_dir):
+        """Write the thread listing for one board, then archive each of its threads."""
+        board_out_dir = os.path.join(out_dir, "board-{}".format(board['id_board']))
+        logger.info("Archiving board %s to %s", board['name'], board_out_dir)
+        os.makedirs(board_out_dir, exist_ok=True)
+
+        # Each query is capped at per_page rows, so keep fetching pages until one comes back empty.
+        threads = []
+        page = 0
+        while True:
+            batch = forum.get_threads_in_board(board, page)
+            if not batch:
+                break
+            threads.extend(prepare_thread(thread) for thread in batch)
+            page = page + 1
+
+        renderer = TemplateRenderer(self.template_dir, board_out_dir)
+        renderer.render_template_to_file("threads", "index.html", {
+            "title": " - {}".format(board['name']),
+            "base": "../",
+            "board": board,
+            "threads": threads
+        })
+
+        for thread in threads:
+            self.generate_forum_thread(forum, board, thread, board_out_dir)
+
+    def generate_forum_thread (self, forum, board, thread, out_dir):
+        """Write one page-N.html per page of posts, plus an index.html from the page-0-redirect template."""
+        thread_out_dir = os.path.join(out_dir, "thread-{}".format(thread['id_topic']))
+        logger.info("Archiving thread %s to %s", thread['subject'], thread_out_dir)
+        os.makedirs(thread_out_dir, exist_ok=True)
+
+        renderer = TemplateRenderer(self.template_dir, thread_out_dir)
+        renderer.render_template_to_file("page-0-redirect", "index.html")
+
+        page = 0
+        while True:
+            posts = [prepare_post(post) for post in forum.get_posts_in_thread(thread, page)]
+            # An empty page means the previous page was the last one.
+            if not posts:
+                break
+
+            logger.info("Archiving page %s of thread %s", page, thread['subject'])
+            renderer.render_template_to_file("posts", "page-{}.html".format(page), {
+                "title": " - {} - Page {}".format(thread['subject'], page + 1),
+                "base": "../../",
+                "board": board,
+                "thread": thread,
+                "page": page,
+                "next": page + 1,
+                "prev": page - 1,
+                "posts": posts
+            })
+            page = page + 1
+
+class TemplateRenderer():
+    """Renders mustache templates from template_dir into files under out_dir."""
+
+    def __init__ (self, template_dir, out_dir):
+        self.template_dir = template_dir
+        self.partials_dir = os.path.join(template_dir, "partials")
+        self.out_dir = out_dir
+        self.extension = "mustache"
+
+    def open_template (self, name):
+        """Open the named template for reading as UTF-8 text; the caller closes it."""
+        return open(os.path.join(self.template_dir, "{}.{}".format(name, self.extension)), encoding="utf-8")
+
+    def render_template_to_file (self, template_name, out_file, data=None):
+        """Render template_name with data (an empty context when omitted) into out_dir/out_file."""
+        # A None default avoids sharing one mutable dict between calls.
+        if data is None:
+            data = {}
+        with self.open_template(template_name) as template:
+            with open(os.path.join(self.out_dir, out_file), "w", encoding="utf-8") as out:
+                out.write(chevron.render(template, data, self.partials_dir, self.extension))
\ No newline at end of file
diff --git a/epilogue/forum.py b/epilogue/forum.py
new file mode 100644
index 0000000..25688c3
--- /dev/null
+++ b/epilogue/forum.py
@@ -0,0 +1,71 @@
+import sqlite3
+
+PREFIX = "smf_"
+GET_BOARDS = "SELECT * FROM `{}boards`".format(PREFIX)
+GET_CATEGORIES = "SELECT * FROM `{}categories`".format(PREFIX)
+GET_THREADS = """
+    SELECT * FROM `{}topics` AS `topics`, `{}messages` AS `messages`
+    WHERE `topics`.`id_board`=? AND `messages`.`id_msg`=`topics`.`id_first_msg`
+    ORDER BY `id_last_msg` DESC
+    LIMIT ? OFFSET ?
+""".format(PREFIX, PREFIX)
+# `id_msg` breaks ties between posts sharing a `poster_time` so LIMIT/OFFSET pages never overlap.
+GET_POSTS = """
+    SELECT * FROM `{}messages`
+    WHERE `id_topic`=?
+    ORDER BY `poster_time` ASC, `id_msg` ASC
+    LIMIT ? OFFSET ?
+""".format(PREFIX)
+
+def _resolve_id (value, key):
+    """Return value itself when it is already an integer id, otherwise value[key]."""
+    if isinstance(value, int):
+        return value
+    return value[key]
+
+class Forum():
+    """Read-only access to the forum tables stored in a SQLite database."""
+
+    def __init__ (self, db_path):
+        self.connection = sqlite3.connect(db_path)
+        self.connection.row_factory = sqlite3.Row
+
+    def get_board_tree (self):
+        """Return the categories as dicts, each holding its top-level boards (with nested children) under `children`."""
+        categories = [dict(category) for category in self.get_categories()]
+        boards = [dict(board) for board in self.get_boards()]
+        board_ids = {board['id_board'] for board in boards}
+        for board in boards:
+            # The inner name must differ from `board`, or each board is compared only with itself.
+            board['children'] = [child for child in boards if child['id_parent'] == board['id_board']]
+        for category in categories:
+            # Only boards whose parent is not itself a board belong directly under the category;
+            # child boards are reached through their parent's `children` and would otherwise appear twice.
+            category['children'] = [
+                board for board in boards
+                if board['id_cat'] == category['id_cat'] and board['id_parent'] not in board_ids
+            ]
+        return categories
+
+    def get_categories (self):
+        """Return every row of the categories table."""
+        cursor = self.connection.cursor()
+        cursor.execute(GET_CATEGORIES)
+        return cursor.fetchall()
+
+    def get_boards (self):
+        """Return every row of the boards table."""
+        cursor = self.connection.cursor()
+        cursor.execute(GET_BOARDS)
+        return cursor.fetchall()
+
+    def get_threads_in_board (self, board, page=0, per_page=2000):
+        """Return one page of threads (joined with their first message) for a board row or integer board id, ordered by `id_last_msg` descending."""
+        cursor = self.connection.cursor()
+        cursor.execute(GET_THREADS, (_resolve_id(board, 'id_board'), per_page, page * per_page))
+        return cursor.fetchall()
+
+    def get_posts_in_thread (self, thread, page=0, per_page=15):
+        """Return one page of posts for a thread row or integer topic id, oldest first."""
+        cursor = self.connection.cursor()
+        cursor.execute(GET_POSTS, (_resolve_id(thread, 'id_topic'), per_page, page * per_page))
+        return cursor.fetchall()
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..d587580
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,18 @@
+#!/usr/bin/env python
+
+# setuptools (not distutils) is what implements install_requires and entry_points.
+from setuptools import setup
+
+setup(
+    name='Glitch City Labs - Epilogue',
+    version='0.0.1',
+    description='Tools for exporting and creating archives of Glitch City Labs data',
+    author='Adrian Kuschelyagi Malacoda',
+    packages=['epilogue'],
+    install_requires=['pysqlite3 >= 0.4.3', 'chevron >= 0.13.1'],
+    entry_points={
+        'console_scripts': [
+            'epilogue = epilogue:main'
+        ]
+    }
+)
diff --git a/static/style.css b/static/style.css
new file mode 100644
index 0000000..d7dc924
--- /dev/null +++ b/static/style.css @@ -0,0 +1,12 @@ +body { font-family: monospace; } + +.category, .board { list-style-type: none;} +.category .name, .board .name { font-weight: bold; } +.board .board { margin-left: 10px; } + +#threads { width: 100%; } + +.label { font-weight: bold } +article { border-top: 1px solid black; } +section { margin-top: 15px; margin-bottom: 15px; } +.next { float: right; } \ No newline at end of file diff --git a/templates/boards.mustache b/templates/boards.mustache new file mode 100644 index 0000000..b1dee6a --- /dev/null +++ b/templates/boards.mustache @@ -0,0 +1,6 @@ +{{>header}} +{{#categories}} +

{{name}}

+ {{>child_boards}} +{{/categories}} +{{>footer}} \ No newline at end of file diff --git a/templates/page-0-redirect.mustache b/templates/page-0-redirect.mustache new file mode 100644 index 0000000..7b69450 --- /dev/null +++ b/templates/page-0-redirect.mustache @@ -0,0 +1,4 @@ + + +

Redirect

+ \ No newline at end of file diff --git a/templates/partials/child_boards.mustache b/templates/partials/child_boards.mustache new file mode 100644 index 0000000..e6082da --- /dev/null +++ b/templates/partials/child_boards.mustache @@ -0,0 +1,7 @@ +{{#children}} +
  • + +
    {{description}}
    + {{>child_boards}} +
  • +{{/children}} \ No newline at end of file diff --git a/templates/partials/footer.mustache b/templates/partials/footer.mustache new file mode 100644 index 0000000..b317cc2 --- /dev/null +++ b/templates/partials/footer.mustache @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/templates/partials/header.mustache b/templates/partials/header.mustache new file mode 100644 index 0000000..6c45512 --- /dev/null +++ b/templates/partials/header.mustache @@ -0,0 +1,7 @@ + + + Glitch City Laboratories Archives{{title}} + + + +

    Glitch City Laboratories Archives

    \ No newline at end of file diff --git a/templates/partials/pagination.mustache b/templates/partials/pagination.mustache new file mode 100644 index 0000000..63e9e10 --- /dev/null +++ b/templates/partials/pagination.mustache @@ -0,0 +1,4 @@ + \ No newline at end of file diff --git a/templates/posts.mustache b/templates/posts.mustache new file mode 100644 index 0000000..7119d65 --- /dev/null +++ b/templates/posts.mustache @@ -0,0 +1,16 @@ +{{>header}} +

    {{board.name}}

    +

    {{thread.subject}} - Page {{next}}

    +{{>pagination}} +{{#posts}} +
    +
    +

    {{subject}}

    +
    Posted by: {{poster_name}}
    +
    Date: {{datetime}}
    +
    +
    {{{body}}}
    +
    +{{/posts}} +{{>pagination}} +{{>footer}} \ No newline at end of file diff --git a/templates/threads.mustache b/templates/threads.mustache new file mode 100644 index 0000000..92a927e --- /dev/null +++ b/templates/threads.mustache @@ -0,0 +1,17 @@ +{{>header}} +

    {{board.name}}

    + + + + + + + {{#threads}} + + + + + + {{/threads}} +
    TitlePosterDate
    {{subject}}{{poster_name}}{{datetime}}
    +{{>footer}} \ No newline at end of file