Compare commits

..

13 Commits

Author SHA1 Message Date
eff22ff325 Bolden names of successor websites. 2020-09-20 05:48:15 -05:00
6ab90d5fff Add link to GCW on the archive index page. 2020-09-20 05:47:35 -05:00
d98354046b Prefix image paths with images subdirectory (which does not yet exist) 2020-09-20 05:45:49 -05:00
f1ae73b737 Correctly parse out fragment from html archive link and append it in the correct place. 2020-09-20 05:40:57 -05:00
d062ca6787 Add anchors to headings. 2020-09-20 05:31:55 -05:00
a9adf51453 Add GCW link to archived wiki pages, for the live version of said archived article. 2020-09-20 05:28:37 -05:00
7157757d43 redirector: handle case where thread id can have an .html extension, because of course it can... 2020-09-03 04:12:54 -05:00
5859ee0408 Add notice to the top of archived pages that this is an archive. 2020-09-03 03:34:22 -05:00
b33ea016d5 convert all f-strings to old-style format strings for python 3.5 compatibility (since that is what's available on the server) 2020-09-01 02:46:37 -05:00
ab29250b74 add "added submissions" boards from Submit-A-Glitch to exclusion filter 2020-09-01 02:25:32 -05:00
167a03be3c fix syntax error 2020-09-01 01:31:39 -05:00
f65361e06b Implement forums redirector and default archive urls (since it's unlikely these will be changed). Also add redirectors for the index/main pages since those don't get handled by default. 2020-08-30 16:51:55 -05:00
c37cf4fc44 Implement thread index for mapping thread ids back to board ids, for use with the redirector.
The archive domain (archives.glitchcity.info) will host this file and the redirector will pull and unpack it when it starts up.
2020-08-30 16:50:21 -05:00
12 changed files with 161 additions and 42 deletions

View File

@@ -2,6 +2,8 @@ import os
import logging
import shutil
import math
import json
import gzip
from itertools import chain
from traceback import print_exc
@@ -22,12 +24,16 @@ DEX_TYPES = [
"MDIGlitchDex", "MetascriptDex", "TMHMDex", "StatDex", "PosterDex", "TypeDex", "UnownDex", "DollDex", "DefaultNameDex",
"BattleTypeDe", "BadgeDescriptionDex", "FacingDex"
]
DEXES = list(chain.from_iterable([[f"{dex_type}{language}" for dex_type in DEX_TYPES] for language in DEX_LANGUAGES]))
DEXES = list(chain.from_iterable([["{}{}".format(dex_type, language) for dex_type in DEX_TYPES] for language in DEX_LANGUAGES]))
FORUM_THREAD_INDEX = "thread_index.json.gz"
IMAGE_DIRECTORY = "images"
class ArchiveLinker(Linker):
def __init__ (self, directory_names=[]):
super().__init__()
self.directory_names = directory_names
self.image_directory = IMAGE_DIRECTORY
self.replacements = {
"/": "+",
#":": ""
@@ -35,10 +41,15 @@ class ArchiveLinker(Linker):
def translate_page_title (self, page_title):
page_title = super().translate_page_title(page_title)
fragment = ""
if "#" in page_title:
fragment = page_title[page_title.find("#"):]
page_title = page_title[:-len(fragment)]
directory_name = ""
for name in self.directory_names:
if page_title.startswith(f"{name}/"):
if page_title.startswith("{}/".format(name)):
directory_name = name
page_title = page_title[len(directory_name) + 1:]
break
@@ -46,7 +57,14 @@ class ArchiveLinker(Linker):
for key, value in self.replacements.items():
page_title = page_title.replace(key, value)
return f"{directory_name}{'/' if directory_name else ''}{page_title}.html"
return "{}{}{}.html{}".format(directory_name, '/' if directory_name else '', page_title, fragment)
def translate_image_title (self, page_title):
    """Return the archive-relative path for an image title.

    The title is first translated by the parent linker's
    ``translate_image_title``. A non-empty result is prefixed with
    ``self.image_directory`` and a slash; an empty result yields ``None``.
    """
    translated = super().translate_image_title(page_title)
    if translated:
        return "{}/{}".format(self.image_directory, translated)
    return None
def prepare_thread (thread):
thread.subject = html.unescape(thread.subject)
@@ -105,7 +123,7 @@ class ArchiveGenerator():
if page.redirect:
logger.info("Archiving redirect page (%s -> %s) to %s", page.title, page.redirect, page_out)
renderer.render_template_to_file("redirect", page_out, {
"target": f"{base}{linker.translate_page_title(page.redirect)}"
"target": "{}{}".format(base, linker.translate_page_title(page.redirect))
})
else:
logger.info("Archiving page %s to %s", page.title, page_out)
@@ -122,6 +140,7 @@ class ArchiveGenerator():
renderer.render_template_to_file("page", page_out, {
"title": " - {}".format(page.title),
"pagename": page.title,
"page": page,
"base": base,
"text": rendered
@@ -133,12 +152,13 @@ class ArchiveGenerator():
raise e
for category, pages in categories.items():
category_out = f"Category:{linker.translate_page_title(category)}"
category_out = "Category:{}".format(linker.translate_page_title(category))
logger.info("Archiving category %s to %s", category, category_out)
try:
renderer.render_template_to_file("category", category_out, {
"title": f" - {category}",
"title": " - {}".format(category),
"pagename": "Category:{}".format(category),
"category": category,
"pages": pages
})
@@ -158,10 +178,15 @@ class ArchiveGenerator():
"categories": forum.get_board_tree()
})
threads = []
for board in forum.get_boards():
self.generate_forum_board(forum, board, out_dir)
forum_threads = forum.get_threads_in_board(board)
threads = threads + forum_threads
self.generate_forum_board(forum, board, forum_threads, out_dir)
def generate_forum_board (self, forum, board, out_dir):
self.generate_thread_index(threads, os.path.join(out_dir, FORUM_THREAD_INDEX))
def generate_forum_board (self, forum, board, threads, out_dir):
board_out_dir = os.path.join(out_dir, "board-{}".format(board.id))
logger.info("Archiving board %s to %s", board.name, board_out_dir)
try:
@@ -169,7 +194,7 @@ class ArchiveGenerator():
except FileExistsError: pass
renderer = TemplateRenderer(self.template_dir, board_out_dir)
threads = [prepare_thread(thread) for thread in forum.get_threads_in_board(board)]
threads = [prepare_thread(thread) for thread in threads]
renderer.render_template_to_file("threads", "index.html", {
"title": " - {}".format(board.name),
"base": "../",
@@ -193,7 +218,7 @@ class ArchiveGenerator():
})
total_pages = math.ceil((thread.num_replies + 1) / DEFAULT_POSTS_PER_PAGE)
page_links = [{"label": page + 1, "link": f"page-{page}.html"} for page in range(total_pages)]
page_links = [{"label": page + 1, "link": "page-{}.html".format(page)} for page in range(total_pages)]
page = 0
while True:
posts = [prepare_post(post) for post in forum.get_posts_in_thread(thread, page)]
@@ -214,6 +239,13 @@ class ArchiveGenerator():
})
page = page + 1
def generate_thread_index (self, threads, out_path):
    """Write a gzip-compressed JSON index mapping thread ids to their parent.

    The file contains one JSON object of the form
    ``{thread_id: {"parent": thread_parent}, ...}``. Because JSON object keys
    are always strings, integer thread ids are stored as strings.

    :param threads: iterable of objects exposing ``id`` and ``parent``.
    :param out_path: path of the gzip file to create (overwritten if present).
    """
    index = {thread.id: {"parent": thread.parent} for thread in threads}
    # Serialise fully before opening the file so a serialisation error
    # cannot leave a truncated archive behind.
    payload = json.dumps(index).encode("utf-8")
    with gzip.open(out_path, "wb") as out:
        out.write(payload)
class TemplateRenderer():
def __init__ (self, template_dir, out_dir):
self.template_dir = template_dir

View File

@@ -1,39 +1,98 @@
import argparse
import gzip
import urllib.request
import json
from .archive_generator import ArchiveLinker, DEXES
from .archive_generator import ArchiveLinker, DEXES, FORUM_THREAD_INDEX
from flask import Flask, redirect, request
app = Flask(__name__)
DEFAULT_ARCHIVES_DOMAIN = "https://archives.glitchcity.info/"
DEFAULT_FORUMS_ARCHIVE = "{}forums".format(DEFAULT_ARCHIVES_DOMAIN)
DEFAULT_WIKI_ARCHIVE = "{}wiki".format(DEFAULT_ARCHIVES_DOMAIN)
## Wiki redirector
@app.route("/wiki/")
def redirect_wiki_main ():
    """Handle the bare ``/wiki/`` URL by redirecting as for "Main Page"."""
    return redirect_wiki("Main Page")
@app.route("/wiki/<path:path>")
def redirect_wiki (path):
    """Redirect a wiki URL to the URL produced by ``make_wiki_url``."""
    target = make_wiki_url(path)
    return redirect(target)
def make_wiki_url (path):
    """Build the wiki-archive URL for a requested wiki path.

    One trailing slash on ``path`` is dropped. The remainder is translated by
    ``app.wiki_linker.translate_page_title`` and appended to the configured
    ``app.args.wiki_archive`` base URL.

    :param path: the part of the request path after ``/wiki/``.
    :returns: the URL to redirect to.
    """
    if path.endswith("/"):
        path = path[:-1]
    return app.args.wiki_archive + app.wiki_linker.translate_page_title(path)
## Forum redirector
@app.route('/forums/')
def redirect_forums_index ():
    """Handle the bare ``/forums/`` URL via ``redirect_forums`` with an empty path."""
    return redirect_forums("")
@app.route('/forums/<path:path>')
def redirect_forums (path):
    """Redirect a forum URL to the URL produced by ``make_forum_url``.

    ``path`` is supplied by the route but not used here; the current
    ``request`` object is what gets passed on.
    """
    target = make_forum_url(request)
    return redirect(target)
def make_forum_url (request):
    """Build the forums-archive URL for a forum request.

    Reads the ``topic`` and ``board`` query parameters from ``request.args``:

    * ``topic`` may end in ``.html`` and may carry a dot-separated suffix after
      the thread id (the original code expected a ``msg``-prefixed post id
      there); only the part before the first dot is used.
    * When ``board`` is absent, the board is looked up in ``app.thread_index``
      under the thread id. An unknown thread id leaves the board unset instead
      of raising ``KeyError``.
    * A string board id is likewise cut at its first dot.

    The result is ``app.args.forums_archive`` followed by ``board-<id>`` and,
    when a thread id is present, ``/thread-<id>``, always ending in ``/``. If
    no board can be determined, the archive base URL itself is returned.

    :param request: object whose ``args`` mapping holds the query parameters.
    :returns: the URL to redirect to.
    """
    thread_id = request.args.get("topic", None)
    board_id = request.args.get("board", None)

    if thread_id:
        # partition() tolerates any number of dots; unpacking split(".")
        # raised ValueError on anything other than exactly one.
        thread_id = strip_extension(thread_id).partition(".")[0]
        if not board_id:
            entry = app.thread_index.get(thread_id)
            if entry:
                board_id = entry["parent"]

    # The index may hold non-string parents (the original guarded this with a
    # TypeError handler); only strings can carry a dotted suffix.
    if isinstance(board_id, str):
        board_id = board_id.partition(".")[0]

    url = app.args.forums_archive
    if board_id:
        url = url + "board-{}".format(board_id)
        # A thread segment only makes sense beneath a board directory.
        if thread_id:
            url = url + "/thread-{}".format(thread_id)
    if not url.endswith("/"):
        url = url + "/"
    return url
## Forum redirector
# NOTE(review): a `redirect_forums` view for this same route already appears
# earlier in this file and passes `request` to make_forum_url; this one passes
# `path` instead. Presumably this is the superseded version -- confirm and
# remove, since the two definitions share one name and one route.
@app.route('/forums/<path:path>')
def redirect_forums (path):
return redirect(make_forum_url(path))
def strip_extension (item, extensions=(".html",)):
    """Return ``item`` with a known trailing extension removed.

    The first entry of ``extensions`` that ``item`` ends with is cut off. When
    none matches, ``item`` is returned unchanged.

    :param item: string to strip, e.g. ``"123.html"``.
    :param extensions: suffixes to try, in order; defaults to ``(".html",)``.
    :returns: ``item`` without the matched suffix.
    """
    for extension in extensions:
        # Every string "ends with" "", and item[:-0] would wrongly be empty,
        # so empty suffixes are skipped.
        if extension and item.endswith(extension):
            return item[:-len(extension)]
    return item
# NOTE(review): make_forum_url is also defined earlier in this file with a
# full implementation; this version only returns str(request). Presumably a
# leftover placeholder -- confirm and remove, since the later definition of a
# name is the one that stays bound.
def make_forum_url (request):
return str(request)
def read_thread_index (forums_archive):
    """Fetch and decode the gzipped JSON thread index of a forums archive.

    The index URL is ``forums_archive`` immediately followed by
    ``FORUM_THREAD_INDEX`` (no separator is inserted). The response body is
    gunzipped, decoded and parsed as JSON.

    :param forums_archive: base URL of the forums archive.
    :returns: the parsed JSON document.
    """
    index_url = "{}{}".format(forums_archive, FORUM_THREAD_INDEX)
    with urllib.request.urlopen(index_url) as response:
        compressed = response.read()
    raw_json = gzip.decompress(compressed).decode()
    return json.loads(raw_json)
def main ():
    """Parse command-line options, load the thread index and run the app.

    ``--wiki-archive`` and ``--forums-archive`` default to
    ``DEFAULT_WIKI_ARCHIVE`` and ``DEFAULT_FORUMS_ARCHIVE``. Both values are
    normalised to end in ``/`` before being stored on ``app.args``. The thread
    index is then read from the forums archive, and the wiki linker is created
    before ``app.run()`` is called.
    """
    parser = argparse.ArgumentParser()
    # Each option is registered exactly once: argparse rejects a second
    # add_argument() call for the same option string.
    parser.add_argument("--wiki-archive", help="URL to wiki archive", default=DEFAULT_WIKI_ARCHIVE)
    parser.add_argument("--forums-archive", help="URL to forums archive", default=DEFAULT_FORUMS_ARCHIVE)
    args = parser.parse_args()

    # Paths are appended to these base URLs directly, so make sure they end
    # with a slash.
    if not args.wiki_archive.endswith("/"):
        args.wiki_archive = args.wiki_archive + "/"
    if not args.forums_archive.endswith("/"):
        args.forums_archive = args.forums_archive + "/"

    app.args = args
    app.thread_index = read_thread_index(args.forums_archive)
    app.wiki_linker = ArchiveLinker(directory_names=DEXES)
    app.run()

View File

@@ -32,7 +32,7 @@ INTERWIKI_NAMESPACES = {
FILE_NAMESPACES = ["File:", "Image:"]
CATEGORY_NAMESPACE = "Category:"
CATEGORY_LINK_NAMESPACE = f":{CATEGORY_NAMESPACE}"
CATEGORY_LINK_NAMESPACE = ":{}".format(CATEGORY_NAMESPACE)
class Wiki():
def __init__ (self, xml_path):
@@ -152,7 +152,7 @@ def render (wikitext, base="", linker=None):
else:
url = linker.translate_interwiki_title(node.title)
if not url:
url = f"{base}{linker.translate_page_title(node.title)}"
url = "{}{}".format(base, linker.translate_page_title(node.title))
rendered.append('<a href="{}">{}</a>'.format(
url,
@@ -170,8 +170,9 @@ def render (wikitext, base="", linker=None):
render(node.tag)
))
elif node_type is Heading:
rendered.append("<h{}>{}</h{}>".format(
rendered.append('<h{} id="{}">{}</h{}>'.format(
node.level,
reformat_page_title(node.title),
render(node.title, base, linker),
node.level
))
@@ -205,7 +206,7 @@ def reformat_page_title (page_title):
if not page_title:
return ""
return f"{page_title[0].upper()}{page_title[1:].replace(' ', '_')}"
return "{}{}".format(page_title[0].upper(), page_title[1:].replace(' ', '_'))
class Template():
def __init__ (self, wikicode):

View File

@@ -77,21 +77,21 @@ DO_NOT_ARCHIVE_CATEGORIES = [
# Boards we are not interested in archiving.
# `id_board` in (1, 2)
DO_NOT_ARCHIVE_BOARDS = [
24, 94, 118, 121 # Links
40, # Exclusive Board
65, # Requests for Moderatorship
66, # Requests for Membership+
67, # Requests for Distinguished Membership
23, # M.A.S.K. HQ (Staff Board)
22, # Admins Only Board
89, # Test Board
86, # Omega Archives
24, 94, 118, 121, # Links
40, # Exclusive Board
65, # Requests for Moderatorship
66, # Requests for Membership+
67, # Requests for Distinguished Membership
23, # M.A.S.K. HQ (Staff Board)
22, # Admins Only Board
89, # Test Board
86, # Omega Archives
51, 37, 79, 26, 47, 44, 45, 99, 93, 119, 96,
62, # Submit-A-Glitch Archives
62, 60, 80, 84, # Submit-A-Glitch Archives
3, 4, 5, 57, 58, 59, 38, 54, 63, 64,
68, 69, 70, 81, 82, 83,
28, # The Dumpster Out Back
123 # ?????
28, # The Dumpster Out Back
123 # ?????
]
# Regexes for sensitive information

View File

@@ -1,4 +1,5 @@
{{>header}}
{{>forums_notice}}
{{#categories}}
<h2 class="category-name">{{name}}</h2>
{{>child_boards}}

View File

@@ -0,0 +1,9 @@
{{>header}}
{{>wiki_notice}}
<h2>{{pagename}}</h2>
<ul>
{{#pages}}
<li><a href="{{url}}">{{title}}</a></li>
{{/pages}}
</ul>
{{>footer}}

View File

@@ -1,5 +1,8 @@
{{>header}}
Welcome to the <b>Glitch City Laboratories Archives</b>.
<p>Glitch City Laboratories was a Pok&eacute;mon glitch website that existed from March 2006 to September 2020 (<a href="forums/board-2/thread-9114/page-0.html">announcement of closure</a>). This is an <b>archive</b> of content from the website prior to its closure.</p>
<p>Further development and discussion is happening at <b><a href="https://discord.com/invite/EA7jxJ6">Glitch City Research Institute</a></b>, the successor community.</p>
<p>The <b><a href="https://glitchcity.wiki/">Glitch City Wiki</a></b> is the continuation of the Glitch City Laboratories wiki.</p>
<h2>Archives</h2>
<ul>
<li><a href="forums">Forums</a> (<a href="forums.tar.gz">.tar.gz</a>) (<a href="forums.sql.gz">.sql.gz</a>) (<a href="forums.sqlite.gz">.sqlite.gz</a>)</li>

View File

@@ -1,4 +1,5 @@
{{>header}}
{{>wiki_notice}}
<h2>{{page.title}}</h2>
<article class="page">
{{{text}}}

View File

@@ -0,0 +1,5 @@
<div class="notice">
<p>Glitch City Laboratories closed on 1 September 2020 (<a href="{{base}}board-2/thread-9114/page-0.html">announcement</a>). This is an <b>archived</b> copy of a thread from Glitch City Laboratories Forums.</p>
<p>You can join <a href="https://discord.com/invite/EA7jxJ6">Glitch City Research Institute</a> to ask questions or discuss current developments.</p>
<p>You may also download the archive of this forum in <a href="{{base}}../forums.tar.gz">.tar.gz</a>, <a href="{{base}}../forums.sql.gz">.sql.gz</a>, or <a href="{{base}}../forums.sqlite.gz">.sqlite.gz</a> formats.</p>
</div>

View File

@@ -0,0 +1,6 @@
<div class="notice">
<p>Glitch City Laboratories closed on 1 September 2020 (<a href="{{base}}../forums/board-2/thread-9114/page-0.html">announcement</a>). This is an <b>archived</b> copy of an article from Glitch City Laboratories wiki.</p>
<p><b>A live version of this article is available at the <a href="https://glitchcity.wiki/">Glitch City Wiki</a> <a href="https://glitchcity.wiki/{{pagename}}">here</a>.</b></p>
<p>You can join <a href="https://discord.com/invite/EA7jxJ6">Glitch City Research Institute</a> to ask questions or discuss current developments.</p>
<p>You may also download the archive of the wiki in <a href="{{base}}../wiki.tar.gz">.tar.gz</a> or <a href="{{base}}../wiki.xml.gz">.xml.gz</a> formats.</p>
</div>

View File

@@ -1,4 +1,5 @@
{{>header}}
{{>forums_notice}}
<h2><a href="../">{{board.name}}</a></h2>
<h3>{{thread.subject}} - Page {{next}}</h3>
{{>pagination}}

View File

@@ -1,4 +1,5 @@
{{>header}}
{{>forums_notice}}
<h2>{{board.name}}</h2>
<table id="threads">
<tr>